author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:06:01 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:06:01 +0000
commit | 486754660bb926339aefcf012a3f848592babb8b (patch) |
tree | ecdbc446c9876f4f120f701c243373cd3cb43db3 /lib/CodeGen |
parent | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff) |
Diffstat (limited to 'lib/CodeGen')
73 files changed, 17740 insertions, 6904 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index 575506da84d4..feed3833f24a 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -53,12 +53,9 @@ namespace swiftcall { CodeGen::CodeGenTypes &CGT; protected: llvm::CallingConv::ID RuntimeCC; - llvm::CallingConv::ID BuiltinCC; public: ABIInfo(CodeGen::CodeGenTypes &cgt) - : CGT(cgt), - RuntimeCC(llvm::CallingConv::C), - BuiltinCC(llvm::CallingConv::C) {} + : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {} virtual ~ABIInfo(); @@ -77,11 +74,6 @@ namespace swiftcall { return RuntimeCC; } - /// Return the calling convention to use for compiler builtins - llvm::CallingConv::ID getBuiltinCC() const { - return BuiltinCC; - } - virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0; /// EmitVAArg - Emit the target dependent code to load a value of @@ -108,8 +100,6 @@ namespace swiftcall { virtual bool isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const; - virtual bool shouldSignExtUnsignedType(QualType Ty) const; - bool isHomogeneousAggregate(QualType Ty, const Type *&Base, uint64_t &Members) const; @@ -137,8 +127,7 @@ namespace swiftcall { bool supportsSwift() const final override { return true; } - virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type*> types, + virtual bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> types, bool asReturnValue) const = 0; virtual bool isLegalVectorTypeForSwift(CharUnits totalSize, diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index e2349da5f0a4..415bd9626220 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -26,6 +26,7 @@ #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" @@ -44,17 +45,19 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" +#include "llvm/Transforms/Instrumentation/GCOVProfiler.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> @@ -101,7 +104,18 @@ class EmitAssemblyHelper { /// /// \return True on success. 
bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action, - raw_pwrite_stream &OS); + raw_pwrite_stream &OS, raw_pwrite_stream *DwoOS); + + std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) { + std::error_code EC; + auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC, + llvm::sys::fs::F_None); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message(); + F.reset(); + } + return F; + } public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, @@ -231,10 +245,9 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { PM.add(createAddressSanitizerFunctionPass( - /*CompileKernel*/ true, - /*Recover*/ true, /*UseAfterScope*/ false)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true, - /*Recover*/true)); + /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false)); + PM.add(createAddressSanitizerModulePass( + /*CompileKernel*/ true, /*Recover*/ true)); } static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -243,7 +256,13 @@ static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper &>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::HWAddress); - PM.add(createHWAddressSanitizerPass(Recover)); + PM.add(createHWAddressSanitizerPass(/*CompileKernel*/ false, Recover)); +} + +static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + PM.add(createHWAddressSanitizerPass( + /*CompileKernel*/ true, /*Recover*/ true)); } static void addMemorySanitizerPass(const PassManagerBuilder &Builder, @@ -361,21 +380,6 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) { return static_cast<llvm::CodeModel::Model>(CodeModel); } -static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) { - // Keep this synced with the equivalent code in - // lib/Frontend/CompilerInvocation.cpp - llvm::Optional<llvm::Reloc::Model> RM; - RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) - .Case("static", llvm::Reloc::Static) - .Case("pic", llvm::Reloc::PIC_) - .Case("ropi", llvm::Reloc::ROPI) - .Case("rwpi", llvm::Reloc::RWPI) - .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) - .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); - assert(RM.hasValue() && "invalid PIC model!"); - return *RM; -} - static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) { if (Action == Backend_EmitObj) return TargetMachine::CGFT_ObjectFile; @@ -447,7 +451,10 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; + Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); + Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; + Options.EmitAddrsig = CodeGenOpts.Addrsig; if (CodeGenOpts.EnableSplitDwarf) Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; @@ -470,6 +477,23 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.MCOptions.IASSearchPaths.push_back( Entry.IgnoreSysRoot ? 
Entry.Path : HSOpts.Sysroot + Entry.Path); } +static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { + if (CodeGenOpts.DisableGCov) + return None; + if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes) + return None; + // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if + // LLVM's -default-gcov-version flag is set to something invalid. + GCOVOptions Options; + Options.EmitNotes = CodeGenOpts.EmitGcovNotes; + Options.EmitData = CodeGenOpts.EmitGcovArcs; + llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version)); + Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum; + Options.NoRedZone = CodeGenOpts.DisableRedZone; + Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; + Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; + return Options; +} void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { @@ -501,7 +525,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.Inliner = createFunctionInliningPass( CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize, (!CodeGenOpts.SampleProfileFile.empty() && - CodeGenOpts.EmitSummaryIndex)); + CodeGenOpts.PrepareForThinLTO)); } PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; @@ -511,7 +535,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; - PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex; + PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; @@ -535,6 +559,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addObjCARCOptPass); } + if (LangOpts.CoroutinesTS) + addCoroutinePassesToExtensionPoints(PMBuilder); + if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) { PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addBoundsCheckingPass); @@ -572,6 +599,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addHWAddressSanitizerPasses); } + if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) { + PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, + addKernelHWAddressSanitizerPasses); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addKernelHWAddressSanitizerPasses); + } + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, addMemorySanitizerPass); @@ -593,9 +627,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addDataFlowSanitizerPass); } - if (LangOpts.CoroutinesTS) - addCoroutinePassesToExtensionPoints(PMBuilder); - if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, addEfficiencySanitizerPass); @@ -612,20 +643,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (!CodeGenOpts.RewriteMapFiles.empty()) addSymbolRewriterPass(CodeGenOpts, &MPM); - if (!CodeGenOpts.DisableGCov && - (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) { - // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if - // LLVM's -default-gcov-version flag is set to something invalid. 
- GCOVOptions Options; - Options.EmitNotes = CodeGenOpts.EmitGcovNotes; - Options.EmitData = CodeGenOpts.EmitGcovArcs; - memcpy(Options.Version, CodeGenOpts.CoverageVersion, 4); - Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum; - Options.NoRedZone = CodeGenOpts.DisableRedZone; - Options.FunctionNamesInData = - !CodeGenOpts.CoverageNoFunctionNamesInData; - Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; - MPM.add(createGCOVProfilerPass(Options)); + if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) { + MPM.add(createGCOVProfilerPass(*Options)); if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo) MPM.add(createStripSymbolsPass(true)); } @@ -664,8 +683,6 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { BackendArgs.push_back("-limit-float-precision"); BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); } - for (const std::string &BackendOption : CodeGenOpts.BackendOptions) - BackendArgs.push_back(BackendOption.c_str()); BackendArgs.push_back(nullptr); llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, BackendArgs.data()); @@ -685,7 +702,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts); std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); - llvm::Reloc::Model RM = getRelocModel(CodeGenOpts); + llvm::Reloc::Model RM = CodeGenOpts.RelocationModel; CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); llvm::TargetOptions Options; @@ -696,7 +713,8 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action, - raw_pwrite_stream &OS) { + raw_pwrite_stream &OS, + raw_pwrite_stream *DwoOS) { // Add LibraryInfo. llvm::Triple TargetTriple(TheModule->getTargetTriple()); std::unique_ptr<TargetLibraryInfoImpl> TLII( @@ -713,7 +731,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, if (CodeGenOpts.OptimizationLevel > 0) CodeGenPasses.add(createObjCARCContractPass()); - if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT, + if (TM->addPassesToEmitFile(CodeGenPasses, OS, DwoOS, CGFT, /*DisableVerify=*/!CodeGenOpts.VerifyModule)) { Diags.Report(diag::err_fe_unable_to_interface_with_target); return false; @@ -724,7 +742,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { - TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); + TimeRegion Region(FrontendTimesIsEnabled ? 
&CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); @@ -752,31 +770,35 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - std::unique_ptr<raw_fd_ostream> ThinLinkOS; + std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS; switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - if (CodeGenOpts.EmitSummaryIndex) { + if (CodeGenOpts.PrepareForThinLTO) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { - std::error_code EC; - ThinLinkOS.reset(new llvm::raw_fd_ostream( - CodeGenOpts.ThinLinkBitcodeFile, EC, - llvm::sys::fs::F_None)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile - << EC.message(); + ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); + if (!ThinLinkOS) return; - } } + PerModulePasses.add(createWriteThinLTOBitcodePass( + *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr)); + } else { + // Emit a module summary by default for Regular LTO except for ld64 + // targets + bool EmitLTOSummary = + (CodeGenOpts.PrepareForLTO && + llvm::Triple(TheModule->getTargetTriple()).getVendor() != + llvm::Triple::Apple); + if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO")) + TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + PerModulePasses.add( - createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get())); + createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, + EmitLTOSummary)); } - else - PerModulePasses.add( - createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists)); break; case Backend_EmitLL: @@ -785,7 +807,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, break; default: - if (!AddEmitPasses(CodeGenPasses, Action, *OS)) + if (!CodeGenOpts.SplitDwarfFile.empty()) { + DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); + if (!DwoOS) + return; + } + if (!AddEmitPasses(CodeGenPasses, Action, *OS, + DwoOS ? &DwoOS->os() : nullptr)) return; } @@ -814,6 +842,11 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, PrettyStackTraceString CrashInfo("Code generation"); CodeGenPasses.run(*TheModule); } + + if (ThinLinkOS) + ThinLinkOS->keep(); + if (DwoOS) + DwoOS->keep(); } static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { @@ -827,7 +860,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { case 2: switch (Opts.OptimizeSize) { default: - llvm_unreachable("Invalide optimization level for size!"); + llvm_unreachable("Invalid optimization level for size!"); case 0: return PassBuilder::O2; @@ -854,7 +887,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { /// `EmitAssembly` at some point in the future when the default switches. void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { - TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); + TimeRegion Region(FrontendTimesIsEnabled ? 
&CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); // The new pass manager always makes a target machine available to passes @@ -913,10 +946,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( ModulePassManager MPM(CodeGenOpts.DebugPassManager); if (!CodeGenOpts.DisableLLVMPasses) { - bool IsThinLTO = CodeGenOpts.EmitSummaryIndex; + bool IsThinLTO = CodeGenOpts.PrepareForThinLTO; bool IsLTO = CodeGenOpts.PrepareForLTO; if (CodeGenOpts.OptimizationLevel == 0) { + if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) + MPM.addPass(GCOVProfilerPass(*Options)); + // Build a minimal pipeline based on the semantics required by Clang, // which is just that always inlining occurs. MPM.addPass(AlwaysInlinerPass()); @@ -925,8 +961,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass())); - // Lastly, add a semantically necessary pass for ThinLTO. - if (IsThinLTO) + // Lastly, add a semantically necessary pass for LTO. + if (IsLTO || IsThinLTO) MPM.addPass(NameAnonGlobalPass()); } else { // Map our optimization levels into one of the distinct levels used to @@ -940,6 +976,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); + if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) + PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { + MPM.addPass(GCOVProfilerPass(*Options)); + }); if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline( @@ -948,6 +988,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } else if (IsLTO) { MPM = PB.buildLTOPreLinkDefaultPipeline(Level, CodeGenOpts.DebugPassManager); + MPM.addPass(NameAnonGlobalPass()); } else { MPM = PB.buildPerModuleDefaultPipeline(Level, CodeGenOpts.DebugPassManager); @@ -959,7 +1000,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // create that pass manager here and use it as needed below. legacy::PassManager CodeGenPasses; bool NeedCodeGen = false; - Optional<raw_fd_ostream> ThinLinkOS; + std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS; // Append any output we need to the pass manager. switch (Action) { @@ -967,23 +1008,26 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - if (CodeGenOpts.EmitSummaryIndex) { + if (CodeGenOpts.PrepareForThinLTO) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { - std::error_code EC; - ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC, - llvm::sys::fs::F_None); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) - << CodeGenOpts.ThinLinkBitcodeFile << EC.message(); + ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); + if (!ThinLinkOS) return; - } } - MPM.addPass( - ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr)); + MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? 
&ThinLinkOS->os() + : nullptr)); } else { + // Emit a module summary by default for Regular LTO except for ld64 + // targets + bool EmitLTOSummary = + (CodeGenOpts.PrepareForLTO && + llvm::Triple(TheModule->getTargetTriple()).getVendor() != + llvm::Triple::Apple); + if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO")) + TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, - CodeGenOpts.EmitSummaryIndex, - CodeGenOpts.EmitSummaryIndex)); + EmitLTOSummary)); } break; @@ -997,7 +1041,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( NeedCodeGen = true; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - if (!AddEmitPasses(CodeGenPasses, Action, *OS)) + if (!CodeGenOpts.SplitDwarfFile.empty()) { + DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); + if (!DwoOS) + return; + } + if (!AddEmitPasses(CodeGenPasses, Action, *OS, + DwoOS ? &DwoOS->os() : nullptr)) // FIXME: Should we handle this error differently? return; break; @@ -1017,6 +1067,11 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PrettyStackTraceString CrashInfo("Code generation"); CodeGenPasses.run(*TheModule); } + + if (ThinLinkOS) + ThinLinkOS->keep(); + if (DwoOS) + DwoOS->keep(); } Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) { @@ -1026,16 +1081,22 @@ Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) { // The bitcode file may contain multiple modules, we want the one that is // marked as being the ThinLTO module. - for (BitcodeModule &BM : *BMsOrErr) { - Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); - if (LTOInfo && LTOInfo->IsThinLTO) - return BM; - } + if (const BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr)) + return *Bm; return make_error<StringError>("Could not find module summary", inconvertibleErrorCode()); } +BitcodeModule *clang::FindThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { + for (BitcodeModule &BM : BMs) { + Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); + if (LTOInfo && LTOInfo->IsThinLTO) + return &BM; + } + return nullptr; +} + static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, @@ -1067,9 +1128,8 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, // e.g. record required linkage changes. if (Summary->modulePath() == M->getModuleIdentifier()) continue; - // Doesn't matter what value we plug in to the map, just needs an entry - // to provoke importing by thinBackend. - ImportList[Summary->modulePath()][GUID] = 1; + // Add an entry to provoke importing by thinBackend. 
+ ImportList[Summary->modulePath()].insert(GUID); } std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports; @@ -1100,15 +1160,27 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, return llvm::make_unique<lto::NativeObjectStream>(std::move(OS)); }; lto::Config Conf; + if (CGOpts.SaveTempsFilePrefix != "") { + if (Error E = Conf.addSaveTemps(CGOpts.SaveTempsFilePrefix + ".", + /* UseInputModulePath */ false)) { + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { + errs() << "Error setting up ThinLTO save-temps: " << EIB.message() + << '\n'; + }); + } + } Conf.CPU = TOpts.CPU; Conf.CodeModel = getCodeModel(CGOpts); Conf.MAttrs = TOpts.Features; - Conf.RelocModel = getRelocModel(CGOpts); + Conf.RelocModel = CGOpts.RelocationModel; Conf.CGOptLevel = getCGOptLevel(CGOpts); initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; Conf.DebugPassManager = CGOpts.DebugPassManager; + Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; + Conf.RemarksFilename = CGOpts.OptRecordFile; + Conf.DwoPath = CGOpts.SplitDwarfFile; switch (Action) { case Backend_EmitNothing: Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { @@ -1123,7 +1195,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, break; case Backend_EmitBC: Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { - WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists); + WriteBitcodeToFile(*M, *OS, CGOpts.EmitLLVMUseLists); return false; }; break; @@ -1132,7 +1204,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, break; } if (Error E = thinBackend( - Conf, 0, AddStream, *M, *CombinedIndex, ImportList, + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; @@ -1148,6 +1220,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { + std::unique_ptr<llvm::Module> EmptyModule; if (!CGOpts.ThinLTOIndexFile.empty()) { // If we are performing a ThinLTO importing compile, load the function index // into memory and pass it into runThinLTOBackend, which will run the @@ -1165,11 +1238,22 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // A null CombinedIndex means we should skip ThinLTO compilation // (LLVM will optionally ignore empty index files, returning null instead // of an error). - bool DoThinLTOBackend = CombinedIndex != nullptr; - if (DoThinLTOBackend) { - runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, - LOpts, std::move(OS), CGOpts.SampleProfileFile, Action); - return; + if (CombinedIndex) { + if (!CombinedIndex->skipModuleByDistributedBackend()) { + runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, + LOpts, std::move(OS), CGOpts.SampleProfileFile, + Action); + return; + } + // Distributed indexing detected that nothing from the module is needed + // for the final linking. So we can skip the compilation. We sill need to + // output an empty object file to make sure that a linker does not fail + // trying to read it. Also for some features, like CFI, we must skip + // the compilation as CombinedIndex does not contain all required + // information. 
+ EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext()); + EmptyModule->setTargetTriple(M->getTargetTriple()); + M = EmptyModule.get(); } } @@ -1228,7 +1312,7 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, // Save llvm.compiler.used and remote it. SmallVector<Constant*, 2> UsedArray; - SmallSet<GlobalValue*, 4> UsedGlobals; + SmallPtrSet<GlobalValue*, 4> UsedGlobals; Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0); GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true); for (auto *GV : UsedGlobals) { @@ -1253,7 +1337,7 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, // If the input is LLVM Assembly, bitcode is produced by serializing // the module. Use-lists order need to be perserved in this case. llvm::raw_string_ostream OS(Data); - llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true); ModuleData = ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size()); } else diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 6862fd811186..b34bcdc1fc38 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -18,6 +18,7 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -186,7 +187,7 @@ namespace { RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot, SourceLocation loc, bool AsValue) const; - /// \brief Converts a rvalue to integer value. + /// Converts a rvalue to integer value. llvm::Value *convertRValueToInt(RValue RVal) const; RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal, @@ -207,13 +208,13 @@ namespace { LVal.getBaseInfo(), LVal.getTBAAInfo()); } - /// \brief Emits atomic load. + /// Emits atomic load. /// \returns Loaded value. RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc, bool AsValue, llvm::AtomicOrdering AO, bool IsVolatile); - /// \brief Emits atomic compare-and-exchange sequence. + /// Emits atomic compare-and-exchange sequence. /// \param Expected Expected value. /// \param Desired Desired value. /// \param Success Atomic ordering for success operation. @@ -229,13 +230,13 @@ namespace { llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false); - /// \brief Emits atomic update. + /// Emits atomic update. /// \param AO Atomic ordering. /// \param UpdateOp Update operation for the current lvalue. void EmitAtomicUpdate(llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile); - /// \brief Emits atomic update. + /// Emits atomic update. /// \param AO Atomic ordering. void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal, bool IsVolatile); @@ -243,25 +244,25 @@ namespace { /// Materialize an atomic r-value in atomic-layout memory. Address materializeRValue(RValue rvalue) const; - /// \brief Creates temp alloca for intermediate operations on atomic value. + /// Creates temp alloca for intermediate operations on atomic value. Address CreateTempAlloca() const; private: bool requiresMemSetZero(llvm::Type *type) const; - /// \brief Emits atomic load as a libcall. + /// Emits atomic load as a libcall. void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, llvm::AtomicOrdering AO, bool IsVolatile); - /// \brief Emits atomic load as LLVM instruction. 
+ /// Emits atomic load as LLVM instruction. llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile); - /// \brief Emits atomic compare-and-exchange op as a libcall. + /// Emits atomic compare-and-exchange op as a libcall. llvm::Value *EmitAtomicCompareExchangeLibcall( llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr, llvm::AtomicOrdering Success = llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure = llvm::AtomicOrdering::SequentiallyConsistent); - /// \brief Emits atomic compare-and-exchange op as LLVM instruction. + /// Emits atomic compare-and-exchange op as LLVM instruction. std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp( llvm::Value *ExpectedVal, llvm::Value *DesiredVal, llvm::AtomicOrdering Success = @@ -269,19 +270,19 @@ namespace { llvm::AtomicOrdering Failure = llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false); - /// \brief Emit atomic update as libcalls. + /// Emit atomic update as libcalls. void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile); - /// \brief Emit atomic update as LLVM instructions. + /// Emit atomic update as LLVM instructions. void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile); - /// \brief Emit atomic update as libcalls. + /// Emit atomic update as libcalls. void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal, bool IsVolatile); - /// \brief Emit atomic update as LLVM instructions. + /// Emit atomic update as LLVM instructions. void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRal, bool IsVolatile); }; @@ -590,11 +591,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, break; case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_min: Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min : llvm::AtomicRMWInst::UMin; break; case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_max: Op = E->getValueType()->isSignedIntegerType() ? 
llvm::AtomicRMWInst::Max : llvm::AtomicRMWInst::UMax; break; @@ -751,6 +754,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { Address Dest = Address::invalid(); Address Ptr = EmitPointerWithAlignment(E->getPtr()); + if (E->getOp() == AtomicExpr::AO__c11_atomic_init || + E->getOp() == AtomicExpr::AO__opencl_atomic_init) { + LValue lvalue = MakeAddrLValue(Ptr, AtomicTy); + EmitAtomicInit(E->getVal1(), lvalue); + return RValue::get(nullptr); + } + CharUnits sizeChars, alignChars; std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy); uint64_t Size = sizeChars.getQuantity(); @@ -758,12 +768,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 || getContext().toBits(sizeChars) > MaxInlineWidthInBits); - if (E->getOp() == AtomicExpr::AO__c11_atomic_init || - E->getOp() == AtomicExpr::AO__opencl_atomic_init) { - LValue lvalue = MakeAddrLValue(Ptr, AtomicTy); - EmitAtomicInit(E->getVal1(), lvalue); - return RValue::get(nullptr); - } + if (UseLibcall) + CGM.getDiags().Report(E->getLocStart(), diag::warn_atomic_op_misaligned); llvm::Value *Order = EmitScalarExpr(E->getOrder()); llvm::Value *Scope = @@ -855,6 +861,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_max: Val1 = EmitValToTemp(*this, E->getVal1()); break; } @@ -909,6 +917,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_max: // For these, only library calls for certain sizes exist. UseOptimizedLibcall = true; break; @@ -1091,6 +1101,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: LibCallName = E->getValueType()->isSignedIntegerType() ? "__atomic_fetch_min" @@ -1098,6 +1109,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), LoweredMemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: LibCallName = E->getValueType()->isSignedIntegerType() ? "__atomic_fetch_max" @@ -1160,7 +1172,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { if (UseOptimizedLibcall && Res.getScalarVal()) { llvm::Value *ResVal = Res.getScalarVal(); if (PostOp) { - llvm::Value *LoadVal1 = Args[1].RV.getScalarVal(); + llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal(); ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1); } if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) @@ -1508,11 +1520,13 @@ void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const { // which means that the caller is responsible for having zeroed // any padding. Just do an aggregate copy of that type. 
if (rvalue.isAggregate()) { - CGF.EmitAggregateCopy(getAtomicAddress(), - rvalue.getAggregateAddress(), - getAtomicType(), - (rvalue.isVolatileQualified() - || LVal.isVolatileQualified())); + LValue Dest = CGF.MakeAddrLValue(getAtomicAddress(), getAtomicType()); + LValue Src = CGF.MakeAddrLValue(rvalue.getAggregateAddress(), + getAtomicType()); + bool IsVolatile = rvalue.isVolatileQualified() || + LVal.isVolatileQualified(); + CGF.EmitAggregateCopy(Dest, Src, getAtomicType(), + AggValueSlot::DoesNotOverlap, IsVolatile); return; } @@ -2007,6 +2021,7 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) { AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, Zeroed ? AggValueSlot::IsZeroed : AggValueSlot::IsNotZeroed); diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 5f73d4cf7913..617856a7b43e 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -66,7 +66,7 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM, /// buildBlockDescriptor - Build the block descriptor meta-data for a block. /// buildBlockDescriptor is accessed from 5th field of the Block_literal /// meta-data and contains stationary information about the block literal. -/// Its definition will have 4 (or optinally 6) words. +/// Its definition will have 4 (or optionally 6) words. /// \code /// struct Block_descriptor { /// unsigned long reserved; @@ -104,7 +104,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, elements.addInt(ulong, blockInfo.BlockSize.getQuantity()); // Optional copy/dispose helpers. - if (blockInfo.NeedsCopyDispose) { + if (blockInfo.needsCopyDisposeHelpers()) { // copy_func_helper_decl elements.add(buildCopyHelper(CGM, blockInfo)); @@ -159,6 +159,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, /// These are the flags (with corresponding bit number) that the /// compiler is actually supposed to know about. + /// 23. BLOCK_IS_NOESCAPE - indicates that the block is non-escaping /// 25. BLOCK_HAS_COPY_DISPOSE - indicates that the block /// descriptor provides copy and dispose helper functions /// 26. BLOCK_HAS_CXX_OBJ - indicates that there's a captured @@ -307,25 +308,12 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, assert(elementTypes.empty()); if (CGM.getLangOpts().OpenCL) { - // The header is basically 'struct { int; int; generic void *; + // The header is basically 'struct { int; int; // custom_fields; }'. Assert that struct is packed. 
- auto GenericAS = - CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); - auto GenPtrAlign = - CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); - auto GenPtrSize = - CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); - assert(CGM.getIntSize() <= GenPtrSize); - assert(CGM.getIntAlign() <= GenPtrAlign); - assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); elementTypes.push_back(CGM.IntTy); /* total size */ elementTypes.push_back(CGM.IntTy); /* align */ - elementTypes.push_back( - CGM.getOpenCLRuntime() - .getGenericVoidPointerType()); /* invoke function */ - unsigned Offset = - 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); - unsigned BlockAlign = GenPtrAlign.getQuantity(); + unsigned Offset = 2 * CGM.getIntSize().getQuantity(); + unsigned BlockAlign = CGM.getIntAlign().getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { @@ -343,7 +331,7 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, info.BlockSize = CharUnits::fromQuantity(Offset); } else { // The header is basically 'struct { void *; int; int; void *; void *; }'. - // Assert that that struct is packed. + // Assert that the struct is packed. assert(CGM.getIntSize() <= CGM.getPointerSize()); assert(CGM.getIntAlign() <= CGM.getPointerAlign()); assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); @@ -477,6 +465,14 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, info.NeedsCopyDispose = true; info.HasCXXObject = true; + // So do C structs that require non-trivial copy construction or + // destruction. + } else if (variable->getType().isNonTrivialToPrimitiveCopy() == + QualType::PCK_Struct || + variable->getType().isDestructedType() == + QualType::DK_nontrivial_c_struct) { + info.NeedsCopyDispose = true; + // And so do types with destructors. } else if (CGM.getLangOpts().CPlusPlus) { if (const CXXRecordDecl *record = @@ -705,11 +701,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { /// kind of cleanup object is a BlockDecl*. void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) { assert(E->getNumObjects() != 0); - ArrayRef<ExprWithCleanups::CleanupObject> cleanups = E->getObjects(); - for (ArrayRef<ExprWithCleanups::CleanupObject>::iterator - i = cleanups.begin(), e = cleanups.end(); i != e; ++i) { - enterBlockScope(*this, *i); - } + for (const ExprWithCleanups::CleanupObject &C : E->getObjects()) + enterBlockScope(*this, C); } /// Find the layout for the given block in a linked list and remove it. @@ -740,27 +733,19 @@ void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) { } /// Emit a block literal expression in the current function. -llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { // If the block has no captures, we won't have a pre-computed // layout for it. if (!blockExpr->getBlockDecl()->hasCaptures()) { // The block literal is emitted as a global variable, and the block invoke // function has to be extracted from its initializer. 
if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { - if (InvokeF) { - auto *GV = cast<llvm::GlobalVariable>( - cast<llvm::Constant>(Block)->stripPointerCasts()); - auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer()); - *InvokeF = cast<llvm::Function>( - BlockInit->getAggregateElement(2)->stripPointerCasts()); - } return Block; } CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; - return EmitBlockLiteral(blockInfo, InvokeF); + return EmitBlockLiteral(blockInfo); } // Find the block info for this block and take ownership of it. @@ -769,28 +754,17 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo, InvokeF); + return EmitBlockLiteral(*blockInfo); } -llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; - auto GenVoidPtrTy = - IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; - LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; - auto GenVoidPtrSize = CharUnits::fromQuantity( - CGM.getTarget().getPointerWidth( - CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / - 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); - if (InvokeF) - *InvokeF = InvokeFn; - auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) @@ -805,8 +779,13 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, llvm::Constant *descriptor; BlockFlags flags; if (!IsOpenCL) { - isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(), - VoidPtrTy); + // If the block is non-escaping, set field 'isa 'to NSConcreteGlobalBlock + // and set the BLOCK_IS_GLOBAL bit of field 'flags'. Copying a non-escaping + // block just returns the original block and releasing it is a no-op. + llvm::Constant *blockISA = blockInfo.getBlockDecl()->doesNotEscape() + ? CGM.getNSConcreteGlobalBlock() + : CGM.getNSConcreteStackBlock(); + isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy); // Build the block descriptor. 
descriptor = buildBlockDescriptor(CGM, blockInfo); @@ -815,12 +794,14 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, flags = BLOCK_HAS_SIGNATURE; if (blockInfo.HasCapturedVariableLayout) flags |= BLOCK_HAS_EXTENDED_LAYOUT; - if (blockInfo.NeedsCopyDispose) + if (blockInfo.needsCopyDisposeHelpers()) flags |= BLOCK_HAS_COPY_DISPOSE; if (blockInfo.HasCXXObject) flags |= BLOCK_HAS_CXX_OBJ; if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; + if (blockInfo.getBlockDecl()->doesNotEscape()) + flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL; } auto projectField = @@ -859,11 +840,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), getIntSize(), "block.align"); } - addHeaderField(blockFn, GenVoidPtrSize, "block.invoke"); - if (!IsOpenCL) + if (!IsOpenCL) { + addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy), + getPointerSize(), "block.invoke"); addHeaderField(descriptor, getPointerSize(), "block.descriptor"); - else if (auto *Helper = - CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + } else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { addHeaderField( I.first, @@ -913,7 +895,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, const CGBlockInfo::Capture &enclosingCapture = BlockInfo->getCapture(variable); - // This is a [[type]]*, except that a byref entry wil just be an i8**. + // This is a [[type]]*, except that a byref entry will just be an i8**. src = Builder.CreateStructGEP(LoadBlockStruct(), enclosingCapture.getIndex(), enclosingCapture.getOffset(), @@ -955,7 +937,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, AggValueSlot::forAddr(blockField, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); EmitAggExpr(copyExpr, Slot); } else { EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr); @@ -1024,6 +1007,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, llvm::Value *result = Builder.CreatePointerCast( blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); + if (IsOpenCL) { + CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn, + result); + } + return result; } @@ -1061,38 +1049,23 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { } llvm::Type *CodeGenModule::getGenericBlockLiteralType() { + assert(!getLangOpts().OpenCL && "OpenCL does not need this"); + if (GenericBlockLiteralType) return GenericBlockLiteralType; llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); - if (getLangOpts().OpenCL) { - // struct __opencl_block_literal_generic { - // int __size; - // int __align; - // __generic void *__invoke; - // /* custom fields */ - // }; - SmallVector<llvm::Type *, 8> StructFields( - {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()}); - if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { - for (auto I : Helper->getCustomFieldTypes()) - StructFields.push_back(I); - } - GenericBlockLiteralType = llvm::StructType::create( - StructFields, "struct.__opencl_block_literal_generic"); - } else { - // struct __block_literal_generic { - // void *__isa; - // int __flags; - // int __reserved; - // void (*__invoke)(void *); - // struct __block_descriptor 
*__descriptor; - // }; - GenericBlockLiteralType = - llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, - IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); - } + // struct __block_literal_generic { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // }; + GenericBlockLiteralType = + llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, + IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); return GenericBlockLiteralType; } @@ -1103,27 +1076,21 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, E->getCallee()->getType()->getAs<BlockPointerType>(); llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); + llvm::Value *FuncPtr; - // Get a pointer to the generic block literal. - // For OpenCL we generate generic AS void ptr to be able to reuse the same - // block definition for blocks with captures generated as private AS local - // variables and without captures generated as global AS program scope - // variables. - unsigned AddrSpace = 0; - if (getLangOpts().OpenCL) - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); + if (!CGM.getLangOpts().OpenCL) { + // Get a pointer to the generic block literal. + llvm::Type *BlockLiteralTy = + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0); - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); + // Bitcast the callee to a block literal. + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); - // Get the function pointer from the literal. - llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, - CGM.getLangOpts().OpenCL ? 2 : 3); + // Get the function pointer from the literal. + FuncPtr = + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); + } // Add the block literal. CallArgList Args; @@ -1146,7 +1113,11 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); // Load the function. 
- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + llvm::Value *Func; + if (CGM.getLangOpts().OpenCL) + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); + else + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1255,14 +1226,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, // Reserved fields.addInt(CGM.IntTy, 0); + + // Function + fields.add(blockFn); } else { fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity()); fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity()); } - // Function - fields.add(blockFn); - if (!IsOpenCL) { // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); @@ -1287,6 +1258,10 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, llvm::Constant *Result = llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); + if (CGM.getContext().getLangOpts().OpenCL) + CGM.getOpenCLRuntime().recordBlockInfo( + blockInfo.BlockExpression, + cast<llvm::Function>(blockFn->stripPointerCasts()), Result); return Result; } @@ -1479,8 +1454,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) { auto addr = LocalDeclMap.find(variable)->second; - DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(), - Builder); + (void)DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(), + Builder); continue; } @@ -1513,6 +1488,7 @@ enum class BlockCaptureEntityKind { CXXRecord, // Copy or destroy ARCWeak, ARCStrong, + NonTrivialCStruct, BlockObject, // Assign or release None }; @@ -1548,39 +1524,46 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, Flags |= BLOCK_FIELD_IS_WEAK; return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); } - if (!T->isObjCRetainableType()) - // For all other types, the memcpy is fine. - return std::make_pair(BlockCaptureEntityKind::None, Flags); Flags = BLOCK_FIELD_IS_OBJECT; bool isBlockPointer = T->isBlockPointerType(); if (isBlockPointer) Flags = BLOCK_FIELD_IS_BLOCK; - // Special rules for ARC captures: - Qualifiers QS = T.getQualifiers(); - - // We need to register __weak direct captures with the runtime. - if (QS.getObjCLifetime() == Qualifiers::OCL_Weak) + switch (T.isNonTrivialToPrimitiveCopy()) { + case QualType::PCK_Struct: + return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct, + BlockFieldFlags()); + case QualType::PCK_ARCWeak: + // We need to register __weak direct captures with the runtime. return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); - - // We need to retain the copied value for __strong direct captures. - if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) { - // If it's a block pointer, we have to copy the block and - // assign that to the destination pointer, so we might as - // well use _Block_object_assign. Otherwise we can avoid that. + case QualType::PCK_ARCStrong: + // We need to retain the copied value for __strong direct captures. + // If it's a block pointer, we have to copy the block and assign that to + // the destination pointer, so we might as well use _Block_object_assign. + // Otherwise we can avoid that. return std::make_pair(!isBlockPointer ? 
BlockCaptureEntityKind::ARCStrong : BlockCaptureEntityKind::BlockObject, Flags); - } + case QualType::PCK_Trivial: + case QualType::PCK_VolatileTrivial: { + if (!T->isObjCRetainableType()) + // For all other types, the memcpy is fine. + return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); - // Non-ARC captures of retainable pointers are strong and - // therefore require a call to _Block_object_assign. - if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount) - return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + // Special rules for ARC captures: + Qualifiers QS = T.getQualifiers(); - // Otherwise the memcpy is fine. - return std::make_pair(BlockCaptureEntityKind::None, Flags); + // Non-ARC captures of retainable pointers are strong and + // therefore require a call to _Block_object_assign. + if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount) + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + + // Otherwise the memcpy is fine. + return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); + } + } + llvm_unreachable("after exhaustive PrimitiveCopyKind switch"); } /// Find the set of block captures that need to be explicitly copied or destroy. @@ -1602,6 +1585,64 @@ static void findBlockCapturedManagedEntities( } } +namespace { +/// Release a __block variable. +struct CallBlockRelease final : EHScopeStack::Cleanup { + Address Addr; + BlockFieldFlags FieldFlags; + bool LoadBlockVarAddr; + + CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue) + : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {} + + void Emit(CodeGenFunction &CGF, Flags flags) override { + llvm::Value *BlockVarAddr; + if (LoadBlockVarAddr) { + BlockVarAddr = CGF.Builder.CreateLoad(Addr); + BlockVarAddr = CGF.Builder.CreateBitCast(BlockVarAddr, CGF.VoidPtrTy); + } else { + BlockVarAddr = Addr.getPointer(); + } + + CGF.BuildBlockRelease(BlockVarAddr, FieldFlags); + } +}; +} // end anonymous namespace + +static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind, + Address Field, QualType CaptureType, + BlockFieldFlags Flags, bool EHOnly, + CodeGenFunction &CGF) { + switch (CaptureKind) { + case BlockCaptureEntityKind::CXXRecord: + case BlockCaptureEntityKind::ARCWeak: + case BlockCaptureEntityKind::NonTrivialCStruct: + case BlockCaptureEntityKind::ARCStrong: { + if (CaptureType.isDestructedType() && + (!EHOnly || CGF.needsEHCleanup(CaptureType.isDestructedType()))) { + CodeGenFunction::Destroyer *Destroyer = + CaptureKind == BlockCaptureEntityKind::ARCStrong + ? CodeGenFunction::destroyARCStrongImprecise + : CGF.getDestroyer(CaptureType.isDestructedType()); + CleanupKind Kind = + EHOnly ? EHCleanup + : CGF.getCleanupKind(CaptureType.isDestructedType()); + CGF.pushDestroy(Kind, Field, CaptureType, Destroyer, Kind & EHCleanup); + } + break; + } + case BlockCaptureEntityKind::BlockObject: { + if (!EHOnly || CGF.getLangOpts().Exceptions) { + CleanupKind Kind = EHOnly ? 
EHCleanup : NormalAndEHCleanup; + CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true); + } + break; + } + case BlockCaptureEntityKind::None: + llvm_unreachable("unexpected BlockCaptureEntityKind"); + } +} + /// Generate the copy-helper function for a block closure object: /// static void block_copy_helper(block_t *dst, block_t *src); /// The runtime will have previously initialized 'dst' by doing a @@ -1644,7 +1685,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { false, false); - CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); StartFunction(FD, C.VoidTy, Fn, FI, args); ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; @@ -1665,6 +1706,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { for (const auto &CopiedCapture : CopiedCaptures) { const BlockDecl::Capture &CI = CopiedCapture.CI; const CGBlockInfo::Capture &capture = CopiedCapture.Capture; + QualType captureType = CI.getVariable()->getType(); BlockFieldFlags flags = CopiedCapture.Flags; unsigned index = capture.getIndex(); @@ -1677,6 +1719,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr()); } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { EmitARCCopyWeak(dstField, srcField); + // If this is a C struct that requires non-trivial copy construction, emit a + // call to its copy constructor. + } else if (CopiedCapture.Kind == + BlockCaptureEntityKind::NonTrivialCStruct) { + QualType varType = CI.getVariable()->getType(); + callCStructCopyConstructor(MakeAddrLValue(dstField, varType), + MakeAddrLValue(srcField, varType)); } else { llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { @@ -1695,9 +1744,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { } else { EmitARCRetainNonBlock(srcValue); - // We don't need this anymore, so kill it. It's not quite - // worth the annoyance to avoid creating it in the first place. - cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent(); + // Unless EH cleanup is required, we don't need this anymore, so kill + // it. It's not quite worth the annoyance to avoid creating it in the + // first place. + if (!needsEHCleanup(captureType.isDestructedType())) + cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent(); } } else { assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject); @@ -1725,6 +1776,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { } } } + + // Ensure that we destroy the copied object if an exception is thrown later + // in the helper function. 
+ pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags, /*EHOnly*/ true, + *this); } FinishFunction(); @@ -1732,50 +1788,51 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +static BlockFieldFlags +getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI, + QualType T) { + BlockFieldFlags Flags = BLOCK_FIELD_IS_OBJECT; + if (T->isBlockPointerType()) + Flags = BLOCK_FIELD_IS_BLOCK; + return Flags; +} + static std::pair<BlockCaptureEntityKind, BlockFieldFlags> computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, const LangOptions &LangOpts) { - BlockFieldFlags Flags; if (CI.isByRef()) { - Flags = BLOCK_FIELD_IS_BYREF; + BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); } - if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) { - if (Record->hasTrivialDestructor()) - return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); + switch (T.isDestructedType()) { + case QualType::DK_cxx_destructor: return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); - } - - // Other types don't need to be destroy explicitly. - if (!T->isObjCRetainableType()) - return std::make_pair(BlockCaptureEntityKind::None, Flags); - - Flags = BLOCK_FIELD_IS_OBJECT; - if (T->isBlockPointerType()) - Flags = BLOCK_FIELD_IS_BLOCK; - - // Special rules for ARC captures. - Qualifiers QS = T.getQualifiers(); - - // Use objc_storeStrong for __strong direct captures; the - // dynamic tools really like it when we do this. - if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) - return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags); - - // Support __weak direct captures. - if (QS.getObjCLifetime() == Qualifiers::OCL_Weak) - return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); - - // Non-ARC captures are strong, and we need to use - // _Block_object_dispose. - if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount) - return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); - - // Otherwise, we have nothing to do. - return std::make_pair(BlockCaptureEntityKind::None, Flags); + case QualType::DK_objc_strong_lifetime: + // Use objc_storeStrong for __strong direct captures; the + // dynamic tools really like it when we do this. + return std::make_pair(BlockCaptureEntityKind::ARCStrong, + getBlockFieldFlagsForObjCObjectPointer(CI, T)); + case QualType::DK_objc_weak_lifetime: + // Support __weak direct captures. + return std::make_pair(BlockCaptureEntityKind::ARCWeak, + getBlockFieldFlagsForObjCObjectPointer(CI, T)); + case QualType::DK_nontrivial_c_struct: + return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct, + BlockFieldFlags()); + case QualType::DK_none: { + // Non-ARC captures are strong, and we need to use _Block_object_dispose. + if (T->isObjCRetainableType() && !T.getQualifiers().hasObjCLifetime() && + !LangOpts.ObjCAutoRefCount) + return std::make_pair(BlockCaptureEntityKind::BlockObject, + getBlockFieldFlagsForObjCObjectPointer(CI, T)); + // Otherwise, we have nothing to do. 
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); + } + } + llvm_unreachable("after exhaustive DestructionKind switch"); } /// Generate the destroy-helper function for a block closure object: @@ -1814,7 +1871,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { nullptr, SC_Static, false, false); - CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); StartFunction(FD, C.VoidTy, Fn, FI, args); ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; @@ -1839,29 +1896,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { Address srcField = Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); - // If the captured record has a destructor then call it. - if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) { - const auto *Dtor = - CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor(); - PushDestructorCleanup(Dtor, srcField); - - // If this is a __weak capture, emit the release directly. - } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { - EmitARCDestroyWeak(srcField); - - // Destroy strong objects with a call if requested. - } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { - EmitARCDestroyStrong(srcField, ARCImpreciseLifetime); - - // Otherwise we call _Block_object_dispose. It wouldn't be too - // hard to just emit this as a cleanup if we wanted to make sure - // that things were done in reverse. - } else { - assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject); - llvm::Value *value = Builder.CreateLoad(srcField); - value = Builder.CreateBitCast(value, VoidPtrTy); - BuildBlockRelease(value, flags); - } + pushCaptureCleanup(DestroyedCapture.Kind, srcField, + CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this); } cleanups.ForceCleanup(); @@ -2020,6 +2056,36 @@ public: id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr()); } }; + +/// Emits the copy/dispose helpers for a __block variable that is a non-trivial +/// C struct. 
+class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers { + QualType VarType; + +public: + NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type) + : BlockByrefHelpers(alignment), VarType(type) {} + + void emitCopy(CodeGenFunction &CGF, Address destField, + Address srcField) override { + CGF.callCStructMoveConstructor(CGF.MakeAddrLValue(destField, VarType), + CGF.MakeAddrLValue(srcField, VarType)); + } + + bool needsDispose() const override { + return VarType.isDestructedType(); + } + + void emitDispose(CodeGenFunction &CGF, Address field) override { + EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin(); + CGF.pushDestroy(VarType.isDestructedType(), field, VarType); + CGF.PopCleanupBlocks(cleanupDepth); + } + + void profileImpl(llvm::FoldingSetNodeID &id) const override { + id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr()); + } +}; } // end anonymous namespace static llvm::Constant * @@ -2059,7 +2125,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, SC_Static, false, false); - CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); CGF.StartFunction(FD, R, Fn, FI, args); @@ -2133,7 +2199,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, SC_Static, false, false); - CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); CGF.StartFunction(FD, R, Fn, FI, args); @@ -2205,6 +2271,13 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr)); } + // If type is a non-trivial C struct type that is non-trivial to + // destructly move or destroy, build the copy and dispose helpers. + if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct || + type.isDestructedType() == QualType::DK_nontrivial_c_struct) + return ::buildByrefHelpers( + CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type)); + // Otherwise, if we don't have a retainable type, there's nothing to do. // that the runtime does extra copies. if (!type->isObjCRetainableType()) return nullptr; @@ -2503,30 +2576,10 @@ void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) { EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors? } -namespace { - /// Release a __block variable. - struct CallBlockRelease final : EHScopeStack::Cleanup { - llvm::Value *Addr; - CallBlockRelease(llvm::Value *Addr) : Addr(Addr) {} - - void Emit(CodeGenFunction &CGF, Flags flags) override { - // Should we be passing FIELD_IS_WEAK here? - CGF.BuildBlockRelease(Addr, BLOCK_FIELD_IS_BYREF); - } - }; -} // end anonymous namespace - -/// Enter a cleanup to destroy a __block variable. Note that this -/// cleanup should be a no-op if the variable hasn't left the stack -/// yet; if a cleanup is required for the variable itself, that needs -/// to be done externally. -void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) { - // We don't enter this cleanup if we're in pure-GC mode. - if (CGM.getLangOpts().getGC() == LangOptions::GCOnly) - return; - - EHStack.pushCleanup<CallBlockRelease>(NormalAndEHCleanup, - emission.Addr.getPointer()); +void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr, + BlockFieldFlags Flags, + bool LoadBlockVarAddr) { + EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr); } /// Adjust the declaration of something from the blocks API. 
@@ -2559,11 +2612,11 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM, } } - if (!CGM.getLangOpts().BlocksRuntimeOptional) - return; - - if (GV->isDeclaration() && GV->hasExternalLinkage()) + if (CGM.getLangOpts().BlocksRuntimeOptional && GV->isDeclaration() && + GV->hasExternalLinkage()) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + + CGM.setDSOLocal(GV); } llvm::Constant *CodeGenModule::getBlockObjectDispose() { diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index 80e255f75417..5a8e960ffcc1 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -54,6 +54,7 @@ enum BlockByrefFlags { }; enum BlockLiteralFlags { + BLOCK_IS_NOESCAPE = (1 << 23), BLOCK_HAS_COPY_DISPOSE = (1 << 25), BLOCK_HAS_CXX_OBJ = (1 << 26), BLOCK_IS_GLOBAL = (1 << 28), @@ -214,7 +215,8 @@ public: /// no non-constant captures. bool CanBeGlobal : 1; - /// True if the block needs a custom copy or dispose function. + /// True if the block has captures that would necessitate custom copy or + /// dispose helper functions if the block were escaping. bool NeedsCopyDispose : 1; /// HasCXXObject - True if the block's custom copy/dispose functions @@ -276,6 +278,11 @@ public: } CGBlockInfo(const BlockDecl *blockDecl, StringRef Name); + + // Indicates whether the block needs a custom copy or dispose function. + bool needsCopyDisposeHelpers() const { + return NeedsCopyDispose && !Block->doesNotEscape(); + } }; } // end namespace CodeGen diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 61fe4aac3afa..d2e5eb256d3b 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -20,7 +20,7 @@ namespace CodeGen { class CodeGenFunction; -/// \brief This is an IRBuilder insertion helper that forwards to +/// This is an IRBuilder insertion helper that forwards to /// CodeGenFunction::InsertHelper, which adds necessary metadata to /// instructions. class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter { @@ -29,7 +29,7 @@ public: explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} protected: - /// \brief This forwards to CodeGenFunction::InsertHelper. + /// This forwards to CodeGenFunction::InsertHelper. 
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt) const; @@ -244,6 +244,21 @@ public: Addr.getAlignment().alignmentAtOffset(Offset)); } + using CGBuilderBaseTy::CreateConstInBoundsGEP2_32; + Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, + unsigned Idx1, const llvm::DataLayout &DL, + const llvm::Twine &Name = "") { + auto *GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32( + Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name)); + llvm::APInt Offset( + DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0, + /*IsSigned=*/true); + if (!GEP->accumulateConstantOffset(DL, Offset)) + llvm_unreachable("offset of GEP with constants is always computable"); + return Address(GEP, Addr.getAlignment().alignmentAtOffset( + CharUnits::fromQuantity(Offset.getSExtValue()))); + } + llvm::Value *CreateConstInBoundsByteGEP(llvm::Value *Ptr, CharUnits Offset, const llvm::Twine &Name = "") { assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); @@ -258,23 +273,23 @@ public: using CGBuilderBaseTy::CreateMemCpy; llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { - auto Align = std::min(Dest.getAlignment(), Src.getAlignment()); - return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size, - Align.getQuantity(), IsVolatile); + return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(), + Src.getPointer(), Src.getAlignment().getQuantity(), + Size,IsVolatile); } llvm::CallInst *CreateMemCpy(Address Dest, Address Src, uint64_t Size, bool IsVolatile = false) { - auto Align = std::min(Dest.getAlignment(), Src.getAlignment()); - return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size, - Align.getQuantity(), IsVolatile); + return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(), + Src.getPointer(), Src.getAlignment().getQuantity(), + Size, IsVolatile); } using CGBuilderBaseTy::CreateMemMove; llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { - auto Align = std::min(Dest.getAlignment(), Src.getAlignment()); - return CreateMemMove(Dest.getPointer(), Src.getPointer(), Size, - Align.getQuantity(), IsVolatile); + return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getQuantity(), + Src.getPointer(), Src.getAlignment().getQuantity(), + Size, IsVolatile); } using CGBuilderBaseTy::CreateMemSet; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index ba54f8342f1b..0892e84a044c 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -14,6 +14,7 @@ #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -188,7 +189,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, return RValue::get(Result); } -/// @brief Utility to insert an atomic cmpxchg instruction. +/// Utility to insert an atomic cmpxchg instruction. /// /// @param CGF The current codegen function. /// @param E Builtin call expression to convert to cmpxchg. @@ -319,7 +320,7 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); } -/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* +/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* /// depending on IntrinsicID. 
/// /// \arg CGF The current codegen function. @@ -384,7 +385,7 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { } // The encompassing type must have a width greater than or equal to the width - // of the specified types. Aditionally, if the encompassing type is signed, + // of the specified types. Additionally, if the encompassing type is signed, // its width must be strictly greater than the width of any unsigned types // given. unsigned Width = 0; @@ -478,13 +479,261 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. Value *Min = Builder.getInt1((Type & 2) != 0); - // For GCC compatability, __builtin_object_size treat NULL as unknown size. + // For GCC compatibility, __builtin_object_size treat NULL as unknown size. Value *NullIsUnknown = Builder.getTrue(); return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); } -// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we -// handle them here. +namespace { +/// A struct to generically desribe a bit test intrinsic. +struct BitTest { + enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set }; + enum InterlockingKind : uint8_t { + Unlocked, + Sequential, + Acquire, + Release, + NoFence + }; + + ActionKind Action; + InterlockingKind Interlocking; + bool Is64Bit; + + static BitTest decodeBitTestBuiltin(unsigned BuiltinID); +}; +} // namespace + +BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { + switch (BuiltinID) { + // Main portable variants. + case Builtin::BI_bittest: + return {TestOnly, Unlocked, false}; + case Builtin::BI_bittestandcomplement: + return {Complement, Unlocked, false}; + case Builtin::BI_bittestandreset: + return {Reset, Unlocked, false}; + case Builtin::BI_bittestandset: + return {Set, Unlocked, false}; + case Builtin::BI_interlockedbittestandreset: + return {Reset, Sequential, false}; + case Builtin::BI_interlockedbittestandset: + return {Set, Sequential, false}; + + // X86-specific 64-bit variants. + case Builtin::BI_bittest64: + return {TestOnly, Unlocked, true}; + case Builtin::BI_bittestandcomplement64: + return {Complement, Unlocked, true}; + case Builtin::BI_bittestandreset64: + return {Reset, Unlocked, true}; + case Builtin::BI_bittestandset64: + return {Set, Unlocked, true}; + case Builtin::BI_interlockedbittestandreset64: + return {Reset, Sequential, true}; + case Builtin::BI_interlockedbittestandset64: + return {Set, Sequential, true}; + + // ARM/AArch64-specific ordering variants. 
+ case Builtin::BI_interlockedbittestandset_acq: + return {Set, Acquire, false}; + case Builtin::BI_interlockedbittestandset_rel: + return {Set, Release, false}; + case Builtin::BI_interlockedbittestandset_nf: + return {Set, NoFence, false}; + case Builtin::BI_interlockedbittestandreset_acq: + return {Reset, Acquire, false}; + case Builtin::BI_interlockedbittestandreset_rel: + return {Reset, Release, false}; + case Builtin::BI_interlockedbittestandreset_nf: + return {Reset, NoFence, false}; + } + llvm_unreachable("expected only bittest intrinsics"); +} + +static char bitActionToX86BTCode(BitTest::ActionKind A) { + switch (A) { + case BitTest::TestOnly: return '\0'; + case BitTest::Complement: return 'c'; + case BitTest::Reset: return 'r'; + case BitTest::Set: return 's'; + } + llvm_unreachable("invalid action"); +} + +static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, + BitTest BT, + const CallExpr *E, Value *BitBase, + Value *BitPos) { + char Action = bitActionToX86BTCode(BT.Action); + char SizeSuffix = BT.Is64Bit ? 'q' : 'l'; + + // Build the assembly. + SmallString<64> Asm; + raw_svector_ostream AsmOS(Asm); + if (BT.Interlocking != BitTest::Unlocked) + AsmOS << "lock "; + AsmOS << "bt"; + if (Action) + AsmOS << Action; + AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}"; + + // Build the constraints. FIXME: We should support immediates when possible. + std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}"; + llvm::IntegerType *IntType = llvm::IntegerType::get( + CGF.getLLVMContext(), + CGF.getContext().getTypeSize(E->getArg(1)->getType())); + llvm::Type *IntPtrType = IntType->getPointerTo(); + llvm::FunctionType *FTy = + llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false); + + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); +} + +static llvm::AtomicOrdering +getBitTestAtomicOrdering(BitTest::InterlockingKind I) { + switch (I) { + case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic; + case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent; + case BitTest::Acquire: return llvm::AtomicOrdering::Acquire; + case BitTest::Release: return llvm::AtomicOrdering::Release; + case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic; + } + llvm_unreachable("invalid interlocking"); +} + +/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of +/// bits and a bit position and read and optionally modify the bit at that +/// position. The position index can be arbitrarily large, i.e. it can be larger +/// than 31 or 63, so we need an indexed load in the general case. +static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, + unsigned BuiltinID, + const CallExpr *E) { + Value *BitBase = CGF.EmitScalarExpr(E->getArg(0)); + Value *BitPos = CGF.EmitScalarExpr(E->getArg(1)); + + BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID); + + // X86 has special BT, BTC, BTR, and BTS instructions that handle the array + // indexing operation internally. Use them if possible. + llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch(); + if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) + return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos); + + // Otherwise, use generic code to load one byte and test the bit. Use all but + // the bottom three bits as the array index, and the bottom three bits to form + // a mask. 
+ // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0; + Value *ByteIndex = CGF.Builder.CreateAShr( + BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx"); + Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy); + Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8, + ByteIndex, "bittest.byteaddr"), + CharUnits::One()); + Value *PosLow = + CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty), + llvm::ConstantInt::get(CGF.Int8Ty, 0x7)); + + // The updating instructions will need a mask. + Value *Mask = nullptr; + if (BT.Action != BitTest::TestOnly) { + Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow, + "bittest.mask"); + } + + // Check the action and ordering of the interlocked intrinsics. + llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking); + + Value *OldByte = nullptr; + if (Ordering != llvm::AtomicOrdering::NotAtomic) { + // Emit a combined atomicrmw load/store operation for the interlocked + // intrinsics. + llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or; + if (BT.Action == BitTest::Reset) { + Mask = CGF.Builder.CreateNot(Mask); + RMWOp = llvm::AtomicRMWInst::And; + } + OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask, + Ordering); + } else { + // Emit a plain load for the non-interlocked intrinsics. + OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte"); + Value *NewByte = nullptr; + switch (BT.Action) { + case BitTest::TestOnly: + // Don't store anything. + break; + case BitTest::Complement: + NewByte = CGF.Builder.CreateXor(OldByte, Mask); + break; + case BitTest::Reset: + NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask)); + break; + case BitTest::Set: + NewByte = CGF.Builder.CreateOr(OldByte, Mask); + break; + } + if (NewByte) + CGF.Builder.CreateStore(NewByte, ByteAddr); + } + + // However we loaded the old byte, either by plain load or atomicrmw, shift + // the bit into the low position and mask it to 0 or 1. + Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr"); + return CGF.Builder.CreateAnd( + ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"); +} + +namespace { +enum class MSVCSetJmpKind { + _setjmpex, + _setjmp3, + _setjmp +}; +} + +/// MSVC handles setjmp a bit differently on different platforms. On every +/// architecture except 32-bit x86, the frame address is passed. On x86, extra +/// parameters can be passed as variadic arguments, but we always pass none. +static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, + const CallExpr *E) { + llvm::Value *Arg1 = nullptr; + llvm::Type *Arg1Ty = nullptr; + StringRef Name; + bool IsVarArg = false; + if (SJKind == MSVCSetJmpKind::_setjmp3) { + Name = "_setjmp3"; + Arg1Ty = CGF.Int32Ty; + Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0); + IsVarArg = true; + } else { + Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; + Arg1Ty = CGF.Int8PtrTy; + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + } + + // Mark the call site and declaration with ReturnsTwice. 
+ llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty}; + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); + llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, + ReturnsTwiceAttr, /*Local=*/true); + + llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( + CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); + llvm::Value *Args[] = {Buf, Arg1}; + llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); + CS.setAttributes(ReturnsTwiceAttr); + return RValue::get(CS.getInstruction()); +} + +// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, +// we handle them here. enum class CodeGenFunction::MSVCIntrin { _BitScanForward, _BitScanReverse, @@ -496,7 +745,6 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, - _interlockedbittestandset, __fastfail, }; @@ -564,22 +812,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); - case MSVCIntrin::_interlockedbittestandset: { - llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); - llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Or, Addr, - Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), - llvm::AtomicOrdering::SequentiallyConsistent); - // Shift the relevant bit to the least significant position, truncate to - // the result type, and test the low bit. - llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); - llvm::Value *Truncated = - Builder.CreateTrunc(Shifted, ConvertType(E->getType())); - return Builder.CreateAnd(Truncated, - ConstantInt::get(Truncated->getType(), 1)); - } - case MSVCIntrin::_InterlockedDecrement: { llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( @@ -915,7 +1147,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); } - Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy); + // Negate the product if it would be negative in infinite precision. 
+ Result = CGF.Builder.CreateSelect( + IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); + + Result = CGF.Builder.CreateTrunc(Result, ResTy); } assert(Overflow && Result && "Missing overflow or result"); @@ -926,6 +1162,96 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, return RValue::get(Overflow); } +static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, + Value *&RecordPtr, CharUnits Align, Value *Func, + int Lvl) { + const auto *RT = RType->getAs<RecordType>(); + ASTContext &Context = CGF.getContext(); + RecordDecl *RD = RT->getDecl()->getDefinition(); + ASTContext &Ctx = RD->getASTContext(); + const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD); + std::string Pad = std::string(Lvl * 4, ' '); + + Value *GString = + CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n"); + Value *Res = CGF.Builder.CreateCall(Func, {GString}); + + static llvm::DenseMap<QualType, const char *> Types; + if (Types.empty()) { + Types[Context.CharTy] = "%c"; + Types[Context.BoolTy] = "%d"; + Types[Context.SignedCharTy] = "%hhd"; + Types[Context.UnsignedCharTy] = "%hhu"; + Types[Context.IntTy] = "%d"; + Types[Context.UnsignedIntTy] = "%u"; + Types[Context.LongTy] = "%ld"; + Types[Context.UnsignedLongTy] = "%lu"; + Types[Context.LongLongTy] = "%lld"; + Types[Context.UnsignedLongLongTy] = "%llu"; + Types[Context.ShortTy] = "%hd"; + Types[Context.UnsignedShortTy] = "%hu"; + Types[Context.VoidPtrTy] = "%p"; + Types[Context.FloatTy] = "%f"; + Types[Context.DoubleTy] = "%f"; + Types[Context.LongDoubleTy] = "%Lf"; + Types[Context.getPointerType(Context.CharTy)] = "%s"; + Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s"; + } + + for (const auto *FD : RD->fields()) { + uint64_t Off = RL.getFieldOffset(FD->getFieldIndex()); + Off = Ctx.toCharUnitsFromBits(Off).getQuantity(); + + Value *FieldPtr = RecordPtr; + if (RD->isUnion()) + FieldPtr = CGF.Builder.CreatePointerCast( + FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType()))); + else + FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr, + FD->getFieldIndex()); + + GString = CGF.Builder.CreateGlobalStringPtr( + llvm::Twine(Pad) + .concat(FD->getType().getAsString()) + .concat(llvm::Twine(' ')) + .concat(FD->getNameAsString()) + .concat(" : ") + .str()); + Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); + Res = CGF.Builder.CreateAdd(Res, TmpRes); + + QualType CanonicalType = + FD->getType().getUnqualifiedType().getCanonicalType(); + + // We check whether we are in a recursive type + if (CanonicalType->isRecordType()) { + Value *TmpRes = + dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1); + Res = CGF.Builder.CreateAdd(TmpRes, Res); + continue; + } + + // We try to determine the best format to print the current field + llvm::Twine Format = Types.find(CanonicalType) == Types.end() + ? 
Types[Context.VoidPtrTy] + : Types[CanonicalType]; + + Address FieldAddress = Address(FieldPtr, Align); + FieldPtr = CGF.Builder.CreateLoad(FieldAddress); + + // FIXME Need to handle bitfield here + GString = CGF.Builder.CreateGlobalStringPtr( + Format.concat(llvm::Twine('\n')).str()); + TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr}); + Res = CGF.Builder.CreateAdd(Res, TmpRes); + } + + GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n"); + Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); + Res = CGF.Builder.CreateAdd(Res, TmpRes); + return Res; +} + RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -962,6 +1288,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_copysign: case Builtin::BI__builtin_copysignf: case Builtin::BI__builtin_copysignl: + case Builtin::BI__builtin_copysignf128: return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); case Builtin::BIcos: @@ -994,6 +1321,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: + case Builtin::BI__builtin_fabsf128: return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); case Builtin::BIfloor: @@ -1154,16 +1482,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_abs: case Builtin::BI__builtin_labs: case Builtin::BI__builtin_llabs: { + // X < 0 ? -X : X + // The negation has 'nsw' because abs of INT_MIN is undefined. Value *ArgValue = EmitScalarExpr(E->getArg(0)); - - Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); - Value *CmpResult = - Builder.CreateICmpSGE(ArgValue, - llvm::Constant::getNullValue(ArgValue->getType()), - "abscond"); - Value *Result = - Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); - + Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg"); + Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType()); + Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); + Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs"); return RValue::get(Result); } case Builtin::BI__builtin_conj: @@ -1190,6 +1515,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(ComplexVal.first); } + case Builtin::BI__builtin_dump_struct: { + Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); + CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); + + const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts(); + QualType Arg0Type = Arg0->getType()->getPointeeType(); + + Value *RecordPtr = EmitScalarExpr(Arg0); + Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0); + return RValue::get(Res); + } + case Builtin::BI__builtin_cimag: case Builtin::BI__builtin_cimagf: case Builtin::BI__builtin_cimagl: @@ -1300,20 +1637,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Type *ArgType = Val->getType(); Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); - Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); - Value *ArgZero = llvm::Constant::getNullValue(ArgType); - + unsigned ArgWidth = ArgType->getIntegerBitWidth(); Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - Shift = Builder.CreateAnd(Shift, Mask); - Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); - - Value 
*RightShifted = Builder.CreateLShr(Val, Shift); - Value *LeftShifted = Builder.CreateShl(Val, LeftShift); - Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); - Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); - Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); + Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask); + Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); + Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); + Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); + Value *Result = Builder.CreateOr(LeftShifted, RightShifted); return RValue::get(Result); } case Builtin::BI_rotl8: @@ -1326,20 +1657,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Type *ArgType = Val->getType(); Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); - Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); - Value *ArgZero = llvm::Constant::getNullValue(ArgType); - + unsigned ArgWidth = ArgType->getIntegerBitWidth(); Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - Shift = Builder.CreateAnd(Shift, Mask); - Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); - - Value *LeftShifted = Builder.CreateShl(Val, Shift); - Value *RightShifted = Builder.CreateLShr(Val, RightShift); - Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); - Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); - Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); + Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask); + Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); + Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); + Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); + Value *Result = Builder.CreateOr(LeftShifted, RightShifted); return RValue::get(Result); } case Builtin::BI__builtin_unpredictable: { @@ -1735,6 +2060,63 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest.getPointer()); } + case Builtin::BI__builtin_wmemcmp: { + // The MSVC runtime library does not provide a definition of wmemcmp, so we + // need an inline implementation. 
+ if (!getTarget().getTriple().isOSMSVCRT()) + break; + + llvm::Type *WCharTy = ConvertType(getContext().WCharTy); + + Value *Dst = EmitScalarExpr(E->getArg(0)); + Value *Src = EmitScalarExpr(E->getArg(1)); + Value *Size = EmitScalarExpr(E->getArg(2)); + + BasicBlock *Entry = Builder.GetInsertBlock(); + BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt"); + BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt"); + BasicBlock *Next = createBasicBlock("wmemcmp.next"); + BasicBlock *Exit = createBasicBlock("wmemcmp.exit"); + Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); + Builder.CreateCondBr(SizeEq0, Exit, CmpGT); + + EmitBlock(CmpGT); + PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2); + DstPhi->addIncoming(Dst, Entry); + PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2); + SrcPhi->addIncoming(Src, Entry); + PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); + SizePhi->addIncoming(Size, Entry); + CharUnits WCharAlign = + getContext().getTypeAlignInChars(getContext().WCharTy); + Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign); + Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign); + Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh); + Builder.CreateCondBr(DstGtSrc, Exit, CmpLT); + + EmitBlock(CmpLT); + Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh); + Builder.CreateCondBr(DstLtSrc, Exit, Next); + + EmitBlock(Next); + Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1); + Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1); + Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); + Value *NextSizeEq0 = + Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); + Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT); + DstPhi->addIncoming(NextDst, Next); + SrcPhi->addIncoming(NextSrc, Next); + SizePhi->addIncoming(NextSize, Next); + + EmitBlock(Exit); + PHINode *Ret = Builder.CreatePHI(IntTy, 4); + Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry); + Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT); + Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT); + Ret->addIncoming(ConstantInt::get(IntTy, 0), Next); + return RValue::get(Ret); + } case Builtin::BI__builtin_dwarf_cfa: { // The offset in bytes from the first argument to the CFA. // @@ -2033,7 +2415,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__sync_synchronize: { // We assume this is supposed to correspond to a C++0x-style // sequentially-consistent fence (i.e. this is only usable for - // synchonization, not device I/O or anything like that). This intrinsic + // synchronization, not device I/O or anything like that). This intrinsic // is really badly designed in the sense that in theory, there isn't // any way to safely use it... 
but in practice, it mostly works // to use it with non-atomic loads and stores to get acquire/release @@ -2548,11 +2930,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_addressof: return RValue::get(EmitLValue(E->getArg(0)).getPointer()); case Builtin::BI__builtin_operator_new: - return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), - E->getArg(0), false); + return EmitBuiltinNewDeleteCall( + E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); case Builtin::BI__builtin_operator_delete: - return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), - E->getArg(0), true); + return EmitBuiltinNewDeleteCall( + E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); + case Builtin::BI__noop: // __noop always evaluates to an integer literal zero. return RValue::get(ConstantInt::get(IntTy, 0)); @@ -2639,9 +3022,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedXor16: case Builtin::BI_InterlockedXor: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); + + case Builtin::BI_bittest64: + case Builtin::BI_bittest: + case Builtin::BI_bittestandcomplement64: + case Builtin::BI_bittestandcomplement: + case Builtin::BI_bittestandreset64: + case Builtin::BI_bittestandreset: + case Builtin::BI_bittestandset64: + case Builtin::BI_bittestandset: + case Builtin::BI_interlockedbittestandreset: + case Builtin::BI_interlockedbittestandreset64: + case Builtin::BI_interlockedbittestandset64: case Builtin::BI_interlockedbittestandset: - return RValue::get( - EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); + case Builtin::BI_interlockedbittestandset_acq: + case Builtin::BI_interlockedbittestandset_rel: + case Builtin::BI_interlockedbittestandset_nf: + case Builtin::BI_interlockedbittestandreset_acq: + case Builtin::BI_interlockedbittestandreset_rel: + case Builtin::BI_interlockedbittestandreset_nf: + return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); case Builtin::BI__exception_code: case Builtin::BI_exception_code: @@ -2652,59 +3052,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__abnormal_termination: case Builtin::BI_abnormal_termination: return RValue::get(EmitSEHAbnormalTermination()); - case Builtin::BI_setjmpex: { - if (getTarget().getTriple().isOSMSVCRT()) { - llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; - llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( - getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReturnsTwice); - llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), - "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); - llvm::Value *Buf = Builder.CreateBitOrPointerCast( - EmitScalarExpr(E->getArg(0)), Int8PtrTy); - llvm::Value *FrameAddr = - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), - ConstantInt::get(Int32Ty, 0)); - llvm::Value *Args[] = {Buf, FrameAddr}; - llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); - CS.setAttributes(ReturnsTwiceAttr); - return RValue::get(CS.getInstruction()); - } + case Builtin::BI_setjmpex: + if (getTarget().getTriple().isOSMSVCRT()) + return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); break; - } - case Builtin::BI_setjmp: { + case Builtin::BI_setjmp: if (getTarget().getTriple().isOSMSVCRT()) { - llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( - getLLVMContext(), 
llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReturnsTwice); - llvm::Value *Buf = Builder.CreateBitOrPointerCast( - EmitScalarExpr(E->getArg(0)), Int8PtrTy); - llvm::CallSite CS; - if (getTarget().getTriple().getArch() == llvm::Triple::x86) { - llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; - llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), - "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); - llvm::Value *Count = ConstantInt::get(IntTy, 0); - llvm::Value *Args[] = {Buf, Count}; - CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); - } else { - llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; - llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), - "_setjmp", ReturnsTwiceAttr, /*Local=*/true); - llvm::Value *FrameAddr = - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), - ConstantInt::get(Int32Ty, 0)); - llvm::Value *Args[] = {Buf, FrameAddr}; - CS = EmitRuntimeCallOrInvoke(SetJmp, Args); - } - CS.setAttributes(ReturnsTwiceAttr); - return RValue::get(CS.getInstruction()); + if (getTarget().getTriple().getArch() == llvm::Triple::x86) + return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); + else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) + return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); + return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E); } break; - } case Builtin::BI__GetExceptionInfo: { if (llvm::GlobalVariable *GV = @@ -2732,6 +3092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); case Builtin::BI__builtin_coro_frame: return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); + case Builtin::BI__builtin_coro_noop: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop); case Builtin::BI__builtin_coro_free: return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); case Builtin::BI__builtin_coro_destroy: @@ -2882,11 +3244,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions case Builtin::BIget_pipe_num_packets: case Builtin::BIget_pipe_max_packets: { - const char *Name; + const char *BaseName; + const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>(); if (BuiltinID == Builtin::BIget_pipe_num_packets) - Name = "__get_pipe_num_packets"; + BaseName = "__get_pipe_num_packets"; else - Name = "__get_pipe_max_packets"; + BaseName = "__get_pipe_max_packets"; + auto Name = std::string(BaseName) + + std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); // Building the generic function prototype. Value *Arg0 = EmitScalarExpr(E->getArg(0)); @@ -2992,10 +3357,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return Ptr; }; - // Could have events and/or vaargs. + // Could have events and/or varargs. if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. - Name = "__enqueue_kernel_vaargs"; + Name = "__enqueue_kernel_varargs"; auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); llvm::Value *Kernel = @@ -3063,7 +3428,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Pass the number of variadics to the runtime function too. 
Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); ArgTys.push_back(Int32Ty); - Name = "__enqueue_kernel_events_vaargs"; + Name = "__enqueue_kernel_events_varargs"; auto *PtrToSizeArray = CreateArrayForSizeVar(7); Args.push_back(PtrToSizeArray); @@ -3104,7 +3469,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, false), - "__get_kernel_preferred_work_group_multiple_impl"), + "__get_kernel_preferred_work_group_size_multiple_impl"), {Kernel, Arg})); } case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: @@ -3175,6 +3540,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__xray_customevent: { if (!ShouldXRayInstrumentFunction()) return RValue::getIgnored(); + + if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::Custom)) + return RValue::getIgnored(); + if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) return RValue::getIgnored(); @@ -3198,6 +3568,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); } + case Builtin::BI__xray_typedevent: { + // TODO: There should be a way to always emit events even if the current + // function is not instrumented. Losing events in a stream can cripple + // a trace. + if (!ShouldXRayInstrumentFunction()) + return RValue::getIgnored(); + + if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::Typed)) + return RValue::getIgnored(); + + if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) + if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents()) + return RValue::getIgnored(); + + Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent); + auto FTy = F->getFunctionType(); + auto Arg0 = EmitScalarExpr(E->getArg(0)); + auto PTy0 = FTy->getParamType(0); + if (PTy0 != Arg0->getType()) + Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0); + auto Arg1 = E->getArg(1); + auto Arg1Val = EmitScalarExpr(Arg1); + auto Arg1Ty = Arg1->getType(); + auto PTy1 = FTy->getParamType(1); + if (PTy1 != Arg1Val->getType()) { + if (Arg1Ty->isArrayType()) + Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer(); + else + Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1); + } + auto Arg2 = EmitScalarExpr(E->getArg(2)); + auto PTy2 = FTy->getParamType(2); + if (PTy2 != Arg2->getType()) + Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2); + return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2})); + } + case Builtin::BI__builtin_ms_va_start: case Builtin::BI__builtin_ms_va_end: return RValue::get( @@ -3246,6 +3654,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // can move this up to the beginning of the function. checkTargetFeatures(E, FD); + if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) + LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); + // See if we have a target specific intrinsic. 
const char *Name = getContext().BuiltinInfo.getName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; @@ -3253,7 +3664,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); if (!Prefix.empty()) { IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); - // NOTE we dont need to perform a compatibility flag check here since the + // NOTE we don't need to perform a compatibility flag check here since the // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. if (IntrinsicID == Intrinsic::not_intrinsic) @@ -3378,7 +3789,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, - llvm::Triple::ArchType Arch, + bool HasLegalHalfType=true, bool V1Ty=false) { int IsQuad = TypeFlags.isQuad(); switch (TypeFlags.getEltType()) { @@ -3389,9 +3800,7 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, case NeonTypeFlags::Poly16: return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); case NeonTypeFlags::Float16: - // FIXME: Only AArch64 backend can so far properly handle half types. - // Remove else part once ARM backend support for half is complete. - if (Arch == llvm::Triple::aarch64) + if (HasLegalHalfType) return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); else return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); @@ -3454,7 +3863,7 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, return ConstantInt::get(Ty, neg ? -SV : SV); } -// \brief Right-shift a vector by a constant. +// Right-shift a vector by a constant. 
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, llvm::Type *Ty, bool usgn, const char *name) { @@ -3557,13 +3966,24 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vcaleq_v, arm_neon_vacge, 0), NEONMAP1(vcalt_v, arm_neon_vacgt, 0), NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), + NEONMAP0(vceqz_v), + NEONMAP0(vceqzq_v), + NEONMAP0(vcgez_v), + NEONMAP0(vcgezq_v), + NEONMAP0(vcgtz_v), + NEONMAP0(vcgtzq_v), + NEONMAP0(vclez_v), + NEONMAP0(vclezq_v), NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), + NEONMAP0(vcltz_v), + NEONMAP0(vcltzq_v), NEONMAP1(vclz_v, ctlz, Add1ArgType), NEONMAP1(vclzq_v, ctlz, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), + NEONMAP0(vcvt_f16_v), NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), @@ -3583,6 +4003,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), @@ -3627,6 +4048,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), + NEONMAP0(vcvtq_f16_v), NEONMAP0(vcvtq_f32_v), NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), @@ -3642,6 +4064,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vcvtq_u16_v), NEONMAP0(vcvtq_u32_v), NEONMAP0(vcvtq_u64_v), + NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0), + NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0), NEONMAP0(vext_v), NEONMAP0(vextq_v), NEONMAP0(vfma_v), @@ -3652,18 +4076,30 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), NEONMAP0(vld1_dup_v), NEONMAP1(vld1_v, arm_neon_vld1, 0), + NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0), + NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0), + NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0), NEONMAP0(vld1q_dup_v), NEONMAP1(vld1q_v, arm_neon_vld1, 0), + NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0), + NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0), + NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0), + NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0), NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), NEONMAP1(vld2_v, arm_neon_vld2, 0), + NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0), NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), NEONMAP1(vld2q_v, arm_neon_vld2, 0), + NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0), NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), NEONMAP1(vld3_v, arm_neon_vld3, 0), + NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0), NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), NEONMAP1(vld3q_v, arm_neon_vld3, 0), + NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0), NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4_v, arm_neon_vld4, 0), + NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0), NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4q_v, arm_neon_vld4, 0), NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), @@ -3722,6 +4158,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 
NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), @@ -3755,7 +4193,13 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vshrn_n_v), NEONMAP0(vshrq_n_v), NEONMAP1(vst1_v, arm_neon_vst1, 0), + NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0), + NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0), + NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0), NEONMAP1(vst1q_v, arm_neon_vst1, 0), + NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0), + NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0), + NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0), NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), NEONMAP1(vst2_v, arm_neon_vst2, 0), NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), @@ -3795,8 +4239,18 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), + NEONMAP0(vceqz_v), + NEONMAP0(vceqzq_v), + NEONMAP0(vcgez_v), + NEONMAP0(vcgezq_v), + NEONMAP0(vcgtz_v), + NEONMAP0(vcgtzq_v), + NEONMAP0(vclez_v), + NEONMAP0(vclezq_v), NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), + NEONMAP0(vcltz_v), + NEONMAP0(vcltzq_v), NEONMAP1(vclz_v, ctlz, Add1ArgType), NEONMAP1(vclzq_v, ctlz, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), @@ -3826,6 +4280,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), + NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0), + NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0), NEONMAP0(vext_v), NEONMAP0(vextq_v), NEONMAP0(vfma_v), @@ -3834,6 +4290,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), + NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), @@ -3874,6 +4336,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), @@ -3897,6 +4361,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vshr_n_v), NEONMAP0(vshrn_n_v), NEONMAP0(vshrq_n_v), + NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1_x3_v, 
aarch64_neon_st1x3, 0), + NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), + NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), + NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), NEONMAP0(vsubhn_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), @@ -4095,6 +4565,37 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), + // FP16 scalar intrinisics go here. + NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), }; #undef NEONMAP0 @@ -4244,8 +4745,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( NeonTypeFlags Type(NeonTypeConst.getZExtValue()); bool Usgn = Type.isUnsigned(); bool Quad = Type.isQuad(); + const bool HasLegalHalfType = getTarget().hasLegalHalfType(); - llvm::VectorType *VTy = GetNeonType(this, Type, Arch); + llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -4310,6 +4812,26 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, NameHint); } + case NEON::BI__builtin_neon_vceqz_v: + case NEON::BI__builtin_neon_vceqzq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, + ICmpInst::ICMP_EQ, 
"vceqz"); + case NEON::BI__builtin_neon_vcgez_v: + case NEON::BI__builtin_neon_vcgezq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, + ICmpInst::ICMP_SGE, "vcgez"); + case NEON::BI__builtin_neon_vclez_v: + case NEON::BI__builtin_neon_vclezq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, + ICmpInst::ICMP_SLE, "vclez"); + case NEON::BI__builtin_neon_vcgtz_v: + case NEON::BI__builtin_neon_vcgtzq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, + ICmpInst::ICMP_SGT, "vcgtz"); + case NEON::BI__builtin_neon_vcltz_v: + case NEON::BI__builtin_neon_vcltzq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, + ICmpInst::ICMP_SLT, "vcltz"); case NEON::BI__builtin_neon_vclz_v: case NEON::BI__builtin_neon_vclzq_v: // We generate target-independent intrinsic, which needs a second argument @@ -4319,13 +4841,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvt_f32_v: case NEON::BI__builtin_neon_vcvtq_f32_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch); + Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), + HasLegalHalfType); return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_f16_v: case NEON::BI__builtin_neon_vcvtq_f16_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch); + Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), + HasLegalHalfType); return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_n_f16_v: @@ -4374,6 +4898,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvta_s16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_u32_v: case NEON::BI__builtin_neon_vcvta_u64_v: case NEON::BI__builtin_neon_vcvtaq_s16_v: @@ -4448,12 +4973,33 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops.push_back(getAlignmentValue32(PtrOp0)); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); } + case NEON::BI__builtin_neon_vld1_x2_v: + case NEON::BI__builtin_neon_vld1q_x2_v: + case NEON::BI__builtin_neon_vld1_x3_v: + case NEON::BI__builtin_neon_vld1q_x3_v: + case NEON::BI__builtin_neon_vld1_x4_v: + case NEON::BI__builtin_neon_vld1q_x4_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); + } case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: case NEON::BI__builtin_neon_vld4_v: - case NEON::BI__builtin_neon_vld4q_v: { + case NEON::BI__builtin_neon_vld4q_v: + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: + case 
NEON::BI__builtin_neon_vld4q_dup_v: { llvm::Type *Tys[] = {Ty, Int8PtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Value *Align = getAlignmentValue32(PtrOp1); @@ -4552,7 +5098,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vrsqrteq_v: Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); - + case NEON::BI__builtin_neon_vrndi_v: + case NEON::BI__builtin_neon_vrndiq_v: + Int = Intrinsic::nearbyint; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", @@ -4603,6 +5152,23 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops.push_back(getAlignmentValue32(PtrOp0)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); } + case NEON::BI__builtin_neon_vst1_x2_v: + case NEON::BI__builtin_neon_vst1q_x2_v: + case NEON::BI__builtin_neon_vst1_x3_v: + case NEON::BI__builtin_neon_vst1q_x3_v: + case NEON::BI__builtin_neon_vst1_x4_v: + case NEON::BI__builtin_neon_vst1q_x4_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); + // TODO: Currently in AArch32 mode the pointer operand comes first, whereas + // in AArch64 it comes last. We may want to stick to one or another. + if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) { + llvm::Type *Tys[2] = { VTy, PTy }; + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); + } + llvm::Type *Tys[2] = { PTy, VTy }; + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); + } case NEON::BI__builtin_neon_vsubhn_v: { llvm::VectorType *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); @@ -4685,6 +5251,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } return SV; } + case NEON::BI__builtin_neon_vdot_v: + case NEON::BI__builtin_neon_vdotq_v: { + llvm::Type *InputTy = + llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); + } } assert(Int && "Expected valid intrinsic number"); @@ -4893,6 +5467,34 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { return true; } +Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + LoadSize.getQuantity() * 8); + Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; +} + +Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Value = EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + StoreSize.getQuantity() * 8); + Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = + Builder.CreateAlignedStore(Value, Ptr, + StoreSize); + Store->setVolatile(true); + return Store; +} + Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -5135,35 +5737,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case ARM::BI__iso_volatile_load8: case ARM::BI__iso_volatile_load16: case ARM::BI__iso_volatile_load32: - case ARM::BI__iso_volatile_load64: { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - LoadSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, LoadSize); - Load->setVolatile(true); - return Load; - } + case ARM::BI__iso_volatile_load64: + return EmitISOVolatileLoad(E); case ARM::BI__iso_volatile_store8: case ARM::BI__iso_volatile_store16: case ARM::BI__iso_volatile_store32: - case ARM::BI__iso_volatile_store64: { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Value = EmitScalarExpr(E->getArg(1)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - StoreSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::StoreInst *Store = - Builder.CreateAlignedStore(Value, Ptr, - StoreSize); - Store->setVolatile(true); - return Store; - } + case ARM::BI__iso_volatile_store64: + return EmitISOVolatileStore(E); } if (BuiltinID == ARM::BI__builtin_arm_clrex) { @@ -5308,8 +5888,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: // Get the alignment for the argument in addition to the value; // we'll use it later. 
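
Note on the EmitISOVolatileLoad/EmitISOVolatileStore helpers introduced above: they widen the pointee to an integer of the same bit width and mark the access volatile, with no atomic ordering attached, and the ARM __iso_volatile_* cases now simply delegate to them (a later hunk reuses the same helpers for AArch64). A minimal caller-side sketch, assuming an MSVC-compatible target and the usual __int32 prototype for the builtin; the function name is illustrative only:

    // Lowers to a single volatile 32-bit integer load:
    //   %v = load volatile i32, i32* %p
    int read_mmio_reg(const volatile __int32 *p) {
      return __iso_volatile_load32(p);
    }
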
PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); @@ -5345,6 +5928,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vgetq_lane_f32: return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); + case NEON::BI__builtin_neon_vrndns_f32: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *Tys[] = {Arg->getType()}; + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys); + return Builder.CreateCall(F, {Arg}, "vrndn"); } + case NEON::BI__builtin_neon_vset_lane_i8: case NEON::BI__builtin_neon_vset_lane_i16: case NEON::BI__builtin_neon_vset_lane_i32: @@ -5434,7 +6023,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, bool usgn = Type.isUnsigned(); bool rightShift = false; - llvm::VectorType *VTy = GetNeonType(this, Type, Arch); + llvm::VectorType *VTy = GetNeonType(this, Type, + getTarget().hasLegalHalfType()); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -5479,68 +6069,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *Ld = Builder.CreateLoad(PtrOp0); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: { - // Handle 64-bit elements as a special-case. There is no "dup" needed. - if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4; - break; - default: llvm_unreachable("unknown vld_dup intrinsic?"); - } - llvm::Type *Tys[] = {Ty, Int8PtrTy}; - Function *F = CGM.getIntrinsic(Int, Tys); - llvm::Value *Align = getAlignmentValue32(PtrOp1); - Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); - } - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2lane; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3lane; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4lane; - break; - default: llvm_unreachable("unknown vld_dup intrinsic?"); - } - llvm::Type *Tys[] = {Ty, Int8PtrTy}; - Function *F = CGM.getIntrinsic(Int, Tys); - llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); - - SmallVector<Value*, 6> Args; - Args.push_back(Ops[1]); - Args.append(STy->getNumElements(), UndefValue::get(Ty)); - - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Args.push_back(CI); - Args.push_back(getAlignmentValue32(PtrOp1)); - - Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); - // splat lane 0 to all elts in each vector of the result. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Value *Val = Builder.CreateExtractValue(Ops[1], i); - Value *Elt = Builder.CreateBitCast(Val, Ty); - Elt = EmitNeonSplat(Elt, CI); - Elt = Builder.CreateBitCast(Elt, Val->getType()); - Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); - } - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); - } case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? 
Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; @@ -5680,7 +6208,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID // Determine the type of this overloaded NEON intrinsic. NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch); + llvm::VectorType *Ty = GetNeonType(&CGF, Type); if (!Ty) return nullptr; @@ -5799,18 +6327,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, HintID = 0; break; case AArch64::BI__builtin_arm_yield: + case AArch64::BI__yield: HintID = 1; break; case AArch64::BI__builtin_arm_wfe: + case AArch64::BI__wfe: HintID = 2; break; case AArch64::BI__builtin_arm_wfi: + case AArch64::BI__wfi: HintID = 3; break; case AArch64::BI__builtin_arm_sev: + case AArch64::BI__sev: HintID = 4; break; case AArch64::BI__builtin_arm_sevl: + case AArch64::BI__sevl: HintID = 5; break; } @@ -6077,6 +6610,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // Handle non-overloaded intrinsics first. switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vabsh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); case NEON::BI__builtin_neon_vldrq_p128: { llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); @@ -6119,6 +6655,153 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateUIToFP(Ops[0], FTy); return Builder.CreateSIToFP(Ops[0], FTy); } + case NEON::BI__builtin_neon_vcvth_f16_u16: + case NEON::BI__builtin_neon_vcvth_f16_u32: + case NEON::BI__builtin_neon_vcvth_f16_u64: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_f16_s16: + case NEON::BI__builtin_neon_vcvth_f16_s32: + case NEON::BI__builtin_neon_vcvth_f16_s64: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + llvm::Type *FTy = HalfTy; + llvm::Type *InTy; + if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) + InTy = Int64Ty; + else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) + InTy = Int32Ty; + else + InTy = Int16Ty; + Ops[0] = Builder.CreateBitCast(Ops[0], InTy); + if (usgn) + return Builder.CreateUIToFP(Ops[0], FTy); + return Builder.CreateSIToFP(Ops[0], FTy); + } + case NEON::BI__builtin_neon_vcvth_u16_f16: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_s16_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Int16Ty); + return Builder.CreateFPToSI(Ops[0], Int16Ty); + } + case NEON::BI__builtin_neon_vcvth_u32_f16: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_s32_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Int32Ty); + return Builder.CreateFPToSI(Ops[0], Int32Ty); + } + case NEON::BI__builtin_neon_vcvth_u64_f16: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_s64_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Int64Ty); + return Builder.CreateFPToSI(Ops[0], Int64Ty); + } + case NEON::BI__builtin_neon_vcvtah_u16_f16: + case NEON::BI__builtin_neon_vcvtmh_u16_f16: + case NEON::BI__builtin_neon_vcvtnh_u16_f16: + case NEON::BI__builtin_neon_vcvtph_u16_f16: + case 
NEON::BI__builtin_neon_vcvtah_s16_f16: + case NEON::BI__builtin_neon_vcvtmh_s16_f16: + case NEON::BI__builtin_neon_vcvtnh_s16_f16: + case NEON::BI__builtin_neon_vcvtph_s16_f16: { + unsigned Int; + llvm::Type* InTy = Int32Ty; + llvm::Type* FTy = HalfTy; + llvm::Type *Tys[2] = {InTy, FTy}; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vcvtah_u16_f16: + Int = Intrinsic::aarch64_neon_fcvtau; break; + case NEON::BI__builtin_neon_vcvtmh_u16_f16: + Int = Intrinsic::aarch64_neon_fcvtmu; break; + case NEON::BI__builtin_neon_vcvtnh_u16_f16: + Int = Intrinsic::aarch64_neon_fcvtnu; break; + case NEON::BI__builtin_neon_vcvtph_u16_f16: + Int = Intrinsic::aarch64_neon_fcvtpu; break; + case NEON::BI__builtin_neon_vcvtah_s16_f16: + Int = Intrinsic::aarch64_neon_fcvtas; break; + case NEON::BI__builtin_neon_vcvtmh_s16_f16: + Int = Intrinsic::aarch64_neon_fcvtms; break; + case NEON::BI__builtin_neon_vcvtnh_s16_f16: + Int = Intrinsic::aarch64_neon_fcvtns; break; + case NEON::BI__builtin_neon_vcvtph_s16_f16: + Int = Intrinsic::aarch64_neon_fcvtps; break; + } + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); + return Builder.CreateTrunc(Ops[0], Int16Ty); + } + case NEON::BI__builtin_neon_vcaleh_f16: + case NEON::BI__builtin_neon_vcalth_f16: + case NEON::BI__builtin_neon_vcageh_f16: + case NEON::BI__builtin_neon_vcagth_f16: { + unsigned Int; + llvm::Type* InTy = Int32Ty; + llvm::Type* FTy = HalfTy; + llvm::Type *Tys[2] = {InTy, FTy}; + Ops.push_back(EmitScalarExpr(E->getArg(1))); + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vcageh_f16: + Int = Intrinsic::aarch64_neon_facge; break; + case NEON::BI__builtin_neon_vcagth_f16: + Int = Intrinsic::aarch64_neon_facgt; break; + case NEON::BI__builtin_neon_vcaleh_f16: + Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break; + case NEON::BI__builtin_neon_vcalth_f16: + Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break; + } + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg"); + return Builder.CreateTrunc(Ops[0], Int16Ty); + } + case NEON::BI__builtin_neon_vcvth_n_s16_f16: + case NEON::BI__builtin_neon_vcvth_n_u16_f16: { + unsigned Int; + llvm::Type* InTy = Int32Ty; + llvm::Type* FTy = HalfTy; + llvm::Type *Tys[2] = {InTy, FTy}; + Ops.push_back(EmitScalarExpr(E->getArg(1))); + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vcvth_n_s16_f16: + Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break; + case NEON::BI__builtin_neon_vcvth_n_u16_f16: + Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break; + } + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); + return Builder.CreateTrunc(Ops[0], Int16Ty); + } + case NEON::BI__builtin_neon_vcvth_n_f16_s16: + case NEON::BI__builtin_neon_vcvth_n_f16_u16: { + unsigned Int; + llvm::Type* FTy = HalfTy; + llvm::Type* InTy = Int32Ty; + llvm::Type *Tys[2] = {FTy, InTy}; + Ops.push_back(EmitScalarExpr(E->getArg(1))); + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vcvth_n_f16_s16: + Int = Intrinsic::aarch64_neon_vcvtfxs2fp; + Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext"); + break; + case NEON::BI__builtin_neon_vcvth_n_f16_u16: + Int = Intrinsic::aarch64_neon_vcvtfxu2fp; + Ops[0] = Builder.CreateZExt(Ops[0], InTy); + break; + } + return 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); + } case NEON::BI__builtin_neon_vpaddd_s64: { llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); @@ -6160,6 +6843,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vceqzd_s64: case NEON::BI__builtin_neon_vceqzd_f64: case NEON::BI__builtin_neon_vceqzs_f32: + case NEON::BI__builtin_neon_vceqzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6167,6 +6851,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcgezd_s64: case NEON::BI__builtin_neon_vcgezd_f64: case NEON::BI__builtin_neon_vcgezs_f32: + case NEON::BI__builtin_neon_vcgezh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6174,6 +6859,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vclezd_s64: case NEON::BI__builtin_neon_vclezd_f64: case NEON::BI__builtin_neon_vclezs_f32: + case NEON::BI__builtin_neon_vclezh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6181,6 +6867,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcgtzd_s64: case NEON::BI__builtin_neon_vcgtzd_f64: case NEON::BI__builtin_neon_vcgtzs_f32: + case NEON::BI__builtin_neon_vcgtzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6188,6 +6875,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcltzd_s64: case NEON::BI__builtin_neon_vcltzd_f64: case NEON::BI__builtin_neon_vcltzs_f32: + case NEON::BI__builtin_neon_vcltzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6240,6 +6928,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); } + case NEON::BI__builtin_neon_vceqh_f16: + case NEON::BI__builtin_neon_vcleh_f16: + case NEON::BI__builtin_neon_vclth_f16: + case NEON::BI__builtin_neon_vcgeh_f16: + case NEON::BI__builtin_neon_vcgth_f16: { + llvm::CmpInst::Predicate P; + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; + case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; + case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; + case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; + case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; + } + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); + } case NEON::BI__builtin_neon_vceqd_s64: case NEON::BI__builtin_neon_vceqd_u64: case NEON::BI__builtin_neon_vcgtd_s64: @@ -6377,6 +7085,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 
llvm::VectorType::get(DoubleTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); + case NEON::BI__builtin_neon_vaddh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); + case NEON::BI__builtin_neon_vsubh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); + case NEON::BI__builtin_neon_vmulh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); + case NEON::BI__builtin_neon_vdivh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); + case NEON::BI__builtin_neon_vfmah_f16: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + // NEON intrinsic puts accumulator first, unlike the LLVM fma. + return Builder.CreateCall(F, + {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); + } + case NEON::BI__builtin_neon_vfmsh_f16: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); + Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); + // NEON intrinsic puts accumulator first, unlike the LLVM fma. + return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); + } case NEON::BI__builtin_neon_vaddd_s64: case NEON::BI__builtin_neon_vaddd_u64: return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); @@ -6534,7 +7267,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } } - llvm::VectorType *VTy = GetNeonType(this, Type, Arch); + llvm::VectorType *VTy = GetNeonType(this, Type); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -6599,7 +7332,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch); + NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); @@ -6651,12 +7384,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); + case NEON::BI__builtin_neon_vmaxh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fmax; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); + } case NEON::BI__builtin_neon_vmin_v: case NEON::BI__builtin_neon_vminq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); + case NEON::BI__builtin_neon_vminh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fmin; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); + } case NEON::BI__builtin_neon_vabd_v: case NEON::BI__builtin_neon_vabdq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 
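
The new FP16 scalar cases above (vaddh/vsubh/vmulh/vdivh, the fused vfmah/vfmsh variants, and vmaxh/vminh) map onto ordinary IR operations; the only subtlety, called out in the comment, is that the NEON intrinsics take the accumulator first while llvm.fma takes it last. A minimal IRBuilder sketch of that reordering, assuming the function and parameter names are illustrative and not the in-tree helper:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Sketch only: vfmah_f16(a, b, c) becomes @llvm.fma.f16(b, c, a);
    // vfmsh_f16 negates b and then makes the same call.
    llvm::Value *emitVfmahSketch(llvm::IRBuilder<> &B, llvm::Module &M,
                                 llvm::Value *A, llvm::Value *Bv,
                                 llvm::Value *C) {
      llvm::Function *FMA = llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::fma, B.getHalfTy());
      return B.CreateCall(FMA, {Bv, C, A}); // accumulator goes last for llvm.fma
    }
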
@@ -6695,20 +7438,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminnmq_v: Int = Intrinsic::aarch64_neon_fminnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); + case NEON::BI__builtin_neon_vminnmh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fminnm; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); case NEON::BI__builtin_neon_vmaxnm_v: case NEON::BI__builtin_neon_vmaxnmq_v: Int = Intrinsic::aarch64_neon_fmaxnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); + case NEON::BI__builtin_neon_vmaxnmh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fmaxnm; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); case NEON::BI__builtin_neon_vrecpss_f32: { Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), Ops, "vrecps"); } - case NEON::BI__builtin_neon_vrecpsd_f64: { + case NEON::BI__builtin_neon_vrecpsd_f64: Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), Ops, "vrecps"); - } + case NEON::BI__builtin_neon_vrecpsh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), + Ops, "vrecps"); case NEON::BI__builtin_neon_vqshrun_n_v: Int = Intrinsic::aarch64_neon_sqshrun; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); @@ -6724,72 +7478,87 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); + case NEON::BI__builtin_neon_vrndah_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::round; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); + } case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { Int = Intrinsic::round; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); } - case NEON::BI__builtin_neon_vrndi_v: - case NEON::BI__builtin_neon_vrndiq_v: { + case NEON::BI__builtin_neon_vrndih_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); + } + case NEON::BI__builtin_neon_vrndmh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::floor; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); } case NEON::BI__builtin_neon_vrndm_v: case NEON::BI__builtin_neon_vrndmq_v: { Int = Intrinsic::floor; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); } + case NEON::BI__builtin_neon_vrndnh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::aarch64_neon_frintn; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); + } case NEON::BI__builtin_neon_vrndn_v: case NEON::BI__builtin_neon_vrndnq_v: { Int = Intrinsic::aarch64_neon_frintn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); } + case NEON::BI__builtin_neon_vrndns_f32: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::aarch64_neon_frintn; + return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn"); + } + case NEON::BI__builtin_neon_vrndph_f16: { + 
Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::ceil; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); + } case NEON::BI__builtin_neon_vrndp_v: case NEON::BI__builtin_neon_vrndpq_v: { Int = Intrinsic::ceil; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); } + case NEON::BI__builtin_neon_vrndxh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::rint; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); + } case NEON::BI__builtin_neon_vrndx_v: case NEON::BI__builtin_neon_vrndxq_v: { Int = Intrinsic::rint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); } + case NEON::BI__builtin_neon_vrndh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::trunc; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); + } case NEON::BI__builtin_neon_vrnd_v: case NEON::BI__builtin_neon_vrndq_v: { Int = Intrinsic::trunc; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); } - case NEON::BI__builtin_neon_vceqz_v: - case NEON::BI__builtin_neon_vceqzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, - ICmpInst::ICMP_EQ, "vceqz"); - case NEON::BI__builtin_neon_vcgez_v: - case NEON::BI__builtin_neon_vcgezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, - ICmpInst::ICMP_SGE, "vcgez"); - case NEON::BI__builtin_neon_vclez_v: - case NEON::BI__builtin_neon_vclezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, - ICmpInst::ICMP_SLE, "vclez"); - case NEON::BI__builtin_neon_vcgtz_v: - case NEON::BI__builtin_neon_vcgtzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, - ICmpInst::ICMP_SGT, "vcgtz"); - case NEON::BI__builtin_neon_vcltz_v: - case NEON::BI__builtin_neon_vcltzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, - ICmpInst::ICMP_SLT, "vcltz"); case NEON::BI__builtin_neon_vcvt_f64_v: case NEON::BI__builtin_neon_vcvtq_f64_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch); + Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_f64_f32: { assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && "unexpected vcvt_f64_f32 builtin"); NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); - Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch)); + Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); } @@ -6797,7 +7566,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, assert(Type.getEltType() == NeonTypeFlags::Float32 && "unexpected vcvt_f32_f64 builtin"); NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); - Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch)); + Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); } @@ -6805,20 +7574,21 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: - case NEON::BI__builtin_neon_vcvt_s16_v: - case NEON::BI__builtin_neon_vcvt_u16_v: + case NEON::BI__builtin_neon_vcvt_s16_v: + case NEON::BI__builtin_neon_vcvt_u16_v: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: - case NEON::BI__builtin_neon_vcvtq_s16_v: - case NEON::BI__builtin_neon_vcvtq_u16_v: { + case NEON::BI__builtin_neon_vcvtq_s16_v: + case NEON::BI__builtin_neon_vcvtq_u16_v: { Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); if (usgn) return Builder.CreateFPToUI(Ops[0], Ty); return Builder.CreateFPToSI(Ops[0], Ty); } case NEON::BI__builtin_neon_vcvta_s16_v: + case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvtaq_s16_v: case NEON::BI__builtin_neon_vcvtaq_s32_v: @@ -6886,6 +7656,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fmulx; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); } + case NEON::BI__builtin_neon_vmulxh_lane_f16: + case NEON::BI__builtin_neon_vmulxh_laneq_f16: { + // vmulx_lane should be mapped to Neon scalar mulx after + // extracting the scalar element + Ops.push_back(EmitScalarExpr(E->getArg(2))); + Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); + Ops.pop_back(); + Int = Intrinsic::aarch64_neon_fmulx; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); + } case NEON::BI__builtin_neon_vmul_lane_v: case NEON::BI__builtin_neon_vmul_laneq_v: { // v1f64 vmul_lane should be mapped to Neon scalar mul lane @@ -6894,7 +7674,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Quad = true; Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch); + NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); @@ -6902,6 +7682,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vnegd_s64: return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); + case NEON::BI__builtin_neon_vnegh_f16: + return 
Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); case NEON::BI__builtin_neon_vpmaxnm_v: case NEON::BI__builtin_neon_vpmaxnmq_v: { Int = Intrinsic::aarch64_neon_fmaxnmp; @@ -6912,6 +7694,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fminnmp; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); } + case NEON::BI__builtin_neon_vsqrth_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::sqrt; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); + } case NEON::BI__builtin_neon_vsqrt_v: case NEON::BI__builtin_neon_vsqrtq_v: { Int = Intrinsic::sqrt; @@ -7289,64 +8076,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], VTy); return Builder.CreateAdd(Ops[0], tmp); } - // FIXME: Sharing loads & stores with 32-bit is complicated by the absence - // of an Align parameter here. - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - Int = Intrinsic::aarch64_neon_ld1x2; - break; - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - Int = Intrinsic::aarch64_neon_ld1x3; - break; - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - Int = Intrinsic::aarch64_neon_ld1x4; - break; - } - Function *F = CGM.getIntrinsic(Int, Tys); - Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); - llvm::Type *Tys[2] = { VTy, PTy }; - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - Int = Intrinsic::aarch64_neon_st1x2; - break; - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - Int = Intrinsic::aarch64_neon_st1x3; - break; - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - Int = Intrinsic::aarch64_neon_st1x4; - break; - } - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); - } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); @@ -7653,6 +8382,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } + case AArch64::BI__iso_volatile_load8: + case AArch64::BI__iso_volatile_load16: + case AArch64::BI__iso_volatile_load32: + case AArch64::BI__iso_volatile_load64: + return EmitISOVolatileLoad(E); + case 
AArch64::BI__iso_volatile_store8: + case AArch64::BI__iso_volatile_store16: + case AArch64::BI__iso_volatile_store32: + case AArch64::BI__iso_volatile_store64: + return EmitISOVolatileStore(E); + case AArch64::BI_BitScanForward: + case AArch64::BI_BitScanForward64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); + case AArch64::BI_BitScanReverse: + case AArch64::BI_BitScanReverse64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); + case AArch64::BI_InterlockedAnd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); + case AArch64::BI_InterlockedExchange64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); + case AArch64::BI_InterlockedExchangeAdd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); + case AArch64::BI_InterlockedExchangeSub64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); + case AArch64::BI_InterlockedOr64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); + case AArch64::BI_InterlockedXor64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); + case AArch64::BI_InterlockedDecrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); + case AArch64::BI_InterlockedIncrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); } } @@ -7704,42 +8465,66 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, } static Value *EmitX86MaskedStore(CodeGenFunction &CGF, - SmallVectorImpl<Value *> &Ops, + ArrayRef<Value *> Ops, unsigned Align) { // Cast the pointer to right type. - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - // If the mask is all ones just emit a regular store. - if (const auto *C = dyn_cast<Constant>(Ops[2])) - if (C->isAllOnesValue()) - return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], Ops[1]->getType()->getVectorNumElements()); - return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); + return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec); } static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, - SmallVectorImpl<Value *> &Ops, unsigned Align) { + ArrayRef<Value *> Ops, unsigned Align) { // Cast the pointer to right type. - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - // If the mask is all ones just emit a regular store. - if (const auto *C = dyn_cast<Constant>(Ops[2])) - if (C->isAllOnesValue()) - return CGF.Builder.CreateAlignedLoad(Ops[0], Align); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], Ops[1]->getType()->getVectorNumElements()); - return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); + return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]); +} + +static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, + ArrayRef<Value *> Ops) { + llvm::Type *ResultTy = Ops[1]->getType(); + llvm::Type *PtrTy = ResultTy->getVectorElementType(); + + // Cast the pointer to element type. 
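
Note that EmitX86MaskedStore and EmitX86MaskedLoad above no longer special-case an all-ones constant mask: they always emit the masked intrinsic, bitcasting the pointer to the vector type and expanding the integer mask to <N x i1> via getMaskVecValue. A minimal sketch of the store side, assuming the mask has already been expanded to a vector of i1 and using an illustrative helper name:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"

    // Sketch: retype the pointer to the stored vector type, then emit
    // llvm.masked.store with the given alignment and lane mask.
    llvm::Value *maskedStoreSketch(llvm::IRBuilder<> &B, llvm::Value *Vec,
                                   llvm::Value *Ptr, llvm::Value *MaskVec,
                                   unsigned Align) {
      Ptr = B.CreateBitCast(Ptr,
                            llvm::PointerType::getUnqual(Vec->getType()));
      return B.CreateMaskedStore(Vec, Ptr, Align, MaskVec);
    }
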
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(PtrTy)); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + ResultTy->getVectorNumElements()); + + llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload, + ResultTy); + return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); +} + +static Value *EmitX86CompressStore(CodeGenFunction &CGF, + ArrayRef<Value *> Ops) { + llvm::Type *ResultTy = Ops[1]->getType(); + llvm::Type *PtrTy = ResultTy->getVectorElementType(); + + // Cast the pointer to element type. + Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(PtrTy)); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + ResultTy->getVectorNumElements()); + + llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore, + ResultTy); + return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec }); } static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, - unsigned NumElts, SmallVectorImpl<Value *> &Ops, + unsigned NumElts, ArrayRef<Value *> Ops, bool InvertLHS = false) { Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); @@ -7751,26 +8536,6 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, CGF.Builder.getIntNTy(std::max(NumElts, 8U))); } -static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, - SmallVectorImpl<Value *> &Ops, - llvm::Type *DstTy, - unsigned SrcSizeInBits, - unsigned Align) { - // Load the subvector. - Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); - - // Create broadcast mask. - unsigned NumDstElts = DstTy->getVectorNumElements(); - unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); - - SmallVector<uint32_t, 8> Mask; - for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) - for (unsigned j = 0; j != NumSrcElts; ++j) - Mask.push_back(j); - - return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); -} - static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -7784,8 +8549,48 @@ static Value *EmitX86Select(CodeGenFunction &CGF, return CGF.Builder.CreateSelect(Mask, Op0, Op1); } +static Value *EmitX86ScalarSelect(CodeGenFunction &CGF, + Value *Mask, Value *Op0, Value *Op1) { + // If the mask is all ones just return first argument. 
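
The new EmitX86ExpandLoad and EmitX86CompressStore helpers above retype the pointer as a pointer to the element type and call the generic llvm.masked.expandload / llvm.masked.compressstore intrinsics. As a reminder of expand-load semantics, a scalar model (illustration only, not the lowering itself):

    #include <cstdint>

    // Expand-load: consecutive memory elements fill only the enabled lanes;
    // disabled lanes keep the pass-through value.
    void expandLoadModel(const int32_t *Mem, const bool *Mask,
                         const int32_t *PassThru, int32_t *Out, unsigned N) {
      unsigned MemIdx = 0;
      for (unsigned I = 0; I != N; ++I)
        Out[I] = Mask[I] ? Mem[MemIdx++] : PassThru[I];
    }
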
+ if (const auto *C = dyn_cast<Constant>(Mask)) + if (C->isAllOnesValue()) + return Op0; + + llvm::VectorType *MaskTy = + llvm::VectorType::get(CGF.Builder.getInt1Ty(), + Mask->getType()->getIntegerBitWidth()); + Mask = CGF.Builder.CreateBitCast(Mask, MaskTy); + Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0); + return CGF.Builder.CreateSelect(Mask, Op0, Op1); +} + +static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, + unsigned NumElts, Value *MaskIn) { + if (MaskIn) { + const auto *C = dyn_cast<Constant>(MaskIn); + if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); + } + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = i % NumElts + NumElts; + Cmp = CGF.Builder.CreateShuffleVector( + Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); + } + + return CGF.Builder.CreateBitCast(Cmp, + IntegerType::get(CGF.getLLVMContext(), + std::max(NumElts, 8U))); +} + static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, - bool Signed, SmallVectorImpl<Value *> &Ops) { + bool Signed, ArrayRef<Value *> Ops) { + assert((Ops.size() == 2 || Ops.size() == 4) && + "Unexpected number of arguments"); unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); Value *Cmp; @@ -7809,22 +8614,16 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); } - const auto *C = dyn_cast<Constant>(Ops.back()); - if (!C || !C->isAllOnesValue()) - Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); + Value *MaskIn = nullptr; + if (Ops.size() == 4) + MaskIn = Ops[3]; - if (NumElts < 8) { - uint32_t Indices[8]; - for (unsigned i = 0; i != NumElts; ++i) - Indices[i] = i; - for (unsigned i = NumElts; i != 8; ++i) - Indices[i] = i % NumElts + NumElts; - Cmp = CGF.Builder.CreateShuffleVector( - Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); - } - return CGF.Builder.CreateBitCast(Cmp, - IntegerType::get(CGF.getLLVMContext(), - std::max(NumElts, 8U))); + return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); +} + +static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { + Value *Zero = Constant::getNullValue(In->getType()); + return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); } static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { @@ -7834,9 +8633,7 @@ static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); - if (Ops.size() == 1) - return Res; - return EmitX86Select(CGF, Ops[2], Res, Ops[1]); + return Res; } static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, @@ -7844,11 +8641,211 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - if (Ops.size() == 2) - return Res; + assert(Ops.size() == 2); + return Res; +} + +// Lowers X86 FMA intrinsics to IR. 
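
EmitX86MaskedCompareResult above pads a compare result narrower than eight lanes with zero lanes before bitcasting it to an integer mask, so a <4 x i1> result becomes an i8 whose upper four bits are always zero. A sketch of that widening for the four-element case, with an illustrative helper name:

    #include <cstdint>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"

    // <4 x i1> compare result -> i8 mask; indices 4-7 select lanes of a
    // zero vector, so only bits 0-3 of the result carry compare bits.
    llvm::Value *widenMaskSketch(llvm::IRBuilder<> &B, llvm::Value *Cmp4xI1) {
      uint32_t Idx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      llvm::Value *Zero = llvm::Constant::getNullValue(Cmp4xI1->getType());
      llvm::Value *Wide = B.CreateShuffleVector(Cmp4xI1, Zero, Idx);
      return B.CreateBitCast(Wide, B.getInt8Ty());
    }
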
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + unsigned BuiltinID, bool IsAddSub) { - assert(Ops.size() == 4); - return EmitX86Select(CGF, Ops[3], Res, Ops[2]); + bool Subtract = false; + Intrinsic::ID IID = Intrinsic::not_intrinsic; + switch (BuiltinID) { + default: break; + case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; + case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; + case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; + break; + case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; + break; + } + + Value *A = Ops[0]; + Value *B = Ops[1]; + Value *C = Ops[2]; + + if (Subtract) + C = CGF.Builder.CreateFNeg(C); + + Value *Res; + + // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). + if (IID != Intrinsic::not_intrinsic && + cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) { + Function *Intr = CGF.CGM.getIntrinsic(IID); + Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); + } else { + llvm::Type *Ty = A->getType(); + Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); + Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); + + if (IsAddSub) { + // Negate even elts in C using a mask. + unsigned NumElts = Ty->getVectorNumElements(); + SmallVector<uint32_t, 16> Indices(NumElts); + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + (i % 2) * NumElts; + + Value *NegC = CGF.Builder.CreateFNeg(C); + Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); + Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices); + } + } + + // Handle any required masking. 
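
For the *addsub variants handled above, the shuffle blends the two llvm.fma calls so that even lanes take the subtracted form and odd lanes the added form. A scalar model of the resulting lane pattern (illustration only):

    #include <cmath>
    #include <cstddef>

    // vfmaddsub lane pattern: even lanes fma(a, b, -c), odd lanes fma(a, b, c).
    void fmaddsubModel(const float *A, const float *B, const float *C,
                       float *R, std::size_t N) {
      for (std::size_t I = 0; I != N; ++I)
        R[I] = std::fma(A[I], B[I], (I % 2 == 0) ? -C[I] : C[I]);
    }
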
+ Value *MaskFalseVal = nullptr; + switch (BuiltinID) { + case clang::X86::BI__builtin_ia32_vfmaddps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: + MaskFalseVal = Ops[0]; + break; + case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); + break; + case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + MaskFalseVal = Ops[2]; + break; + } + + if (MaskFalseVal) + return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); + + return Res; +} + +static Value * +EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops, + Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0, + bool NegAcc = false) { + unsigned Rnd = 4; + if (Ops.size() > 4) + Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); + + if (NegAcc) + Ops[2] = CGF.Builder.CreateFNeg(Ops[2]); + + Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0); + Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0); + Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0); + Value *Res; + if (Rnd != 4) { + Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ? + Intrinsic::x86_avx512_vfmadd_f32 : + Intrinsic::x86_avx512_vfmadd_f64; + Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), + {Ops[0], Ops[1], Ops[2], Ops[4]}); + } else { + Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType()); + Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3)); + } + // If we have more than 3 arguments, we need to do masking. + if (Ops.size() > 3) { + Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType()) + : Ops[PTIdx]; + + // If we negated the accumulator and the its the PassThru value we need to + // bypass the negate. Conveniently Upper should be the same thing in this + // case. + if (NegAcc && PTIdx == 2) + PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0); + + Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru); + } + return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0); +} + +static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, + ArrayRef<Value *> Ops) { + llvm::Type *Ty = Ops[0]->getType(); + // Arguments have a vXi32 type so cast to vXi64. + Ty = llvm::VectorType::get(CGF.Int64Ty, + Ty->getPrimitiveSizeInBits() / 64); + Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty); + Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty); + + if (IsSigned) { + // Shift left then arithmetic shift right. + Constant *ShiftAmt = ConstantInt::get(Ty, 32); + LHS = CGF.Builder.CreateShl(LHS, ShiftAmt); + LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt); + RHS = CGF.Builder.CreateShl(RHS, ShiftAmt); + RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt); + } else { + // Clear the upper bits. 
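
EmitX86Muldq above performs a widening 32x32->64 multiply in each 64-bit lane: the signed form sign-extends the low 32 bits with the shl/ashr pair, the unsigned form zero-extends them with the 0xffffffff mask that follows. A per-lane scalar model (illustration only):

    #include <cstdint>

    // One 64-bit lane: take the low 32 bits of each operand, extend, multiply.
    uint64_t muldqLane(uint64_t LHS, uint64_t RHS, bool IsSigned) {
      if (IsSigned)
        return static_cast<uint64_t>(int64_t(int32_t(LHS)) *
                                     int64_t(int32_t(RHS)));
      return uint64_t(uint32_t(LHS)) * uint64_t(uint32_t(RHS));
    }
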
+ Constant *Mask = ConstantInt::get(Ty, 0xffffffff); + LHS = CGF.Builder.CreateAnd(LHS, Mask); + RHS = CGF.Builder.CreateAnd(RHS, Mask); + } + + return CGF.Builder.CreateMul(LHS, RHS); +} + +// Emit a masked pternlog intrinsic. This only exists because the header has to +// use a macro and we aren't able to pass the input argument to a pternlog +// builtin and a select builtin without evaluating it twice. +static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, + ArrayRef<Value *> Ops) { + llvm::Type *Ty = Ops[0]->getType(); + + unsigned VecWidth = Ty->getPrimitiveSizeInBits(); + unsigned EltWidth = Ty->getScalarSizeInBits(); + Intrinsic::ID IID; + if (VecWidth == 128 && EltWidth == 32) + IID = Intrinsic::x86_avx512_pternlog_d_128; + else if (VecWidth == 256 && EltWidth == 32) + IID = Intrinsic::x86_avx512_pternlog_d_256; + else if (VecWidth == 512 && EltWidth == 32) + IID = Intrinsic::x86_avx512_pternlog_d_512; + else if (VecWidth == 128 && EltWidth == 64) + IID = Intrinsic::x86_avx512_pternlog_q_128; + else if (VecWidth == 256 && EltWidth == 64) + IID = Intrinsic::x86_avx512_pternlog_q_256; + else if (VecWidth == 512 && EltWidth == 64) + IID = Intrinsic::x86_avx512_pternlog_q_512; + else + llvm_unreachable("Unexpected intrinsic"); + + Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), + Ops.drop_back()); + Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0]; + return EmitX86Select(CGF, Ops[4], Ternlog, PassThru); } static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, @@ -7914,11 +8911,10 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { return EmitX86CpuSupports(FeatureStr); } -Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { +uint32_t +CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { // Processor features and mapping to processor feature value. - uint32_t FeaturesMask = 0; - for (const StringRef &FeatureStr : FeatureStrs) { unsigned Feature = StringSwitch<unsigned>(FeatureStr) @@ -7927,7 +8923,14 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { ; FeaturesMask |= (1U << Feature); } + return FeaturesMask; +} + +Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { + return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs)); +} +llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) { // Matching the struct layout from the compiler-rt/libgcc structure that is // filled in: // unsigned int __cpu_vendor; @@ -8063,8 +9066,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateBitCast(BuildVector(Ops), llvm::Type::getX86_MMXTy(getLLVMContext())); case X86::BI__builtin_ia32_vec_ext_v2si: - return Builder.CreateExtractElement(Ops[0], - llvm::ConstantInt::get(Ops[1]->getType(), 0)); + case X86::BI__builtin_ia32_vec_ext_v16qi: + case X86::BI__builtin_ia32_vec_ext_v8hi: + case X86::BI__builtin_ia32_vec_ext_v4si: + case X86::BI__builtin_ia32_vec_ext_v4sf: + case X86::BI__builtin_ia32_vec_ext_v2di: + case X86::BI__builtin_ia32_vec_ext_v32qi: + case X86::BI__builtin_ia32_vec_ext_v16hi: + case X86::BI__builtin_ia32_vec_ext_v8si: + case X86::BI__builtin_ia32_vec_ext_v4di: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue(); + Index &= NumElts - 1; + // These builtins exist so we can ensure the index is an ICE and in range. + // Otherwise we could just do this in the header file. 
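
The vec_ext handling that starts above (and continues just below) masks the constant index to the vector width and emits a plain extractelement; as the comment notes, the builtins exist so the index can be checked as an ICE in range. A small sketch with an illustrative helper name:

    #include <cstdint>
    #include "llvm/IR/IRBuilder.h"

    // Constant index wrapped to the vector width, then a plain extractelement.
    llvm::Value *vecExtSketch(llvm::IRBuilder<> &B, llvm::Value *Vec,
                              uint64_t Index) {
      unsigned NumElts = Vec->getType()->getVectorNumElements();
      return B.CreateExtractElement(Vec, Index & (NumElts - 1));
    }
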
+ return Builder.CreateExtractElement(Ops[0], Index); + } + case X86::BI__builtin_ia32_vec_set_v16qi: + case X86::BI__builtin_ia32_vec_set_v8hi: + case X86::BI__builtin_ia32_vec_set_v4si: + case X86::BI__builtin_ia32_vec_set_v2di: + case X86::BI__builtin_ia32_vec_set_v32qi: + case X86::BI__builtin_ia32_vec_set_v16hi: + case X86::BI__builtin_ia32_vec_set_v8si: + case X86::BI__builtin_ia32_vec_set_v4di: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); + Index &= NumElts - 1; + // These builtins exist so we can ensure the index is an ICE and in range. + // Otherwise we could just do this in the header file. + return Builder.CreateInsertElement(Ops[0], Ops[1], Index); + } case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { Address Tmp = CreateMemTemp(E->getArg(0)->getType()); @@ -8141,7 +9173,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storess128_mask: case X86::BI__builtin_ia32_storesd128_mask: { - return EmitX86MaskedStore(*this, Ops, 16); + return EmitX86MaskedStore(*this, Ops, 1); } case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: @@ -8173,6 +9205,66 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtmask2q512: return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return EmitX86ConvertToMask(*this, Ops[0]); + + case X86::BI__builtin_ia32_vfmaddss3: + case X86::BI__builtin_ia32_vfmaddsd3: + case X86::BI__builtin_ia32_vfmaddss3_mask: + case X86::BI__builtin_ia32_vfmaddsd3_mask: + return EmitScalarFMAExpr(*this, Ops, Ops[0]); + case X86::BI__builtin_ia32_vfmaddss: + case X86::BI__builtin_ia32_vfmaddsd: + return EmitScalarFMAExpr(*this, Ops, + Constant::getNullValue(Ops[0]->getType())); + case X86::BI__builtin_ia32_vfmaddss3_maskz: + case X86::BI__builtin_ia32_vfmaddsd3_maskz: + return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true); + case X86::BI__builtin_ia32_vfmaddss3_mask3: + case X86::BI__builtin_ia32_vfmaddsd3_mask3: + return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2); + case X86::BI__builtin_ia32_vfmsubss3_mask3: + case X86::BI__builtin_ia32_vfmsubsd3_mask3: + return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2, + /*NegAcc*/true); + case X86::BI__builtin_ia32_vfmaddps: + case X86::BI__builtin_ia32_vfmaddpd: + case X86::BI__builtin_ia32_vfmaddps256: + case X86::BI__builtin_ia32_vfmaddpd256: + case X86::BI__builtin_ia32_vfmaddps512_mask: + case X86::BI__builtin_ia32_vfmaddps512_maskz: + case X86::BI__builtin_ia32_vfmaddps512_mask3: + case X86::BI__builtin_ia32_vfmsubps512_mask3: + case X86::BI__builtin_ia32_vfmaddpd512_mask: + case X86::BI__builtin_ia32_vfmaddpd512_maskz: + case X86::BI__builtin_ia32_vfmaddpd512_mask3: + case X86::BI__builtin_ia32_vfmsubpd512_mask3: + return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false); + case X86::BI__builtin_ia32_vfmaddsubps: + case X86::BI__builtin_ia32_vfmaddsubpd: + 
case X86::BI__builtin_ia32_vfmaddsubps256: + case X86::BI__builtin_ia32_vfmaddsubpd256: + case X86::BI__builtin_ia32_vfmaddsubps512_mask: + case X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case X86::BI__builtin_ia32_vfmaddsubps512_mask3: + case X86::BI__builtin_ia32_vfmsubaddps512_mask3: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask: + case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: @@ -8211,7 +9303,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: - return EmitX86MaskedLoad(*this, Ops, 16); + return EmitX86MaskedLoad(*this, Ops, 1); case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: @@ -8230,11 +9322,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedLoad(*this, Ops, Align); } - case X86::BI__builtin_ia32_vbroadcastf128_pd256: - case X86::BI__builtin_ia32_vbroadcastf128_ps256: { - llvm::Type *DstTy = ConvertType(E->getType()); - return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); - } + case X86::BI__builtin_ia32_expandloaddf128_mask: + case X86::BI__builtin_ia32_expandloaddf256_mask: + case X86::BI__builtin_ia32_expandloaddf512_mask: + case X86::BI__builtin_ia32_expandloadsf128_mask: + case X86::BI__builtin_ia32_expandloadsf256_mask: + case X86::BI__builtin_ia32_expandloadsf512_mask: + case X86::BI__builtin_ia32_expandloaddi128_mask: + case X86::BI__builtin_ia32_expandloaddi256_mask: + case X86::BI__builtin_ia32_expandloaddi512_mask: + case X86::BI__builtin_ia32_expandloadsi128_mask: + case X86::BI__builtin_ia32_expandloadsi256_mask: + case X86::BI__builtin_ia32_expandloadsi512_mask: + case X86::BI__builtin_ia32_expandloadhi128_mask: + case X86::BI__builtin_ia32_expandloadhi256_mask: + case X86::BI__builtin_ia32_expandloadhi512_mask: + case X86::BI__builtin_ia32_expandloadqi128_mask: + case X86::BI__builtin_ia32_expandloadqi256_mask: + case X86::BI__builtin_ia32_expandloadqi512_mask: + return EmitX86ExpandLoad(*this, Ops); + + case X86::BI__builtin_ia32_compressstoredf128_mask: + case X86::BI__builtin_ia32_compressstoredf256_mask: + case X86::BI__builtin_ia32_compressstoredf512_mask: + case X86::BI__builtin_ia32_compressstoresf128_mask: + case X86::BI__builtin_ia32_compressstoresf256_mask: + case X86::BI__builtin_ia32_compressstoresf512_mask: + case X86::BI__builtin_ia32_compressstoredi128_mask: + case X86::BI__builtin_ia32_compressstoredi256_mask: + case X86::BI__builtin_ia32_compressstoredi512_mask: + case X86::BI__builtin_ia32_compressstoresi128_mask: + case X86::BI__builtin_ia32_compressstoresi256_mask: + case X86::BI__builtin_ia32_compressstoresi512_mask: + case X86::BI__builtin_ia32_compressstorehi128_mask: + case X86::BI__builtin_ia32_compressstorehi256_mask: + case X86::BI__builtin_ia32_compressstorehi512_mask: + case X86::BI__builtin_ia32_compressstoreqi128_mask: + case X86::BI__builtin_ia32_compressstoreqi256_mask: + case X86::BI__builtin_ia32_compressstoreqi512_mask: + return EmitX86CompressStore(*this, Ops); case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { @@ -8246,17 +9372,275 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // 
extract (0, 1) unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); - Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); + Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract"); // cast pointer to i64 & store Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: { + llvm::Type *DstTy = ConvertType(E->getType()); + unsigned NumElts = DstTy->getVectorNumElements(); + unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned SubVectors = SrcNumElts / NumElts; + unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); + assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); + Index &= SubVectors - 1; // Remove any extra bits. + Index *= NumElts; + + uint32_t Indices[16]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + Index; + + Value *Res = Builder.CreateShuffleVector(Ops[0], + UndefValue::get(Ops[0]->getType()), + makeArrayRef(Indices, NumElts), + "extract"); + + if (Ops.size() == 4) + Res = EmitX86Select(*this, Ops[3], Res, Ops[2]); + + return Res; + } + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: { + unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements(); + unsigned SubVectors = DstNumElts / SrcNumElts; + unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); + assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); + Index &= SubVectors - 1; // Remove any extra bits. + Index *= SrcNumElts; + + uint32_t Indices[16]; + for (unsigned i = 0; i != DstNumElts; ++i) + Indices[i] = (i >= SrcNumElts) ? 
SrcNumElts + (i % SrcNumElts) : i; + + Value *Op1 = Builder.CreateShuffleVector(Ops[1], + UndefValue::get(Ops[1]->getType()), + makeArrayRef(Indices, DstNumElts), + "widen"); + + for (unsigned i = 0; i != DstNumElts; ++i) { + if (i >= Index && i < (Index + SrcNumElts)) + Indices[i] = (i - Index) + DstNumElts; + else + Indices[i] = i; + } + + return Builder.CreateShuffleVector(Ops[0], Op1, + makeArrayRef(Indices, DstNumElts), + "insert"); + } + case X86::BI__builtin_ia32_pmovqd512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: { + Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType()); + return EmitX86Select(*this, Ops[2], Res, Ops[1]); + } + case X86::BI__builtin_ia32_pmovdb512_mask: + case X86::BI__builtin_ia32_pmovdw512_mask: + case X86::BI__builtin_ia32_pmovqw512_mask: { + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return Builder.CreateTrunc(Ops[0], Ops[1]->getType()); + + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_pmovdb512_mask: + IID = Intrinsic::x86_avx512_mask_pmov_db_512; + break; + case X86::BI__builtin_ia32_pmovdw512_mask: + IID = Intrinsic::x86_avx512_mask_pmov_dw_512; + break; + case X86::BI__builtin_ia32_pmovqw512_mask: + IID = Intrinsic::x86_avx512_mask_pmov_qw_512; + break; + } + + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); + } + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_blendps: + case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendw256: + case X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_pblendd256: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + + uint32_t Indices[16]; + // If there are more than 8 elements, the immediate is used twice so make + // sure we handle that. + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i; + + return Builder.CreateShuffleVector(Ops[0], Ops[1], + makeArrayRef(Indices, NumElts), + "blend"); + } + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: { + uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. + Imm = (Imm & 0xff) * 0x01010101; + + uint32_t Indices[32]; + for (unsigned l = 0; l != NumElts; l += 8) { + for (unsigned i = 0; i != 4; ++i) { + Indices[l + i] = l + (Imm & 3); + Imm >>= 2; + } + for (unsigned i = 4; i != 8; ++i) + Indices[l + i] = l + i; + } + + return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), + makeArrayRef(Indices, NumElts), + "pshuflw"); + } + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: { + uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
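+ // For example, Imm = 0x1B keeps words 0-3 of each lane and reverses words + // 4-7, giving indices <0,1,2,3,7,6,5,4>.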
+ Imm = (Imm & 0xff) * 0x01010101; + + uint32_t Indices[32]; + for (unsigned l = 0; l != NumElts; l += 8) { + for (unsigned i = 0; i != 4; ++i) + Indices[l + i] = l + i; + for (unsigned i = 4; i != 8; ++i) { + Indices[l + i] = l + 4 + (Imm & 3); + Imm >>= 2; + } + } + + return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), + makeArrayRef(Indices, NumElts), + "pshufhw"); + } + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilpd512: + case X86::BI__builtin_ia32_vpermilps512: { + uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. + Imm = (Imm & 0xff) * 0x01010101; + + uint32_t Indices[16]; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + Indices[i + l] = (Imm % NumLaneElts) + l; + Imm /= NumLaneElts; + } + } + + return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), + makeArrayRef(Indices, NumElts), + "permil"); + } + case X86::BI__builtin_ia32_shufpd: + case X86::BI__builtin_ia32_shufpd256: + case X86::BI__builtin_ia32_shufpd512: + case X86::BI__builtin_ia32_shufps: + case X86::BI__builtin_ia32_shufps256: + case X86::BI__builtin_ia32_shufps512: { + uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. + Imm = (Imm & 0xff) * 0x01010101; + + uint32_t Indices[16]; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Index = Imm % NumLaneElts; + Imm /= NumLaneElts; + if (i >= (NumLaneElts / 2)) + Index += NumElts; + Indices[l + i] = l + Index; + } + } + + return Builder.CreateShuffleVector(Ops[0], Ops[1], + makeArrayRef(Indices, NumElts), + "shufp"); + } + case X86::BI__builtin_ia32_permdi256: + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi512: + case X86::BI__builtin_ia32_permdf512: { + unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + + // These intrinsics operate on 256-bit lanes of four 64-bit elements. 
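+ // Each pair of immediate bits selects one element within the lane, e.g. + // Imm = 0x1B reverses the four elements of a lane: indices <3,2,1,0>.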
+ uint32_t Indices[8]; + for (unsigned l = 0; l != NumElts; l += 4) + for (unsigned i = 0; i != 4; ++i) + Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); + + return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), + makeArrayRef(Indices, NumElts), + "perm"); + } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: - case X86::BI__builtin_ia32_palignr512_mask: { - unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + case X86::BI__builtin_ia32_palignr512: { + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); assert(NumElts % 16 == 0); @@ -8285,15 +9669,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], - makeArrayRef(Indices, NumElts), - "palignr"); + return Builder.CreateShuffleVector(Ops[1], Ops[0], + makeArrayRef(Indices, NumElts), + "palignr"); + } + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; + + // Mask the shift amount to width of two vectors. + ShiftVal &= (2 * NumElts) - 1; - // If this isn't a masked builtin, just return the align operation. - if (Ops.size() == 3) - return Align; + uint32_t Indices[16]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + ShiftVal; + + return Builder.CreateShuffleVector(Ops[1], Ops[0], + makeArrayRef(Indices, NumElts), + "valign"); + } + case X86::BI__builtin_ia32_shuf_f32x4_256: + case X86::BI__builtin_ia32_shuf_f64x2_256: + case X86::BI__builtin_ia32_shuf_i32x4_256: + case X86::BI__builtin_ia32_shuf_i64x2_256: + case X86::BI__builtin_ia32_shuf_f32x4: + case X86::BI__builtin_ia32_shuf_f64x2: + case X86::BI__builtin_ia32_shuf_i32x4: + case X86::BI__builtin_ia32_shuf_i64x2: { + unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; + unsigned NumLaneElts = NumElts / NumLanes; + + uint32_t Indices[16]; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + unsigned Index = (Imm % NumLanes) * NumLaneElts; + Imm /= NumLanes; // Discard the bits we just used. + if (l >= (NumElts / 2)) + Index += NumElts; // Switch to other source. + for (unsigned i = 0; i != NumLaneElts; ++i) { + Indices[l + i] = Index + i; + } + } - return EmitX86Select(*this, Ops[4], Align, Ops[3]); + return Builder.CreateShuffleVector(Ops[0], Ops[1], + makeArrayRef(Indices, NumElts), + "shuf"); } case X86::BI__builtin_ia32_vperm2f128_pd256: @@ -8335,6 +9762,66 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, "vperm"); } + case X86::BI__builtin_ia32_pslldqi128_byteshift: + case X86::BI__builtin_ia32_pslldqi256_byteshift: + case X86::BI__builtin_ia32_pslldqi512_byteshift: { + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; + llvm::Type *ResultType = Ops[0]->getType(); + // Builtin type is vXi64 so multiply by 8 to get bytes. + unsigned NumElts = ResultType->getVectorNumElements() * 8; + + // If pslldq is shifting the vector more than 15 bytes, emit zero. 
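+ // (Smaller shifts become the byte shuffle built below; e.g. ShiftVal == 3 + // selects three zero bytes followed by source bytes 0..12 in each lane.)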
+ if (ShiftVal >= 16) + return llvm::Constant::getNullValue(ResultType); + + uint32_t Indices[64]; + // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that + for (unsigned l = 0; l != NumElts; l += 16) { + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = NumElts + i - ShiftVal; + if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand. + Indices[l + i] = Idx + l; + } + } + + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts); + Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); + Value *Zero = llvm::Constant::getNullValue(VecTy); + Value *SV = Builder.CreateShuffleVector(Zero, Cast, + makeArrayRef(Indices, NumElts), + "pslldq"); + return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast"); + } + case X86::BI__builtin_ia32_psrldqi128_byteshift: + case X86::BI__builtin_ia32_psrldqi256_byteshift: + case X86::BI__builtin_ia32_psrldqi512_byteshift: { + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; + llvm::Type *ResultType = Ops[0]->getType(); + // Builtin type is vXi64 so multiply by 8 to get bytes. + unsigned NumElts = ResultType->getVectorNumElements() * 8; + + // If psrldq is shifting the vector more than 15 bytes, emit zero. + if (ShiftVal >= 16) + return llvm::Constant::getNullValue(ResultType); + + uint32_t Indices[64]; + // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that + for (unsigned l = 0; l != NumElts; l += 16) { + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = i + ShiftVal; + if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand. + Indices[l + i] = Idx + l; + } + } + + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts); + Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); + Value *Zero = llvm::Constant::getNullValue(VecTy); + Value *SV = Builder.CreateShuffleVector(Cast, Zero, + makeArrayRef(Indices, NumElts), + "psrldq"); + return Builder.CreateBitCast(SV, ResultType, "cast"); + } case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: @@ -8380,6 +9867,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_selectpd_256: case X86::BI__builtin_ia32_selectpd_512: return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: { + Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); + Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0); + A = EmitX86ScalarSelect(*this, Ops[0], A, B); + return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); + } case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpb256_mask: case X86::BI__builtin_ia32_cmpb512_mask: @@ -8411,6 +9905,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kortestchi: + case X86::BI__builtin_ia32_kortestzhi: { + Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + Value *C; + if (BuiltinID == X86::BI__builtin_ia32_kortestchi) + C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty()); + else + C = llvm::Constant::getNullValue(Builder.getInt16Ty()); + Value *Cmp = Builder.CreateICmpEQ(Or, C); + return Builder.CreateZExt(Cmp, ConvertType(E->getType())); + } + case X86::BI__builtin_ia32_kandhi: return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); case X86::BI__builtin_ia32_kandnhi: @@ -8427,85 +9933,176 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned 
BuiltinID, Builder.getInt16Ty()); } - case X86::BI__builtin_ia32_vplzcntd_128_mask: - case X86::BI__builtin_ia32_vplzcntd_256_mask: - case X86::BI__builtin_ia32_vplzcntd_512_mask: - case X86::BI__builtin_ia32_vplzcntq_128_mask: - case X86::BI__builtin_ia32_vplzcntq_256_mask: - case X86::BI__builtin_ia32_vplzcntq_512_mask: { + case X86::BI__builtin_ia32_kunpckdi: + case X86::BI__builtin_ia32_kunpcksi: + case X86::BI__builtin_ia32_kunpckhi: { + unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits(); + Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); + Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); + uint32_t Indices[64]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + + // First extract half of each vector. This gives better codegen than + // doing it in a single shuffle. + LHS = Builder.CreateShuffleVector(LHS, LHS, + makeArrayRef(Indices, NumElts / 2)); + RHS = Builder.CreateShuffleVector(RHS, RHS, + makeArrayRef(Indices, NumElts / 2)); + // Concat the vectors. + // NOTE: Operands are swapped to match the intrinsic definition. + Value *Res = Builder.CreateShuffleVector(RHS, LHS, + makeArrayRef(Indices, NumElts)); + return Builder.CreateBitCast(Res, Ops[0]->getType()); + } + + case X86::BI__builtin_ia32_vplzcntd_128: + case X86::BI__builtin_ia32_vplzcntd_256: + case X86::BI__builtin_ia32_vplzcntd_512: + case X86::BI__builtin_ia32_vplzcntq_128: + case X86::BI__builtin_ia32_vplzcntq_256: + case X86::BI__builtin_ia32_vplzcntq_512: { Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); - return EmitX86Select(*this, Ops[2], - Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), - Ops[1]); + return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}); + } + case X86::BI__builtin_ia32_sqrtss: + case X86::BI__builtin_ia32_sqrtsd: { + Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); + Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); + A = Builder.CreateCall(F, {A}); + return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); + } + case X86::BI__builtin_ia32_sqrtsd_round_mask: + case X86::BI__builtin_ia32_sqrtss_round_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); + // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), + // otherwise keep the intrinsic. + if (CC != 4) { + Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ? + Intrinsic::x86_avx512_mask_sqrt_sd : + Intrinsic::x86_avx512_mask_sqrt_ss; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } + Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); + Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); + A = Builder.CreateCall(F, A); + Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); + A = EmitX86ScalarSelect(*this, Ops[3], A, Src); + return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); + } + case X86::BI__builtin_ia32_sqrtpd256: + case X86::BI__builtin_ia32_sqrtpd: + case X86::BI__builtin_ia32_sqrtps256: + case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtps512: + case X86::BI__builtin_ia32_sqrtpd512: { + if (Ops.size() == 2) { + unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), + // otherwise keep the intrinsic. + if (CC != 4) { + Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ? 
+ Intrinsic::x86_avx512_sqrt_ps_512 : + Intrinsic::x86_avx512_sqrt_pd_512; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } + } + Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); + return Builder.CreateCall(F, Ops[0]); } - case X86::BI__builtin_ia32_pabsb128: case X86::BI__builtin_ia32_pabsw128: case X86::BI__builtin_ia32_pabsd128: case X86::BI__builtin_ia32_pabsb256: case X86::BI__builtin_ia32_pabsw256: case X86::BI__builtin_ia32_pabsd256: - case X86::BI__builtin_ia32_pabsq128_mask: - case X86::BI__builtin_ia32_pabsq256_mask: - case X86::BI__builtin_ia32_pabsb512_mask: - case X86::BI__builtin_ia32_pabsw512_mask: - case X86::BI__builtin_ia32_pabsd512_mask: - case X86::BI__builtin_ia32_pabsq512_mask: + case X86::BI__builtin_ia32_pabsq128: + case X86::BI__builtin_ia32_pabsq256: + case X86::BI__builtin_ia32_pabsb512: + case X86::BI__builtin_ia32_pabsw512: + case X86::BI__builtin_ia32_pabsd512: + case X86::BI__builtin_ia32_pabsq512: return EmitX86Abs(*this, Ops); case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: - case X86::BI__builtin_ia32_pmaxsq128_mask: + case X86::BI__builtin_ia32_pmaxsq128: case X86::BI__builtin_ia32_pmaxsb256: case X86::BI__builtin_ia32_pmaxsw256: case X86::BI__builtin_ia32_pmaxsd256: - case X86::BI__builtin_ia32_pmaxsq256_mask: - case X86::BI__builtin_ia32_pmaxsb512_mask: - case X86::BI__builtin_ia32_pmaxsw512_mask: - case X86::BI__builtin_ia32_pmaxsd512_mask: - case X86::BI__builtin_ia32_pmaxsq512_mask: + case X86::BI__builtin_ia32_pmaxsq256: + case X86::BI__builtin_ia32_pmaxsb512: + case X86::BI__builtin_ia32_pmaxsw512: + case X86::BI__builtin_ia32_pmaxsd512: + case X86::BI__builtin_ia32_pmaxsq512: return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); case X86::BI__builtin_ia32_pmaxub128: case X86::BI__builtin_ia32_pmaxuw128: case X86::BI__builtin_ia32_pmaxud128: - case X86::BI__builtin_ia32_pmaxuq128_mask: + case X86::BI__builtin_ia32_pmaxuq128: case X86::BI__builtin_ia32_pmaxub256: case X86::BI__builtin_ia32_pmaxuw256: case X86::BI__builtin_ia32_pmaxud256: - case X86::BI__builtin_ia32_pmaxuq256_mask: - case X86::BI__builtin_ia32_pmaxub512_mask: - case X86::BI__builtin_ia32_pmaxuw512_mask: - case X86::BI__builtin_ia32_pmaxud512_mask: - case X86::BI__builtin_ia32_pmaxuq512_mask: + case X86::BI__builtin_ia32_pmaxuq256: + case X86::BI__builtin_ia32_pmaxub512: + case X86::BI__builtin_ia32_pmaxuw512: + case X86::BI__builtin_ia32_pmaxud512: + case X86::BI__builtin_ia32_pmaxuq512: return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); case X86::BI__builtin_ia32_pminsb128: case X86::BI__builtin_ia32_pminsw128: case X86::BI__builtin_ia32_pminsd128: - case X86::BI__builtin_ia32_pminsq128_mask: + case X86::BI__builtin_ia32_pminsq128: case X86::BI__builtin_ia32_pminsb256: case X86::BI__builtin_ia32_pminsw256: case X86::BI__builtin_ia32_pminsd256: - case X86::BI__builtin_ia32_pminsq256_mask: - case X86::BI__builtin_ia32_pminsb512_mask: - case X86::BI__builtin_ia32_pminsw512_mask: - case X86::BI__builtin_ia32_pminsd512_mask: - case X86::BI__builtin_ia32_pminsq512_mask: + case X86::BI__builtin_ia32_pminsq256: + case X86::BI__builtin_ia32_pminsb512: + case X86::BI__builtin_ia32_pminsw512: + case X86::BI__builtin_ia32_pminsd512: + case X86::BI__builtin_ia32_pminsq512: return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); case X86::BI__builtin_ia32_pminub128: case X86::BI__builtin_ia32_pminuw128: case X86::BI__builtin_ia32_pminud128: - case X86::BI__builtin_ia32_pminuq128_mask: + case 
X86::BI__builtin_ia32_pminuq128: case X86::BI__builtin_ia32_pminub256: case X86::BI__builtin_ia32_pminuw256: case X86::BI__builtin_ia32_pminud256: - case X86::BI__builtin_ia32_pminuq256_mask: - case X86::BI__builtin_ia32_pminub512_mask: - case X86::BI__builtin_ia32_pminuw512_mask: - case X86::BI__builtin_ia32_pminud512_mask: - case X86::BI__builtin_ia32_pminuq512_mask: + case X86::BI__builtin_ia32_pminuq256: + case X86::BI__builtin_ia32_pminub512: + case X86::BI__builtin_ia32_pminuw512: + case X86::BI__builtin_ia32_pminud512: + case X86::BI__builtin_ia32_pminuq512: return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); + case X86::BI__builtin_ia32_pmuludq128: + case X86::BI__builtin_ia32_pmuludq256: + case X86::BI__builtin_ia32_pmuludq512: + return EmitX86Muldq(*this, /*IsSigned*/false, Ops); + + case X86::BI__builtin_ia32_pmuldq128: + case X86::BI__builtin_ia32_pmuldq256: + case X86::BI__builtin_ia32_pmuldq512: + return EmitX86Muldq(*this, /*IsSigned*/true, Ops); + + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops); + + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { @@ -8549,7 +10146,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractValue(Call, 1); } - // SSE packed comparison intrinsics + case X86::BI__builtin_ia32_fpclassps128_mask: + case X86::BI__builtin_ia32_fpclassps256_mask: + case X86::BI__builtin_ia32_fpclassps512_mask: + case X86::BI__builtin_ia32_fpclasspd128_mask: + case X86::BI__builtin_ia32_fpclasspd256_mask: + case X86::BI__builtin_ia32_fpclasspd512_mask: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *MaskIn = Ops[2]; + Ops.erase(&Ops[2]); + + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_fpclassps128_mask: + ID = Intrinsic::x86_avx512_fpclass_ps_128; + break; + case X86::BI__builtin_ia32_fpclassps256_mask: + ID = Intrinsic::x86_avx512_fpclass_ps_256; + break; + case X86::BI__builtin_ia32_fpclassps512_mask: + ID = Intrinsic::x86_avx512_fpclass_ps_512; + break; + case X86::BI__builtin_ia32_fpclasspd128_mask: + ID = Intrinsic::x86_avx512_fpclass_pd_128; + break; + case X86::BI__builtin_ia32_fpclasspd256_mask: + ID = Intrinsic::x86_avx512_fpclass_pd_256; + break; + case X86::BI__builtin_ia32_fpclasspd512_mask: + ID = Intrinsic::x86_avx512_fpclass_pd_512; + break; + } + + Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); + } + + // packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: return getVectorFCmpIR(CmpInst::FCMP_OEQ); @@ -8577,64 +10211,79 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpps: case X86::BI__builtin_ia32_cmpps256: case X86::BI__builtin_ia32_cmppd: - case 
X86::BI__builtin_ia32_cmppd256: { - unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - // If this one of the SSE immediates, we can use native IR. - if (CC < 8) { - FCmpInst::Predicate Pred; - switch (CC) { - case 0: Pred = FCmpInst::FCMP_OEQ; break; - case 1: Pred = FCmpInst::FCMP_OLT; break; - case 2: Pred = FCmpInst::FCMP_OLE; break; - case 3: Pred = FCmpInst::FCMP_UNO; break; - case 4: Pred = FCmpInst::FCMP_UNE; break; - case 5: Pred = FCmpInst::FCMP_UGE; break; - case 6: Pred = FCmpInst::FCMP_UGT; break; - case 7: Pred = FCmpInst::FCMP_ORD; break; - } - return getVectorFCmpIR(Pred); + case X86::BI__builtin_ia32_cmppd256: + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmpps512_mask: + case X86::BI__builtin_ia32_cmppd128_mask: + case X86::BI__builtin_ia32_cmppd256_mask: + case X86::BI__builtin_ia32_cmppd512_mask: { + // Lowering vector comparisons to fcmp instructions, while + // ignoring the requested signalling behaviour and + // ignoring the requested rounding mode. + // This is only possible as long as FENV_ACCESS is not implemented. + // See also: https://reviews.llvm.org/D45616 + + // The third argument is the comparison condition, an integer in the + // range [0, 31]. + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f; + + // Lowering to IR fcmp instruction. + // Ignoring requested signaling behaviour, + // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT. + FCmpInst::Predicate Pred; + switch (CC) { + case 0x00: Pred = FCmpInst::FCMP_OEQ; break; + case 0x01: Pred = FCmpInst::FCMP_OLT; break; + case 0x02: Pred = FCmpInst::FCMP_OLE; break; + case 0x03: Pred = FCmpInst::FCMP_UNO; break; + case 0x04: Pred = FCmpInst::FCMP_UNE; break; + case 0x05: Pred = FCmpInst::FCMP_UGE; break; + case 0x06: Pred = FCmpInst::FCMP_UGT; break; + case 0x07: Pred = FCmpInst::FCMP_ORD; break; + case 0x08: Pred = FCmpInst::FCMP_UEQ; break; + case 0x09: Pred = FCmpInst::FCMP_ULT; break; + case 0x0a: Pred = FCmpInst::FCMP_ULE; break; + case 0x0b: Pred = FCmpInst::FCMP_FALSE; break; + case 0x0c: Pred = FCmpInst::FCMP_ONE; break; + case 0x0d: Pred = FCmpInst::FCMP_OGE; break; + case 0x0e: Pred = FCmpInst::FCMP_OGT; break; + case 0x0f: Pred = FCmpInst::FCMP_TRUE; break; + case 0x10: Pred = FCmpInst::FCMP_OEQ; break; + case 0x11: Pred = FCmpInst::FCMP_OLT; break; + case 0x12: Pred = FCmpInst::FCMP_OLE; break; + case 0x13: Pred = FCmpInst::FCMP_UNO; break; + case 0x14: Pred = FCmpInst::FCMP_UNE; break; + case 0x15: Pred = FCmpInst::FCMP_UGE; break; + case 0x16: Pred = FCmpInst::FCMP_UGT; break; + case 0x17: Pred = FCmpInst::FCMP_ORD; break; + case 0x18: Pred = FCmpInst::FCMP_UEQ; break; + case 0x19: Pred = FCmpInst::FCMP_ULT; break; + case 0x1a: Pred = FCmpInst::FCMP_ULE; break; + case 0x1b: Pred = FCmpInst::FCMP_FALSE; break; + case 0x1c: Pred = FCmpInst::FCMP_ONE; break; + case 0x1d: Pred = FCmpInst::FCMP_OGE; break; + case 0x1e: Pred = FCmpInst::FCMP_OGT; break; + case 0x1f: Pred = FCmpInst::FCMP_TRUE; break; + default: llvm_unreachable("Unhandled CC"); } - // We can't handle 8-31 immediates with native IR, use the intrinsic. - // Except for predicates that create constants.
- Intrinsic::ID ID; + // Builtins without the _mask suffix return a vector of integers + // of the same width as the input vectors switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpps: - ID = Intrinsic::x86_sse_cmp_ps; - break; - case X86::BI__builtin_ia32_cmpps256: - // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector - // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... - if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { - Value *Constant = (CC == 0xf || CC == 0x1f) ? - llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : - llvm::Constant::getNullValue(Builder.getInt32Ty()); - Value *Vec = Builder.CreateVectorSplat( - Ops[0]->getType()->getVectorNumElements(), Constant); - return Builder.CreateBitCast(Vec, Ops[0]->getType()); - } - ID = Intrinsic::x86_avx_cmp_ps_256; - break; - case X86::BI__builtin_ia32_cmppd: - ID = Intrinsic::x86_sse2_cmp_pd; - break; - case X86::BI__builtin_ia32_cmppd256: - // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector - // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... - if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { - Value *Constant = (CC == 0xf || CC == 0x1f) ? - llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : - llvm::Constant::getNullValue(Builder.getInt64Ty()); - Value *Vec = Builder.CreateVectorSplat( - Ops[0]->getType()->getVectorNumElements(), Constant); - return Builder.CreateBitCast(Vec, Ops[0]->getType()); - } - ID = Intrinsic::x86_avx_cmp_pd_256; - break; + case X86::BI__builtin_ia32_cmpps512_mask: + case X86::BI__builtin_ia32_cmppd512_mask: + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmppd128_mask: + case X86::BI__builtin_ia32_cmppd256_mask: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); + return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); + } + default: + return getVectorFCmpIR(Pred); } - - return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } // SSE scalar comparison intrinsics @@ -9195,19 +10844,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); - // Element zero comes from the first input vector and element one comes from - // the second. The element indices within each vector are numbered in big - // endian order so the shuffle mask must be adjusted for this on little - // endian platforms (i.e. index is complemented and source vector reversed). - unsigned ElemIdx0; - unsigned ElemIdx1; - if (getTarget().isLittleEndian()) { - ElemIdx0 = (~Index & 1) + 2; - ElemIdx1 = (~Index & 2) >> 1; - } else { // BigEndian - ElemIdx0 = (Index & 2) >> 1; - ElemIdx1 = 2 + (Index & 1); - } + // Account for endianness by treating this as just a shuffle. So we use the + // same indices for both LE and BE in order to produce expected results in + // both cases. 
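+ // For example, Index == 3 gives ElemIdx0 = 1 and ElemIdx1 = 3 below, i.e. + // element 1 of the first vector followed by element 1 of the second.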
+ unsigned ElemIdx0 = (Index & 2) >> 1; + unsigned ElemIdx1 = 2 + (Index & 1); Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), ConstantInt::get(Int32Ty, ElemIdx1)}; @@ -9398,6 +11039,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CI->setConvergent(); return CI; } + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + case AMDGPU::BI__builtin_amdgcn_ds_fminf: + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + llvm::SmallVector<llvm::Value *, 5> Args; + for (unsigned I = 0; I != 5; ++I) + Args.push_back(EmitScalarExpr(E->getArg(I))); + const llvm::Type *PtrTy = Args[0]->getType(); + // check pointer parameter + if (!PtrTy->isPointerTy() || + E->getArg(0) + ->getType() + ->getPointeeType() + .getQualifiers() + .getAddressSpace() != LangAS::opencl_local || + !PtrTy->getPointerElementType()->isFloatTy()) { + CGM.Error(E->getArg(0)->getLocStart(), + "parameter should have type \"local float*\""); + return nullptr; + } + // check float parameter + if (!Args[1]->getType()->isFloatTy()) { + CGM.Error(E->getArg(1)->getLocStart(), + "parameter should have type \"float\""); + return nullptr; + } + + Intrinsic::ID ID; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + ID = Intrinsic::amdgcn_ds_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_ds_fminf: + ID = Intrinsic::amdgcn_ds_fmin; + break; + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + ID = Intrinsic::amdgcn_ds_fmax; + break; + default: + llvm_unreachable("Unknown BuiltinID"); + } + Value *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Args); + } // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: @@ -10028,7 +11712,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m16n16k16_ld_a: case NVPTX::BI__hmma_m16n16k16_ld_b: case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - case NVPTX::BI__hmma_m16n16k16_ld_c_f32: { + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + case NVPTX::BI__hmma_m32n8k16_ld_a: + case NVPTX::BI__hmma_m32n8k16_ld_b: + case NVPTX::BI__hmma_m32n8k16_ld_c_f16: + case NVPTX::BI__hmma_m32n8k16_ld_c_f32: + case NVPTX::BI__hmma_m8n32k16_ld_a: + case NVPTX::BI__hmma_m8n32k16_ld_b: + case NVPTX::BI__hmma_m8n32k16_ld_c_f16: + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -10040,31 +11732,70 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, unsigned NumResults; switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_load_a_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; NumResults = 8; break; case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_load_b_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; NumResults = 8; break; case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_load_c_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; NumResults = 4; break; case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? 
Intrinsic::nvvm_wmma_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_load_c_f32_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m32n8k16_ld_a: + IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m32n8k16_ld_b: + IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m32n8k16_ld_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m32n8k16_ld_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m8n32k16_ld_a: + IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m8n32k16_ld_b: + IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m8n32k16_ld_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride; NumResults = 8; break; default: llvm_unreachable("Unexpected builtin ID."); } Value *Result = - Builder.CreateCall(CGM.getIntrinsic(IID), - {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm}); + Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); // Save returned values. for (unsigned i = 0; i < NumResults; ++i) { @@ -10078,7 +11809,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, } case NVPTX::BI__hmma_m16n16k16_st_c_f16: - case NVPTX::BI__hmma_m16n16k16_st_c_f32: { + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + case NVPTX::BI__hmma_m32n8k16_st_c_f16: + case NVPTX::BI__hmma_m32n8k16_st_c_f32: + case NVPTX::BI__hmma_m8n32k16_st_c_f16: + case NVPTX::BI__hmma_m8n32k16_st_c_f32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -10092,21 +11827,38 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, // for some reason nvcc builtins use _c_. switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_store_d_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; NumResults = 4; break; case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_store_d_f32_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; + break; + case NVPTX::BI__hmma_m32n8k16_st_c_f16: + IID = isColMajor ? 
Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m32n8k16_st_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride; + break; + case NVPTX::BI__hmma_m8n32k16_st_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride; break; default: llvm_unreachable("Unexpected builtin ID."); } - Function *Intrinsic = CGM.getIntrinsic(IID); + Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); - SmallVector<Value *, 10> Values; - Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy)); + SmallVector<Value *, 10> Values = {Dst}; for (unsigned i = 0; i < NumResults; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -10118,12 +11870,20 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Result; } - // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) - // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf> + // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) --> + // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf> case NVPTX::BI__hmma_m16n16k16_mma_f16f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - case NVPTX::BI__hmma_m16n16k16_mma_f16f32: { + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); @@ -10140,15 +11900,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, bool Satf = SatfArg.getSExtValue(); // clang-format off -#define MMA_VARIANTS(type) {{ \ - Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \ - Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \ - Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \ - Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \ - Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \ +#define MMA_VARIANTS(geom, type) {{ \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ }} // clang-format on @@ -10162,22 
+11922,62 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, unsigned NumEltsD; switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16)); + IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16)); NumEltsC = 4; NumEltsD = 4; break; case NVPTX::BI__hmma_m16n16k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16)); + IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16)); NumEltsC = 4; NumEltsD = 8; break; case NVPTX::BI__hmma_m16n16k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32)); + IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32)); NumEltsC = 8; NumEltsD = 4; break; case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32)); + IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32)); + NumEltsC = 8; + NumEltsD = 8; + break; + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16)); + NumEltsC = 4; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16)); + NumEltsC = 4; + NumEltsD = 8; + break; + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32)); + NumEltsC = 8; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32)); + NumEltsC = 8; + NumEltsD = 8; + break; + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16)); + NumEltsC = 4; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16)); + NumEltsC = 4; + NumEltsD = 8; + break; + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32)); + NumEltsC = 8; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32)); NumEltsC = 8; NumEltsD = 8; break; @@ -10231,6 +12031,36 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_memory_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *I = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + return Builder.CreateCall(Callee, I); + } + case WebAssembly::BI__builtin_wasm_memory_grow: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Args[] = { + EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(1)) + }; + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + return Builder.CreateCall(Callee, Args); + } + case WebAssembly::BI__builtin_wasm_mem_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *I = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType); + return Builder.CreateCall(Callee, I); + } + case WebAssembly::BI__builtin_wasm_mem_grow: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Args[] = { + EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(1)) + }; + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType); + return Builder.CreateCall(Callee, Args); + } case WebAssembly::BI__builtin_wasm_current_memory: { llvm::Type *ResultType = ConvertType(E->getType()); Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); @@ 
-10262,6 +12092,93 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, SmallVector<llvm::Value *, 4> Ops; Intrinsic::ID ID = Intrinsic::not_intrinsic; + auto MakeCircLd = [&](unsigned IntID, bool HasImm) { + // The base pointer is passed by address, so it needs to be loaded. + Address BP = EmitPointerWithAlignment(E->getArg(0)); + BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy), + BP.getAlignment()); + llvm::Value *Base = Builder.CreateLoad(BP); + // Operands are Base, Increment, Modifier, Start. + if (HasImm) + Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(3)) }; + else + Ops = { Base, EmitScalarExpr(E->getArg(1)), + EmitScalarExpr(E->getArg(2)) }; + + llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); + llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1); + llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), + NewBase->getType()->getPointerTo()); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + // The intrinsic generates two results. The new value for the base pointer + // needs to be stored. + Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); + return Builder.CreateExtractValue(Result, 0); + }; + + auto MakeCircSt = [&](unsigned IntID, bool HasImm) { + // The base pointer is passed by address, so it needs to be loaded. + Address BP = EmitPointerWithAlignment(E->getArg(0)); + BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy), + BP.getAlignment()); + llvm::Value *Base = Builder.CreateLoad(BP); + // Operands are Base, Increment, Modifier, Value, Start. + if (HasImm) + Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) }; + else + Ops = { Base, EmitScalarExpr(E->getArg(1)), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) }; + + llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); + llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), + NewBase->getType()->getPointerTo()); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + // The intrinsic generates one result, which is the new value for the base + // pointer. It needs to be stored. + return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); + }; + + // Handle the conversion of bit-reverse load intrinsics to bit code. + // The intrinsic call after this function only reads from memory and the + // write to memory is dealt by the store instruction. + auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) { + // The intrinsic generates one result, which is the new value for the base + // pointer. It needs to be returned. The result of the load instruction is + // passed to intrinsic by address, so the value needs to be stored. + llvm::Value *BaseAddress = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); + + // Expressions like &(*pt++) will be incremented per evaluation. + // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression + // per call. + Address DestAddr = EmitPointerWithAlignment(E->getArg(1)); + DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy), + DestAddr.getAlignment()); + llvm::Value *DestAddress = DestAddr.getPointer(); + + // Operands are Base, Dest, Modifier. + // The intrinsic format in LLVM IR is defined as + // { ValueType, i8* } (i8*, i32). 
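+ // In other words, the call below yields the loaded value (field 0) and the + // updated base pointer (field 1), which are unpacked further down.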
+ Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))}; + + llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); + // The value needs to be stored as the variable is passed by reference. + llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0); + + // The store needs to be truncated to fit the destination type. + // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs + // to be handled with stores of respective destination type. + DestVal = Builder.CreateTrunc(DestVal, DestTy); + + llvm::Value *DestForStore = + Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo()); + Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment()); + // The updated value of the base pointer is returned. + return Builder.CreateExtractValue(Result, 1); + }; + switch (BuiltinID) { case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: { @@ -10307,6 +12224,64 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); return Builder.CreateExtractValue(Result, 0); } + case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: + return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: + return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: + return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci: + return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci: + return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci: + return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr: + return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr: + return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr: + return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr: + return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr: + return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr: + return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci: + return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci: + return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci: + return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci: + return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci: + return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true); + case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr: + return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr: + return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr: + return 
MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr: + return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr: + return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false); + case Hexagon::BI__builtin_brev_ldub: + return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty); + case Hexagon::BI__builtin_brev_ldb: + return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty); + case Hexagon::BI__builtin_brev_lduh: + return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty); + case Hexagon::BI__builtin_brev_ldh: + return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty); + case Hexagon::BI__builtin_brev_ldw: + return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); + case Hexagon::BI__builtin_brev_ldd: + return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); + default: + break; } // switch return nullptr; diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index d24ef0a8a974..5fcc9e011bcb 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -15,17 +15,20 @@ #include "CGCUDARuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/Format.h" using namespace clang; using namespace CodeGen; namespace { +constexpr unsigned CudaFatMagic = 0x466243b1; +constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF" class CGNVCUDARuntime : public CGCUDARuntime { @@ -41,14 +44,22 @@ private: /// Keeps track of kernel launch stubs emitted in this module llvm::SmallVector<llvm::Function *, 16> EmittedKernels; llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; - /// Keeps track of variables containing handles of GPU binaries. Populated by + /// Keeps track of variable containing handle of GPU binary. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() - llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles; + llvm::GlobalVariable *GpuBinaryHandle = nullptr; + /// Whether we generate relocatable device code. + bool RelocatableDeviceCode; llvm::Constant *getSetupArgumentFn() const; llvm::Constant *getLaunchFn() const; + llvm::FunctionType *getRegisterGlobalsFnTy() const; + llvm::FunctionType *getCallbackFnTy() const; + llvm::FunctionType *getRegisterLinkedBinaryFnTy() const; + std::string addPrefixToName(StringRef FuncName) const; + std::string addUnderscoredPrefixToName(StringRef FuncName) const; + /// Creates a function to register all kernel stubs generated in this module. llvm::Function *makeRegisterGlobalsFn(); @@ -64,14 +75,34 @@ private: auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str()); llvm::GlobalVariable *GV = cast<llvm::GlobalVariable>(ConstStr.getPointer()); - if (!SectionName.empty()) + if (!SectionName.empty()) { GV->setSection(SectionName); + // Mark the address as used which make sure that this section isn't + // merged and we will really have it in the object file. 
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); + } if (Alignment) GV->setAlignment(Alignment); return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), ConstStr.getPointer(), Zeros); - } + } + + /// Helper function that generates an empty dummy function returning void. + llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) { + assert(FnTy->getReturnType()->isVoidTy() && + "Can only generate dummy functions returning void!"); + llvm::Function *DummyFunc = llvm::Function::Create( + FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule); + + llvm::BasicBlock *DummyBlock = + llvm::BasicBlock::Create(Context, "", DummyFunc); + CGBuilderTy FuncBuilder(CGM, Context); + FuncBuilder.SetInsertPoint(DummyBlock); + FuncBuilder.CreateRetVoid(); + + return DummyFunc; + } void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); @@ -91,9 +122,22 @@ public: } +std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const { + if (CGM.getLangOpts().HIP) + return ((Twine("hip") + Twine(FuncName)).str()); + return ((Twine("cuda") + Twine(FuncName)).str()); +} +std::string +CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { + if (CGM.getLangOpts().HIP) + return ((Twine("__hip") + Twine(FuncName)).str()); + return ((Twine("__cuda") + Twine(FuncName)).str()); +} + CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), - TheModule(CGM.getModule()) { + TheModule(CGM.getModule()), + RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -109,15 +153,37 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; - return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, - Params, false), - "cudaSetupArgument"); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Params, false), + addPrefixToName("SetupArgument")); } llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { - // cudaError_t cudaLaunch(char *) - return CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch"); + if (CGM.getLangOpts().HIP) { + // hipError_t hipLaunchByPtr(char *); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr"); + } else { + // cudaError_t cudaLaunch(char *); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch"); + } +} + +llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const { + return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false); +} + +llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const { + return llvm::FunctionType::get(VoidTy, VoidPtrTy, false); +} + +llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { + auto CallbackFnTy = getCallbackFnTy(); + auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy(); + llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy, + VoidPtrTy, CallbackFnTy->getPointerTo()}; + return llvm::FunctionType::get(VoidTy, Params, false); } void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, @@ -181,8 +247,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { return nullptr; llvm::Function *RegisterKernelsFunc = llvm::Function::Create( - llvm::FunctionType::get(VoidTy, 
VoidPtrPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule); + getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage, + addUnderscoredPrefixToName("_register_globals"), &TheModule); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc); CGBuilderTy Builder(CGM, Context); @@ -195,7 +261,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterFuncParams, false), - "__cudaRegisterFunction"); + addUnderscoredPrefixToName("RegisterFunction")); // Extract GpuBinaryHandle passed as the first argument passed to // __cuda_register_globals() and generate __cudaRegisterFunction() call for @@ -219,7 +285,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { IntTy, IntTy}; llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), - "__cudaRegisterVar"); + addUnderscoredPrefixToName("RegisterVar")); for (auto &Pair : DeviceVars) { llvm::GlobalVariable *Var = Pair.first; unsigned Flags = Pair.second; @@ -243,133 +309,307 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { } /// Creates a global constructor function for the module: +/// +/// For CUDA: /// \code /// void __cuda_module_ctor(void*) { -/// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0); -/// __cuda_register_globals(Handle0); -/// ... -/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN); -/// __cuda_register_globals(HandleN); +/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob); +/// __cuda_register_globals(Handle); +/// } +/// \endcode +/// +/// For HIP: +/// \code +/// void __hip_module_ctor(void*) { +/// if (__hip_gpubin_handle == 0) { +/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob); +/// __hip_register_globals(__hip_gpubin_handle); +/// } /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { - // No need to generate ctors/dtors if there are no GPU binaries. - if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty()) + bool IsHIP = CGM.getLangOpts().HIP; + // No need to generate ctors/dtors if there is no GPU binary. + StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; + if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; - // void __cuda_register_globals(void* handle); + // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); - // void ** __cudaRegisterFatBinary(void *); + // We always need a function to pass in as callback. Create a dummy + // implementation if we don't need to register anything. + if (RelocatableDeviceCode && !RegisterGlobalsFunc) + RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); + + // void ** __{cuda|hip}RegisterFatBinary(void *); llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), - "__cudaRegisterFatBinary"); + addUnderscoredPrefixToName("RegisterFatBinary")); // struct { int magic, int version, void * gpu_binary, void * dont_care }; llvm::StructType *FatbinWrapperTy = llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy); + // Register GPU binary with the CUDA runtime, store returned handle in a + // global variable and save a reference in GpuBinaryHandle to be cleaned up + // in destructor on exit. 
Then associate all known kernels with the GPU binary + // handle so CUDA runtime can figure out what to call on the GPU side. + std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary; + if (!IsHIP) { + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName); + if (std::error_code EC = CudaGpuBinaryOrErr.getError()) { + CGM.getDiags().Report(diag::err_cannot_open_file) + << CudaGpuBinaryFileName << EC.message(); + return nullptr; + } + CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get()); + } + llvm::Function *ModuleCtorFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule); + llvm::GlobalValue::InternalLinkage, + addUnderscoredPrefixToName("_module_ctor"), &TheModule); llvm::BasicBlock *CtorEntryBB = llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc); CGBuilderTy CtorBuilder(CGM, Context); CtorBuilder.SetInsertPoint(CtorEntryBB); - // For each GPU binary, register it with the CUDA runtime and store returned - // handle in a global variable and save the handle in GpuBinaryHandles vector - // to be cleaned up in destructor on exit. Then associate all known kernels - // with the GPU binary handle so CUDA runtime can figure out what to call on - // the GPU side. - for (const std::string &GpuBinaryFileName : - CGM.getCodeGenOpts().CudaGpuBinaryFileNames) { - llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr = - llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName); - if (std::error_code EC = GpuBinaryOrErr.getError()) { - CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName - << EC.message(); - continue; - } - - const char *FatbinConstantName = - CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"; + const char *FatbinConstantName; + const char *FatbinSectionName; + const char *ModuleIDSectionName; + StringRef ModuleIDPrefix; + llvm::Constant *FatBinStr; + unsigned FatMagic; + if (IsHIP) { + FatbinConstantName = ".hip_fatbin"; + FatbinSectionName = ".hipFatBinSegment"; + + ModuleIDSectionName = "__hip_module_id"; + ModuleIDPrefix = "__hip_"; + + // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin. + // The external symbol is supposed to contain the fat binary but will be + // populated somewhere else, e.g. by lld through link script. + FatBinStr = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, + "__hip_fatbin", nullptr, + llvm::GlobalVariable::NotThreadLocal); + cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName); + + FatMagic = HIPFatMagic; + } else { + if (RelocatableDeviceCode) + FatbinConstantName = CGM.getTriple().isMacOSX() + ? "__NV_CUDA,__nv_relfatbin" + : "__nv_relfatbin"; + else + FatbinConstantName = + CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"; // NVIDIA's cuobjdump looks for fatbins in this section. - const char *FatbinSectionName = + FatbinSectionName = CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment"; - // Create initialized wrapper structure that points to the loaded GPU binary - ConstantInitBuilder Builder(CGM); - auto Values = Builder.beginStruct(FatbinWrapperTy); - // Fatbin wrapper magic. - Values.addInt(IntTy, 0x466243b1); - // Fatbin version. - Values.addInt(IntTy, 1); - // Data. 
- Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(), - "", FatbinConstantName, 8)); - // Unused in fatbin v1. - Values.add(llvm::ConstantPointerNull::get(VoidPtrTy)); - llvm::GlobalVariable *FatbinWrapper = - Values.finishAndCreateGlobal("__cuda_fatbin_wrapper", - CGM.getPointerAlign(), - /*constant*/ true); - FatbinWrapper->setSection(FatbinSectionName); + ModuleIDSectionName = CGM.getTriple().isMacOSX() + ? "__NV_CUDA,__nv_module_id" + : "__nv_module_id"; + ModuleIDPrefix = "__nv_"; + + // For CUDA, create a string literal containing the fat binary loaded from + // the given file. + FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "", + FatbinConstantName, 8); + FatMagic = CudaFatMagic; + } + // Create initialized wrapper structure that points to the loaded GPU binary + ConstantInitBuilder Builder(CGM); + auto Values = Builder.beginStruct(FatbinWrapperTy); + // Fatbin wrapper magic. + Values.addInt(IntTy, FatMagic); + // Fatbin version. + Values.addInt(IntTy, 1); + // Data. + Values.add(FatBinStr); + // Unused in fatbin v1. + Values.add(llvm::ConstantPointerNull::get(VoidPtrTy)); + llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal( + addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(), + /*constant*/ true); + FatbinWrapper->setSection(FatbinSectionName); + + // There is only one HIP fat binary per linked module, however there are + // multiple constructor functions. Make sure the fat binary is registered + // only once. The constructor functions are executed by the dynamic loader + // before the program gains control. The dynamic loader cannot execute the + // constructor functions concurrently since doing that would not guarantee + // thread safety of the loaded program. Therefore we can assume sequential + // execution of constructor functions here. + if (IsHIP) { + llvm::BasicBlock *IfBlock = + llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc); + llvm::BasicBlock *ExitBlock = + llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc); + // The name, size, and initialization pattern of this variable is part + // of HIP ABI. + GpuBinaryHandle = new llvm::GlobalVariable( + TheModule, VoidPtrPtrTy, /*isConstant=*/false, + llvm::GlobalValue::LinkOnceAnyLinkage, + /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), + "__hip_gpubin_handle"); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + Address GpuBinaryAddr( + GpuBinaryHandle, + CharUnits::fromQuantity(GpuBinaryHandle->getAlignment())); + { + auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); + llvm::Constant *Zero = + llvm::Constant::getNullValue(HandleValue->getType()); + llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero); + CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock); + } + { + CtorBuilder.SetInsertPoint(IfBlock); + // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper); + llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( + RegisterFatbinFunc, + CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy)); + CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr); + CtorBuilder.CreateBr(ExitBlock); + } + { + CtorBuilder.SetInsertPoint(ExitBlock); + // Call __hip_register_globals(GpuBinaryHandle); + if (RegisterGlobalsFunc) { + auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); + CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue); + } + } + } else if (!RelocatableDeviceCode) { + // Register binary with CUDA runtime. This is substantially different in + // default mode vs. 
separate compilation! // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper); llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( RegisterFatbinFunc, CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy)); - llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable( + GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle"); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); // Call __cuda_register_globals(GpuBinaryHandle); if (RegisterGlobalsFunc) CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); + } else { + // Generate a unique module ID. + SmallString<64> ModuleID; + llvm::raw_svector_ostream OS(ModuleID); + OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID()); + llvm::Constant *ModuleIDConstant = + makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32); + + // Create an alias for the FatbinWrapper that nvcc will look for. + llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage, + Twine("__fatbinwrap") + ModuleID, FatbinWrapper); + + // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *, + // void *, void (*)(void **)) + SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary"); + RegisterLinkedBinaryName += ModuleID; + llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( + getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName); + + assert(RegisterGlobalsFunc && "Expecting at least dummy function!"); + llvm::Value *Args[] = {RegisterGlobalsFunc, + CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy), + ModuleIDConstant, + makeDummyFunction(getCallbackFnTy())}; + CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args); + } - // Save GpuBinaryHandle so we can unregister it in destructor. - GpuBinaryHandles.push_back(GpuBinaryHandle); + // Create destructor and register it with atexit() the way NVCC does it. Doing + // it during regular destructor phase worked in CUDA before 9.2 but results in + // double-free in 9.2. + if (llvm::Function *CleanupFn = makeModuleDtorFunction()) { + // extern "C" int atexit(void (*f)(void)); + llvm::FunctionType *AtExitTy = + llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); + llvm::Constant *AtExitFunc = + CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), + /*Local=*/true); + CtorBuilder.CreateCall(AtExitFunc, CleanupFn); } CtorBuilder.CreateRetVoid(); return ModuleCtorFunc; } -/// Creates a global destructor function that unregisters all GPU code blobs +/// Creates a global destructor function that unregisters the GPU code blob /// registered by constructor. +/// +/// For CUDA: /// \code /// void __cuda_module_dtor(void*) { -/// __cudaUnregisterFatBinary(Handle0); -/// ... -/// __cudaUnregisterFatBinary(HandleN); +/// __cudaUnregisterFatBinary(Handle); +/// } +/// \endcode +/// +/// For HIP: +/// \code +/// void __hip_module_dtor(void*) { +/// if (__hip_gpubin_handle) { +/// __hipUnregisterFatBinary(__hip_gpubin_handle); +/// __hip_gpubin_handle = 0; +/// } /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { - // No need for destructor if we don't have handles to unregister. - if (GpuBinaryHandles.empty()) + // No need for destructor if we don't have a handle to unregister. 
+ if (!GpuBinaryHandle) return nullptr; // void __cudaUnregisterFatBinary(void ** handle); llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), - "__cudaUnregisterFatBinary"); + addUnderscoredPrefixToName("UnregisterFatBinary")); llvm::Function *ModuleDtorFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule); + llvm::GlobalValue::InternalLinkage, + addUnderscoredPrefixToName("_module_dtor"), &TheModule); + llvm::BasicBlock *DtorEntryBB = llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc); CGBuilderTy DtorBuilder(CGM, Context); DtorBuilder.SetInsertPoint(DtorEntryBB); - for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) { - auto HandleValue = - DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign()); + Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity( + GpuBinaryHandle->getAlignment())); + auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr); + // There is only one HIP fat binary per linked module, however there are + // multiple destructor functions. Make sure the fat binary is unregistered + // only once. + if (CGM.getLangOpts().HIP) { + llvm::BasicBlock *IfBlock = + llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc); + llvm::BasicBlock *ExitBlock = + llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc); + llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType()); + llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero); + DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock); + + DtorBuilder.SetInsertPoint(IfBlock); DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue); - } + DtorBuilder.CreateStore(Zero, GpuBinaryAddr); + DtorBuilder.CreateBr(ExitBlock); + DtorBuilder.SetInsertPoint(ExitBlock); + } else { + DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue); + } DtorBuilder.CreateRetVoid(); return ModuleDtorFunc; } diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 5ef4dc45fba1..475f17b77d92 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -109,17 +109,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { D->getType()->getAs<FunctionType>()->getCallConv()) return true; - return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base), - GlobalDecl(BaseD, Dtor_Base)); -} - -/// Try to emit a definition as a global alias for another definition. -/// If \p InEveryTU is true, we know that an equivalent alias can be produced -/// in every translation unit. -bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, - GlobalDecl TargetDecl) { - if (!getCodeGenOpts().CXXCtorDtorAliases) - return true; + GlobalDecl AliasDecl(D, Dtor_Base); + GlobalDecl TargetDecl(BaseD, Dtor_Base); // The alias will use the linkage of the referent. If we can't // support aliases with that linkage, fail. @@ -193,6 +184,9 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "", Aliasee, &getModule()); + // Destructors are always unnamed_addr. + Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + // Switch any previous uses to the alias. if (Entry) { assert(Entry->getType() == AliasType && @@ -205,7 +199,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, } // Finally, set up the alias with its proper name and attributes. 
- setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias); + SetCommonAttributes(AliasDecl, Alias); return false; } @@ -227,10 +221,9 @@ llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD, } setFunctionLinkage(GD, Fn); - setFunctionDLLStorageClass(GD, Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo); - setFunctionDefinitionAttributes(MD, Fn); + setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(MD, Fn); return Fn; } @@ -243,6 +236,11 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor( if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { GD = GlobalDecl(CD, toCXXCtorType(Type)); } else { + // Always alias equivalent complete destructors to base destructors in the + // MS ABI. + if (getTarget().getCXXABI().isMicrosoft() && + Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0) + Type = StructorType::Base; GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type)); } @@ -263,7 +261,6 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, const CXXRecordDecl *RD) { assert(!CGF.CGM.getTarget().getCXXABI().isMicrosoft() && "No kext in Microsoft ABI"); - GD = GD.getCanonicalDecl(); CodeGenModule &CGM = CGF.CGM; llvm::Value *VTable = CGM.getCXXABI().getAddrOfVTable(RD, CharUnits()); Ty = Ty->getPointerTo()->getPointerTo(); @@ -279,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt"); llvm::Value *VFunc = CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes); - CGCallee Callee(GD.getDecl(), VFunc); + CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc); return Callee; } diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index a27c3e9d27e3..0611749acf17 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -287,6 +287,20 @@ CGCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF, return nullptr; } +void CGCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV, + const CXXDestructorDecl *Dtor, + CXXDtorType DT) const { + // Assume the base C++ ABI has no special rules for destructor variants. + CGM.setDLLImportDLLExport(GV, Dtor); +} + +llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage( + GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { + // Delegate back to CGM by default. + return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, + /*isConstantVariable=*/false); +} + bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) { return false; } diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 83426dc3a03c..65b50e14f436 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -40,7 +40,7 @@ class CodeGenFunction; class CodeGenModule; struct CatchTypeInfo; -/// \brief Implements C++ ABI-specific code generation functions. +/// Implements C++ ABI-specific code generation functions. class CGCXXABI { protected: CodeGenModule &CGM; @@ -222,7 +222,7 @@ protected: /// is required. llvm::Constant *getMemberPointerAdjustment(const CastExpr *E); - /// \brief Computes the non-virtual adjustment needed for a member pointer + /// Computes the non-virtual adjustment needed for a member pointer /// conversion along an inheritance path stored in an APValue. Unlike /// getMemberPointerAdjustment(), the adjustment can be negative if the path /// is from a derived type to a base type. 
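The destructor-aliasing change in CGCXX.cpp above is easiest to see with a small sketch. This is illustrative only and not code from the patch; the class names are invented. When a class has no virtual bases, its complete-object and base-object destructor variants do identical work, so a request for the complete variant can simply be redirected to the base variant, which is what the new Microsoft ABI special case in getAddrOfCXXStructor relies on:

    struct NoVBases {                      // no virtual bases: the complete and base
      ~NoVBases();                         // destructors are equivalent, so one symbol
    };                                     // can stand in for the other
    struct WithVBase : virtual NoVBases {
      ~WithVBase();                        // must additionally destroy the virtual base
    };                                     // subobject, so the two variants stay distinct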
@@ -237,7 +237,7 @@ public: virtual void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) = 0; virtual llvm::GlobalVariable *getThrowInfo(QualType T) { return nullptr; } - /// \brief Determine whether it's possible to emit a vtable for \p RD, even + /// Determine whether it's possible to emit a vtable for \p RD, even /// though we do not know that the vtable has been marked as used by semantic /// analysis. virtual bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const = 0; @@ -319,6 +319,14 @@ public: virtual bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, CXXDtorType DT) const = 0; + virtual void setCXXDestructorDLLStorage(llvm::GlobalValue *GV, + const CXXDestructorDecl *Dtor, + CXXDtorType DT) const; + + virtual llvm::GlobalValue::LinkageTypes + getCXXDestructorLinkage(GVALinkage Linkage, const CXXDestructorDecl *Dtor, + CXXDtorType DT) const; + /// Emit destructor variants required by this ABI. virtual void EmitCXXDestructors(const CXXDestructorDecl *D) = 0; @@ -414,8 +422,7 @@ public: /// Build a virtual function pointer in the ABI-specific way. virtual CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF, - GlobalDecl GD, - Address This, + GlobalDecl GD, Address This, llvm::Type *Ty, SourceLocation Loc) = 0; @@ -434,6 +441,7 @@ public: /// base tables. virtual void emitVirtualInheritanceTables(const CXXRecordDecl *RD) = 0; + virtual bool exportThunk() = 0; virtual void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD, bool ReturnAdjustment) = 0; @@ -599,6 +607,17 @@ CGCXXABI *CreateItaniumCXXABI(CodeGenModule &CGM); /// Creates a Microsoft-family ABI. CGCXXABI *CreateMicrosoftCXXABI(CodeGenModule &CGM); +struct CatchRetScope final : EHScopeStack::Cleanup { + llvm::CatchPadInst *CPI; + + CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {} + + void Emit(CodeGenFunction &CGF, Flags flags) override { + llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest"); + CGF.Builder.CreateCatchRet(CPI, BB); + CGF.EmitBlock(BB); + } +}; } } diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 38d7344572d3..f066ce168588 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -29,15 +29,15 @@ #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/Intrinsics.h" using namespace clang; using namespace CodeGen; @@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD); } +/// Set calling convention for CUDA/HIP kernel. +static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM, + const FunctionDecl *FD) { + if (FD->hasAttr<CUDAGlobalAttr>()) { + const FunctionType *FT = FTy->getAs<FunctionType>(); + CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT); + FTy = FT->getCanonicalTypeUnqualified(); + } +} + /// Arrange the argument and result information for a declaration or /// definition of the given C++ non-static member function. The /// member function must be an ordinary function, i.e. 
not a @@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!"); assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!"); - CanQual<FunctionProtoType> prototype = GetFormalType(MD); + CanQualType FT = GetFormalType(MD).getAs<Type>(); + setCUDAKernelCallingConvention(FT, CGM, MD); + auto prototype = FT.getAs<FunctionProtoType>(); if (MD->isInstance()) { // The abstract case is perfectly fine. @@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified(); assert(isa<FunctionType>(FTy)); + setCUDAKernelCallingConvention(FTy, CGM, FD); // When declaring a function without a prototype, always use a // non-variadic type. @@ -513,8 +526,8 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { /// correct type, and the caller will bitcast the function to the correct /// prototype. const CGFunctionInfo & -CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) { - assert(MD->isVirtual() && "only virtual memptrs have thunks"); +CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { + assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) }; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, @@ -803,6 +816,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); FI->NoCallerSavedRegs = info.getNoCallerSavedRegs(); + FI->NoCfCheck = info.getNoCfCheck(); FI->Required = required; FI->HasRegParm = info.getHasRegParm(); FI->RegParm = info.getRegParm(); @@ -904,8 +918,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) { CharUnits UnionSize = CharUnits::Zero(); for (const auto *FD : RD->fields()) { - // Skip zero length bitfields. - if (FD->isBitField() && FD->getBitWidthValue(Context) == 0) + if (FD->isZeroLengthBitField(Context)) continue; assert(!FD->isBitField() && "Cannot expand structure with bit-field members."); @@ -926,8 +939,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) { } for (const auto *FD : RD->fields()) { - // Skip zero length bitfields. - if (FD->isBitField() && FD->getBitWidthValue(Context) == 0) + if (FD->isZeroLengthBitField(Context)) continue; assert(!FD->isBitField() && "Cannot expand structure with bit-field members."); @@ -1040,42 +1052,49 @@ void CodeGenFunction::ExpandTypeFromArgs( } void CodeGenFunction::ExpandTypeToArgs( - QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy, + QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy, SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos) { auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) { - forConstantArrayExpansion(*this, CAExp, RV.getAggregateAddress(), - [&](Address EltAddr) { - RValue EltRV = - convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()); - ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos); - }); + Address Addr = Arg.hasLValue() ? 
Arg.getKnownLValue().getAddress() + : Arg.getKnownRValue().getAggregateAddress(); + forConstantArrayExpansion( + *this, CAExp, Addr, [&](Address EltAddr) { + CallArg EltArg = CallArg( + convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()), + CAExp->EltTy); + ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs, + IRCallArgPos); + }); } else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) { - Address This = RV.getAggregateAddress(); + Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + : Arg.getKnownRValue().getAggregateAddress(); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. Address Base = GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1, /*NullCheckValue=*/false, SourceLocation()); - RValue BaseRV = RValue::getAggregate(Base); + CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType()); // Recurse onto bases. - ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs, + ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs, IRCallArgPos); } LValue LV = MakeAddrLValue(This, Ty); for (auto FD : RExp->Fields) { - RValue FldRV = EmitRValueForField(LV, FD, SourceLocation()); - ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs, + CallArg FldArg = + CallArg(EmitRValueForField(LV, FD, SourceLocation()), FD->getType()); + ExpandTypeToArgs(FD->getType(), FldArg, IRFuncTy, IRCallArgs, IRCallArgPos); } } else if (isa<ComplexExpansion>(Exp.get())) { - ComplexPairTy CV = RV.getComplexVal(); + ComplexPairTy CV = Arg.getKnownRValue().getComplexVal(); IRCallArgs[IRCallArgPos++] = CV.first; IRCallArgs[IRCallArgPos++] = CV.second; } else { assert(isa<NoExpansion>(Exp.get())); + auto RV = Arg.getKnownRValue(); assert(RV.isScalar() && "Unexpected non-scalar rvalue during struct expansion."); @@ -1479,7 +1498,8 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context, /***/ bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) { - return FI.getReturnInfo().isIndirect(); + const auto &RI = FI.getReturnInfo(); + return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet()); } bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) { @@ -1672,7 +1692,7 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, return; if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) && - FPT->isNothrow(Ctx)) + FPT->isNothrow()) FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } @@ -1714,12 +1734,19 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute("less-precise-fpmad", llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); + if (CodeGenOpts.NullPointerIsValid) + FuncAttrs.addAttribute("null-pointer-is-valid", "true"); if (!CodeGenOpts.FPDenormalMode.empty()) FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode); FuncAttrs.addAttribute("no-trapping-math", llvm::toStringRef(CodeGenOpts.NoTrappingMath)); + // Strict (compliant) code is the default, so only add this attribute to + // indicate that we are trying to workaround a problem case. + if (!CodeGenOpts.StrictFloatCastOverflow) + FuncAttrs.addAttribute("strict-float-cast-overflow", "false"); + // TODO: Are these all needed? // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. 
FuncAttrs.addAttribute("no-infs-fp-math", @@ -1738,6 +1765,10 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, "correctly-rounded-divide-sqrt-fp-math", llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); + if (getLangOpts().OpenCL) + FuncAttrs.addAttribute("denorms-are-zero", + llvm::toStringRef(CodeGenOpts.FlushDenorm)); + // TODO: Reciprocal estimate codegen options should apply to instructions? const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals; if (!Recips.empty()) @@ -1769,7 +1800,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // Respect -fcuda-flush-denormals-to-zero. - if (getLangOpts().CUDADeviceFlushDenormalsToZero) + if (CodeGenOpts.FlushDenorm) FuncAttrs.addAttribute("nvptx-f32ftz", "true"); } } @@ -1793,7 +1824,7 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoReturn); // If we have information about the function prototype, we can learn - // attributes form there. + // attributes from there. AddAttributesFromFunctionProtoType(getContext(), FuncAttrs, CalleeInfo.getCalleeFunctionProtoType()); @@ -1838,18 +1869,20 @@ void CodeGenModule::ConstructAttributeList( } if (TargetDecl->hasAttr<RestrictAttr>()) RetAttrs.addAttribute(llvm::Attribute::NoAlias); - if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) + if (TargetDecl->hasAttr<ReturnsNonNullAttr>() && + !CodeGenOpts.NullPointerIsValid) RetAttrs.addAttribute(llvm::Attribute::NonNull); if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>()) FuncAttrs.addAttribute("no_caller_saved_registers"); + if (TargetDecl->hasAttr<AnyX86NoCfCheckAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { Optional<unsigned> NumElemsParam; - // alloc_size args are base-1, 0 means not present. - if (unsigned N = AllocSize->getNumElemsParam()) - NumElemsParam = N - 1; - FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam() - 1, + if (AllocSize->getNumElemsParam().isValid()) + NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex(); + FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(), NumElemsParam); } } @@ -1870,53 +1903,40 @@ void CodeGenModule::ConstructAttributeList( } } - if (!AttrOnCallSite) { - bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || - (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() || - TargetDecl->hasAttr<AnyX86InterruptAttr>())); - FuncAttrs.addAttribute("disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); - - // Add target-cpu and target-features attributes to functions. If - // we have a decl for the function and it has a target attribute then - // parse that and add it to the feature set. - StringRef TargetCPU = getTarget().getTargetOpts().CPU; - std::vector<std::string> Features; - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl); - if (FD && FD->hasAttr<TargetAttr>()) { - llvm::StringMap<bool> FeatureMap; - getFunctionFeatureMap(FeatureMap, FD); - - // Produce the canonical string for this set of features. - for (llvm::StringMap<bool>::const_iterator it = FeatureMap.begin(), - ie = FeatureMap.end(); - it != ie; ++it) - Features.push_back((it->second ? "+" : "-") + it->first().str()); - - // Now add the target-cpu and target-features to the function. 
- // While we populated the feature map above, we still need to - // get and parse the target attribute so we can get the cpu for - // the function. - const auto *TD = FD->getAttr<TargetAttr>(); - TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.Architecture != "" && - getTarget().isValidCPUName(ParsedAttr.Architecture)) - TargetCPU = ParsedAttr.Architecture; + if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) { + if (getLangOpts().OpenCLVersion <= 120) { + // OpenCL v1.2 Work groups are always uniform + FuncAttrs.addAttribute("uniform-work-group-size", "true"); } else { - // Otherwise just add the existing target cpu and target features to the - // function. - Features = getTarget().getTargetOpts().Features; + // OpenCL v2.0 Work groups may be whether uniform or not. + // '-cl-uniform-work-group-size' compile option gets a hint + // to the compiler that the global work-size be a multiple of + // the work-group size specified to clEnqueueNDRangeKernel + // (i.e. work groups are uniform). + FuncAttrs.addAttribute("uniform-work-group-size", + llvm::toStringRef(CodeGenOpts.UniformWGSize)); } + } - if (TargetCPU != "") - FuncAttrs.addAttribute("target-cpu", TargetCPU); - if (!Features.empty()) { - std::sort(Features.begin(), Features.end()); - FuncAttrs.addAttribute( - "target-features", - llvm::join(Features, ",")); + if (!AttrOnCallSite) { + bool DisableTailCalls = false; + + if (CodeGenOpts.DisableTailCalls) + DisableTailCalls = true; + else if (TargetDecl) { + if (TargetDecl->hasAttr<DisableTailCallsAttr>() || + TargetDecl->hasAttr<AnyX86InterruptAttr>()) + DisableTailCalls = true; + else if (CodeGenOpts.NoEscapingBlockTailCalls) { + if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl)) + if (!BD->doesNotEscape()) + DisableTailCalls = true; + } } + + FuncAttrs.addAttribute("disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); + GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs); } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); @@ -1925,9 +1945,9 @@ void CodeGenModule::ConstructAttributeList( const ABIArgInfo &RetAI = FI.getReturnInfo(); switch (RetAI.getKind()) { case ABIArgInfo::Extend: - if (RetTy->hasSignedIntegerRepresentation()) + if (RetAI.isSignExt()) RetAttrs.addAttribute(llvm::Attribute::SExt); - else if (RetTy->hasUnsignedIntegerRepresentation()) + else RetAttrs.addAttribute(llvm::Attribute::ZExt); LLVM_FALLTHROUGH; case ABIArgInfo::Direct: @@ -1957,7 +1977,8 @@ void CodeGenModule::ConstructAttributeList( if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy) .getQuantity()); - else if (getContext().getTargetAddressSpace(PTy) == 0) + else if (getContext().getTargetAddressSpace(PTy) == 0 && + !CodeGenOpts.NullPointerIsValid) RetAttrs.addAttribute(llvm::Attribute::NonNull); } @@ -1967,7 +1988,8 @@ void CodeGenModule::ConstructAttributeList( // Attach attributes to sret. if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; - SRETAttrs.addAttribute(llvm::Attribute::StructRet); + if (!RetAI.getSuppressSRet()) + SRETAttrs.addAttribute(llvm::Attribute::StructRet); hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); @@ -2006,14 +2028,10 @@ void CodeGenModule::ConstructAttributeList( // sense to do it here because parameters are so messed up. 
switch (AI.getKind()) { case ABIArgInfo::Extend: - if (ParamType->isSignedIntegerOrEnumerationType()) + if (AI.isSignExt()) Attrs.addAttribute(llvm::Attribute::SExt); - else if (ParamType->isUnsignedIntegerOrEnumerationType()) { - if (getTypes().getABIInfo().shouldSignExtUnsignedType(ParamType)) - Attrs.addAttribute(llvm::Attribute::SExt); - else - Attrs.addAttribute(llvm::Attribute::ZExt); - } + else + Attrs.addAttribute(llvm::Attribute::ZExt); LLVM_FALLTHROUGH; case ABIArgInfo::Direct: if (ArgNo == 0 && FI.isChainCall()) @@ -2070,7 +2088,8 @@ void CodeGenModule::ConstructAttributeList( if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy) .getQuantity()); - else if (getContext().getTargetAddressSpace(PTy) == 0) + else if (getContext().getTargetAddressSpace(PTy) == 0 && + !CodeGenOpts.NullPointerIsValid) Attrs.addAttribute(llvm::Attribute::NonNull); } @@ -2255,11 +2274,16 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end(); i != e; ++i, ++info_it, ++ArgNo) { const VarDecl *Arg = *i; - QualType Ty = info_it->type; const ABIArgInfo &ArgI = info_it->info; bool isPromoted = isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted(); + // We are converting from ABIArgInfo type to VarDecl type directly, unless + // the parameter is promoted. In this case we convert to + // CGFunctionInfo::ArgInfo type with subsequent argument demotion. + QualType Ty = isPromoted ? info_it->type : Arg->getType(); + assert(hasScalarEvaluationKind(Ty) == + hasScalarEvaluationKind(Arg->getType())); unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); @@ -2325,7 +2349,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), - PVD->getFunctionScopeIndex())) + PVD->getFunctionScopeIndex()) && + !CGM.getCodeGenOpts().NullPointerIsValid) AI->addAttr(llvm::Attribute::NonNull); QualType OTy = PVD->getOriginalType(); @@ -2344,7 +2369,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, Attrs.addDereferenceableAttr( getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize); AI->addAttrs(Attrs); - } else if (getContext().getTargetAddressSpace(ETy) == 0) { + } else if (getContext().getTargetAddressSpace(ETy) == 0 && + !CGM.getCodeGenOpts().NullPointerIsValid) { AI->addAttr(llvm::Attribute::NonNull); } } @@ -2354,7 +2380,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // we can't use the dereferenceable attribute, but in addrspace(0) // we know that it must be nonnull. 
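A rough illustration of the Extend handling just above (a sketch, not part of the patch): whether a small integer parameter or return value gets the signext or zeroext attribute is now recorded once in the ABIArgInfo via isSignExt(), rather than being re-derived from the QualType at attribute time. That also covers targets such as MIPS64, whose ABI sign-extends even unsigned 32-bit values in registers.

    signed char   f(void);       // small signed result   -> typically 'signext'
    unsigned char g(void);       // small unsigned result -> typically 'zeroext',
                                 // but an ABI may choose sign extension instead;
                                 // that choice is what ABIArgInfo::isSignExt()
                                 // now carries for both returns and parameters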
if (ArrTy->getSizeModifier() == VariableArrayType::Static && - !getContext().getTargetAddressSpace(ArrTy->getElementType())) + !getContext().getTargetAddressSpace(ArrTy->getElementType()) && + !CGM.getCodeGenOpts().NullPointerIsValid) AI->addAttr(llvm::Attribute::NonNull); } @@ -3022,7 +3049,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, Ty.getQualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); } void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, @@ -3062,6 +3090,19 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, } else { args.add(convertTempToRValue(local, type, loc), type); } + + // Deactivate the cleanup for the callee-destructed param that was pushed. + if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk && + type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee() && + type.isDestructedType()) { + EHScopeStack::stable_iterator cleanup = + CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param)); + assert(cleanup.isValid() && + "cleanup for callee-destructed param not recorded"); + // This unreachable is a temporary marker which will be removed later. + llvm::Instruction *isActive = Builder.CreateUnreachable(); + args.addArgCleanupDeactivation(cleanup, isActive); + } } static bool isProvablyNull(llvm::Value *addr) { @@ -3143,7 +3184,6 @@ static void emitWritebacks(CodeGenFunction &CGF, static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF, const CallArgList &CallArgs) { - assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()); ArrayRef<CallArgList::CallArgCleanup> Cleanups = CallArgs.getCleanupsToDeactivate(); // Iterate in reverse to increase the likelihood of popping the cleanup. @@ -3430,13 +3470,17 @@ void CodeGenFunction::EmitCallArgs( assert(InitialArgSize + 1 == Args.size() && "The code below depends on only adding one arg per EmitCallArg"); (void)InitialArgSize; - RValue RVArg = Args.back().RV; - EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC, - ParamsToSkip + Idx); - // @llvm.objectsize should never have side-effects and shouldn't need - // destruction/cleanups, so we can safely "emit" it after its arg, - // regardless of right-to-leftness - MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg); + // Since pointer argument are never emitted as LValue, it is safe to emit + // non-null argument check for r-value only. 
+ if (!Args.back().hasLValue()) { + RValue RVArg = Args.back().getKnownRValue(); + EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC, + ParamsToSkip + Idx); + // @llvm.objectsize should never have side-effects and shouldn't need + // destruction/cleanups, so we can safely "emit" it after its arg, + // regardless of right-to-leftness + MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg); + } } if (!LeftToRight) { @@ -3456,10 +3500,15 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup { QualType Ty; void Emit(CodeGenFunction &CGF, Flags flags) override { - const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor(); - assert(!Dtor->isTrivial()); - CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false, - /*Delegating=*/false, Addr); + QualType::DestructionKind DtorKind = Ty.isDestructedType(); + if (DtorKind == QualType::DK_cxx_destructor) { + const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor(); + assert(!Dtor->isTrivial()); + CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false, + /*Delegating=*/false, Addr); + } else { + CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty)); + } } }; @@ -3478,6 +3527,33 @@ struct DisableDebugLocationUpdates { } // end anonymous namespace +RValue CallArg::getRValue(CodeGenFunction &CGF) const { + if (!HasLV) + return RV; + LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty); + CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap, + LV.isVolatile()); + IsUsed = true; + return RValue::getAggregate(Copy.getAddress()); +} + +void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { + LValue Dst = CGF.MakeAddrLValue(Addr, Ty); + if (!HasLV && RV.isScalar()) + CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true); + else if (!HasLV && RV.isComplex()) + CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true); + else { + auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress(); + LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty); + // We assume that call args are never copied into subobjects. + CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap, + HasLV ? LV.isVolatileQualified() + : RV.isVolatileQualified()); + } + IsUsed = true; +} + void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, QualType type) { DisableDebugLocationUpdates Dis(*this, E); @@ -3501,7 +3577,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, // However, we still have to push an EH-only cleanup in case we unwind before // we make it to the call. if (HasAggregateEvalKind && - CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { // If we're using inalloca, use the argument memory. Otherwise, use a // temporary. 
AggValueSlot Slot; @@ -3510,10 +3586,12 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, else Slot = CreateAggTemp(type, "agg.tmp"); - const CXXRecordDecl *RD = type->getAsCXXRecordDecl(); - bool DestroyedInCallee = - RD && RD->hasNonTrivialDestructor() && - CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default; + bool DestroyedInCallee = true, NeedsEHCleanup = true; + if (const auto *RD = type->getAsCXXRecordDecl()) + DestroyedInCallee = RD->hasNonTrivialDestructor(); + else + NeedsEHCleanup = needsEHCleanup(type.isDestructedType()); + if (DestroyedInCallee) Slot.setExternallyDestructed(); @@ -3521,7 +3599,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, RValue RV = Slot.asRValue(); args.add(RV, type); - if (DestroyedInCallee) { + if (DestroyedInCallee && NeedsEHCleanup) { // Create a no-op GEP between the placeholder and the cleanup so we can // RAUW it successfully. It also serves as a marker of the first // instruction where the cleanup is active. @@ -3538,15 +3616,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) { LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr()); assert(L.isSimple()); - if (L.getAlignment() >= getContext().getTypeAlignInChars(type)) { - args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true); - } else { - // We can't represent a misaligned lvalue in the CallArgList, so copy - // to an aligned temporary now. - Address tmp = CreateMemTemp(type); - EmitAggregateCopy(tmp, L.getAddress(), type, L.isVolatile()); - args.add(RValue::getAggregate(tmp), type); - } + args.addUncopiedAggregate(L, type); return; } @@ -3608,20 +3678,21 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, // Calls which may throw must have operand bundles indicating which funclet // they are nested within. -static void -getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad, - SmallVectorImpl<llvm::OperandBundleDef> &BundleList) { +SmallVector<llvm::OperandBundleDef, 1> +CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; // There is no need for a funclet operand bundle if we aren't inside a // funclet. if (!CurrentFuncletPad) - return; + return BundleList; // Skip intrinsics which cannot throw. auto *CalleeFn = dyn_cast<llvm::Function>(Callee->stripPointerCasts()); if (CalleeFn && CalleeFn->isIntrinsic() && CalleeFn->doesNotThrow()) - return; + return BundleList; BundleList.emplace_back("funclet", CurrentFuncletPad); + return BundleList; } /// Emits a simple call (never an invoke) to the given runtime function. @@ -3629,10 +3700,8 @@ llvm::CallInst * CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, ArrayRef<llvm::Value*> args, const llvm::Twine &name) { - SmallVector<llvm::OperandBundleDef, 1> BundleList; - getBundlesForFunclet(callee, CurrentFuncletPad, BundleList); - - llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name); + llvm::CallInst *call = + Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name); call->setCallingConv(getRuntimeCC()); return call; } @@ -3640,8 +3709,8 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, /// Emits a call or invoke to the given noreturn runtime function. 
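A small sketch (invented names, not from the patch) of the situation the callee-destructed argument handling above deals with: when a by-value argument has a non-trivial destructor and the ABI reports that the callee destroys it (isParamDestroyedInCallee), the caller no longer destroys the temporary itself and only keeps an exception-path cleanup in case the call is never reached.

    struct Payload {
      ~Payload() {}                        // user-provided, hence non-trivial
      int data[4];
    };
    void consume(Payload p) {}             // under an ABI where arguments are destroyed
                                           // in the callee (e.g. the Microsoft C++ ABI),
                                           // ~Payload() for 'p' runs here, inside consume()
    void caller() { consume(Payload{}); }  // the caller keeps only an EH-only cleanup,
                                           // used if an exception unwinds before the call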
void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef<llvm::Value*> args) { - SmallVector<llvm::OperandBundleDef, 1> BundleList; - getBundlesForFunclet(callee, CurrentFuncletPad, BundleList); + SmallVector<llvm::OperandBundleDef, 1> BundleList = + getBundlesForFunclet(callee); if (getInvokeDest()) { llvm::InvokeInst *invoke = @@ -3684,8 +3753,8 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, ArrayRef<llvm::Value *> Args, const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); - SmallVector<llvm::OperandBundleDef, 1> BundleList; - getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList); + SmallVector<llvm::OperandBundleDef, 1> BundleList = + getBundlesForFunclet(Callee); llvm::Instruction *Inst; if (!InvokeDest) @@ -3705,16 +3774,6 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, return llvm::CallSite(Inst); } -/// \brief Store a non-aggregate value to an address to initialize it. For -/// initialization, a non-atomic store will be used. -static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src, - LValue Dst) { - if (Src.isScalar()) - CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true); - else - CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true); -} - void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old, llvm::Value *New) { DeferredReplacements.push_back(std::make_pair(Old, New)); @@ -3728,7 +3787,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SourceLocation Loc) { // FIXME: We no longer need the types from CallArgs; lift up and simplify. - assert(Callee.isOrdinary()); + assert(Callee.isOrdinary() || Callee.isVirtual()); // Handle struct-return functions by passing a pointer to the // location that we would like to return into. @@ -3775,17 +3834,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); - size_t UnusedReturnSize = 0; + Address SRetAlloca = Address::invalid(); + llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getValue(); } else { - SRetPtr = CreateMemTemp(RetTy); + SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca); if (HaveInsertPoint() && ReturnValue.isUnused()) { uint64_t size = CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); - if (EmitLifetimeStart(size, SRetPtr.getPointer())) - UnusedReturnSize = size; + UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer()); } } if (IRFunctionArgs.hasSRetArg()) { @@ -3807,7 +3866,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end(); I != E; ++I, ++info_it, ++ArgNo) { const ABIArgInfo &ArgInfo = info_it->info; - RValue RV = I->RV; // Insert a padding argument to ensure proper alignment. if (IRFunctionArgs.hasPaddingArg(ArgNo)) @@ -3821,13 +3879,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); assert(getTarget().getTriple().getArch() == llvm::Triple::x86); - if (RV.isAggregate()) { + if (I->isAggregate()) { // Replace the placeholder with the appropriate argument slot GEP. + Address Addr = I->hasLValue() + ? 
I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress(); llvm::Instruction *Placeholder = - cast<llvm::Instruction>(RV.getAggregatePointer()); + cast<llvm::Instruction>(Addr.getPointer()); CGBuilderTy::InsertPoint IP = Builder.saveIP(); Builder.SetInsertPoint(Placeholder); - Address Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); Builder.restoreIP(IP); deferPlaceholderReplacement(Placeholder, Addr.getPointer()); } else { @@ -3840,22 +3901,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // from {}* to (%struct.foo*)*. if (Addr.getType() != MemType) Addr = Builder.CreateBitCast(Addr, MemType); - LValue argLV = MakeAddrLValue(Addr, I->Ty); - EmitInitStoreOfNonAggregate(*this, RV, argLV); + I->copyInto(*this, Addr); } break; } case ABIArgInfo::Indirect: { assert(NumIRArgs == 1); - if (RV.isScalar() || RV.isComplex()) { + if (!I->isAggregate()) { // Make a temporary alloca to pass the argument. - Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "indirect-arg-temp", false); + Address Addr = CreateMemTempWithoutCast( + I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); IRCallArgs[FirstIRArg] = Addr.getPointer(); - LValue argLV = MakeAddrLValue(Addr, I->Ty); - EmitInitStoreOfNonAggregate(*this, RV, argLV); + I->copyInto(*this, Addr); } else { // We want to avoid creating an unnecessary temporary+copy here; // however, we need one in three cases: @@ -3863,30 +3922,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // source. (This case doesn't occur on any common architecture.) // 2. If the argument is byval, RV is not sufficiently aligned, and // we cannot force it to be sufficiently aligned. - // 3. If the argument is byval, but RV is located in an address space - // different than that of the argument (0). - Address Addr = RV.getAggregateAddress(); + // 3. If the argument is byval, but RV is not located in default + // or alloca address space. + Address Addr = I->hasLValue() + ? I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress(); + llvm::Value *V = Addr.getPointer(); CharUnits Align = ArgInfo.getIndirectAlign(); const llvm::DataLayout *TD = &CGM.getDataLayout(); - const unsigned RVAddrSpace = Addr.getType()->getAddressSpace(); - const unsigned ArgAddrSpace = - (FirstIRArg < IRFuncTy->getNumParams() - ? 
IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() - : 0); - if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) || - (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align && - llvm::getOrEnforceKnownAlignment(Addr.getPointer(), - Align.getQuantity(), *TD) - < Align.getQuantity()) || - (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { + + assert((FirstIRArg >= IRFuncTy->getNumParams() || + IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() == + TD->getAllocaAddrSpace()) && + "indirect argument must be in alloca address space"); + + bool NeedCopy = false; + + if (Addr.getAlignment() < Align && + llvm::getOrEnforceKnownAlignment(V, Align.getQuantity(), *TD) < + Align.getQuantity()) { + NeedCopy = true; + } else if (I->hasLValue()) { + auto LV = I->getKnownLValue(); + auto AS = LV.getAddressSpace(); + if ((!ArgInfo.getIndirectByVal() && + (LV.getAlignment() >= + getContext().getTypeAlignInChars(I->Ty))) || + (ArgInfo.getIndirectByVal() && + ((AS != LangAS::Default && AS != LangAS::opencl_private && + AS != CGM.getASTAllocaAddressSpace())))) { + NeedCopy = true; + } + } + if (NeedCopy) { // Create an aligned temporary, and copy to it. - Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "byval-temp", false); + Address AI = CreateMemTempWithoutCast( + I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); IRCallArgs[FirstIRArg] = AI.getPointer(); - EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); + I->copyInto(*this, AI); } else { // Skip the extra memcpy call. - IRCallArgs[FirstIRArg] = Addr.getPointer(); + auto *T = V->getType()->getPointerElementType()->getPointerTo( + CGM.getDataLayout().getAllocaAddrSpace()); + IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast( + *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, + true); } } break; @@ -3903,10 +3983,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, ArgInfo.getDirectOffset() == 0) { assert(NumIRArgs == 1); llvm::Value *V; - if (RV.isScalar()) - V = RV.getScalarVal(); + if (!I->isAggregate()) + V = I->getKnownRValue().getScalarVal(); else - V = Builder.CreateLoad(RV.getAggregateAddress()); + V = Builder.CreateLoad( + I->hasLValue() ? I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress()); // Implement swifterror by copying into a new swifterror argument. // We'll write back in the normal path out of the call. @@ -3944,12 +4026,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // FIXME: Avoid the conversion through memory if possible. Address Src = Address::invalid(); - if (RV.isScalar() || RV.isComplex()) { + if (!I->isAggregate()) { Src = CreateMemTemp(I->Ty, "coerce"); - LValue SrcLV = MakeAddrLValue(Src, I->Ty); - EmitInitStoreOfNonAggregate(*this, RV, SrcLV); + I->copyInto(*this, Src); } else { - Src = RV.getAggregateAddress(); + Src = I->hasLValue() ? I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress(); } // If the value is offset in memory, apply the offset now. @@ -4003,22 +4085,26 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::Value *tempSize = nullptr; Address addr = Address::invalid(); - if (RV.isAggregate()) { - addr = RV.getAggregateAddress(); + Address AllocaAddr = Address::invalid(); + if (I->isAggregate()) { + addr = I->hasLValue() ? 
I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress(); + } else { + RValue RV = I->getKnownRValue(); assert(RV.isScalar()); // complex should always just be direct llvm::Type *scalarType = RV.getScalarVal()->getType(); auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); - tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize); - // Materialize to a temporary. addr = CreateTempAlloca(RV.getScalarVal()->getType(), - CharUnits::fromQuantity(std::max(layout->getAlignment(), - scalarAlign))); - EmitLifetimeStart(scalarSize, addr.getPointer()); + CharUnits::fromQuantity(std::max( + layout->getAlignment(), scalarAlign)), + "tmp", + /*ArraySize=*/nullptr, &AllocaAddr); + tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); } @@ -4036,7 +4122,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(IRArgPos == FirstIRArg + NumIRArgs); if (tempSize) { - EmitLifetimeEnd(tempSize, addr.getPointer()); + EmitLifetimeEnd(tempSize, AllocaAddr.getPointer()); } break; @@ -4044,13 +4130,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::Expand: unsigned IRArgPos = FirstIRArg; - ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos); + ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos); assert(IRArgPos == FirstIRArg + NumIRArgs); break; } } - llvm::Value *CalleePtr = Callee.getFunctionPointer(); + const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this); + llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer(); // If we're using inalloca, set up that argument. if (ArgMemory.isValid()) { @@ -4191,10 +4278,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); } + + // If we made a temporary, be sure to clean up after ourselves. Note that we + // can't depend on being inside of an ExprWithCleanups, so we need to manually + // pop this cleanup later on. Being eager about this is OK, since this + // temporary is 'invisible' outside of the callee. + if (UnusedReturnSizePtr) + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca, + UnusedReturnSizePtr); + llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); - SmallVector<llvm::OperandBundleDef, 1> BundleList; - getBundlesForFunclet(CalleePtr, CurrentFuncletPad, BundleList); + SmallVector<llvm::OperandBundleDef, 1> BundleList = + getBundlesForFunclet(CalleePtr); // Emit the actual call/invoke instruction. llvm::CallSite CS; @@ -4244,9 +4340,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // insertion point; this allows the rest of IRGen to discard // unreachable code. if (CS.doesNotReturn()) { - if (UnusedReturnSize) - EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize), - SRetPtr.getPointer()); + if (UnusedReturnSizePtr) + PopCleanupBlock(); // Strip away the noreturn attribute to better diagnose unreachable UB. 
if (SanOpts.has(SanitizerKind::Unreachable)) { @@ -4315,9 +4410,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); - if (UnusedReturnSize) - EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize), - SRetPtr.getPointer()); + if (UnusedReturnSizePtr) + PopCleanupBlock(); return ret; } @@ -4395,7 +4489,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, OffsetValue); } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { llvm::Value *ParamVal = - CallArgs[AA->getParamIndex() - 1].RV.getScalarVal(); + CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue( + *this).getScalarVal(); EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal); } } @@ -4403,6 +4498,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, return Ret; } +CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const { + if (isVirtual()) { + const CallExpr *CE = getVirtualCallExpr(); + return CGF.CGM.getCXXABI().getVirtualFunctionPointer( + CGF, getVirtualMethodDecl(), getThisAddress(), + getFunctionType(), CE ? CE->getLocStart() : SourceLocation()); + } + + return *this; +} + /* VarArg handling */ Address CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr) { diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index 7e10407fc31c..8adbe76fa6c3 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -18,6 +18,7 @@ #include "CGValue.h" #include "EHScopeStack.h" #include "clang/AST/CanonicalType.h" +#include "clang/AST/GlobalDecl.h" #include "clang/AST/Type.h" #include "llvm/IR/Value.h" @@ -42,9 +43,9 @@ namespace CodeGen { /// Abstract information about a function or function prototype. class CGCalleeInfo { - /// \brief The function prototype of the callee. + /// The function prototype of the callee. const FunctionProtoType *CalleeProtoTy; - /// \brief The function declaration of the callee. + /// The function declaration of the callee. 
const Decl *CalleeDecl; public: @@ -68,8 +69,9 @@ public: Invalid, Builtin, PseudoDestructor, + Virtual, - Last = PseudoDestructor + Last = Virtual }; struct BuiltinInfoStorage { @@ -79,12 +81,19 @@ public: struct PseudoDestructorInfoStorage { const CXXPseudoDestructorExpr *Expr; }; + struct VirtualInfoStorage { + const CallExpr *CE; + GlobalDecl MD; + Address Addr; + llvm::FunctionType *FTy; + }; SpecialKind KindOrFunctionPointer; union { CGCalleeInfo AbstractInfo; BuiltinInfoStorage BuiltinInfo; PseudoDestructorInfoStorage PseudoDestructorInfo; + VirtualInfoStorage VirtualInfo; }; explicit CGCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {} @@ -127,6 +136,16 @@ public: return CGCallee(abstractInfo, functionPtr); } + static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr, + llvm::FunctionType *FTy) { + CGCallee result(SpecialKind::Virtual); + result.VirtualInfo.CE = CE; + result.VirtualInfo.MD = MD; + result.VirtualInfo.Addr = Addr; + result.VirtualInfo.FTy = FTy; + return result; + } + bool isBuiltin() const { return KindOrFunctionPointer == SpecialKind::Builtin; } @@ -150,7 +169,9 @@ public: bool isOrdinary() const { return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last); } - const CGCalleeInfo &getAbstractInfo() const { + CGCalleeInfo getAbstractInfo() const { + if (isVirtual()) + return VirtualInfo.MD.getDecl(); assert(isOrdinary()); return AbstractInfo; } @@ -158,29 +179,86 @@ public: assert(isOrdinary()); return reinterpret_cast<llvm::Value*>(uintptr_t(KindOrFunctionPointer)); } - llvm::FunctionType *getFunctionType() const { - return cast<llvm::FunctionType>( - getFunctionPointer()->getType()->getPointerElementType()); - } void setFunctionPointer(llvm::Value *functionPtr) { assert(isOrdinary()); KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr)); } + + bool isVirtual() const { + return KindOrFunctionPointer == SpecialKind::Virtual; + } + const CallExpr *getVirtualCallExpr() const { + assert(isVirtual()); + return VirtualInfo.CE; + } + GlobalDecl getVirtualMethodDecl() const { + assert(isVirtual()); + return VirtualInfo.MD; + } + Address getThisAddress() const { + assert(isVirtual()); + return VirtualInfo.Addr; + } + + llvm::FunctionType *getFunctionType() const { + if (isVirtual()) + return VirtualInfo.FTy; + return cast<llvm::FunctionType>( + getFunctionPointer()->getType()->getPointerElementType()); + } + + /// If this is a delayed callee computation of some sort, prepare + /// a concrete callee. + CGCallee prepareConcreteCallee(CodeGenFunction &CGF) const; }; struct CallArg { - RValue RV; + private: + union { + RValue RV; + LValue LV; /// The argument is semantically a load from this l-value. + }; + bool HasLV; + + /// A data-flow flag to make sure getRValue and/or copyInto are not + /// called twice for duplicated IR emission. + mutable bool IsUsed; + + public: QualType Ty; - bool NeedsCopy; - CallArg(RValue rv, QualType ty, bool needscopy) - : RV(rv), Ty(ty), NeedsCopy(needscopy) - { } + CallArg(RValue rv, QualType ty) + : RV(rv), HasLV(false), IsUsed(false), Ty(ty) {} + CallArg(LValue lv, QualType ty) + : LV(lv), HasLV(true), IsUsed(false), Ty(ty) {} + bool hasLValue() const { return HasLV; } + QualType getType() const { return Ty; } + + /// \returns an independent RValue. If the CallArg contains an LValue, + /// a temporary copy is returned. 
+ RValue getRValue(CodeGenFunction &CGF) const; + + LValue getKnownLValue() const { + assert(HasLV && !IsUsed); + return LV; + } + RValue getKnownRValue() const { + assert(!HasLV && !IsUsed); + return RV; + } + void setRValue(RValue _RV) { + assert(!HasLV); + RV = _RV; + } + + bool isAggregate() const { return HasLV || RV.isAggregate(); } + + void copyInto(CodeGenFunction &CGF, Address A) const; }; /// CallArgList - Type for representing both the value and type of /// arguments in a call. class CallArgList : - public SmallVector<CallArg, 16> { + public SmallVector<CallArg, 8> { public: CallArgList() : StackBase(nullptr) {} @@ -204,8 +282,10 @@ public: llvm::Instruction *IsActiveIP; }; - void add(RValue rvalue, QualType type, bool needscopy = false) { - push_back(CallArg(rvalue, type, needscopy)); + void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); } + + void addUncopiedAggregate(LValue LV, QualType type) { + push_back(CallArg(LV, type)); } /// Add all the arguments from another CallArgList to this one. After doing @@ -254,7 +334,7 @@ public: llvm::Instruction *getStackBase() const { return StackBase; } void freeArgumentMemory(CodeGenFunction &CGF) const; - /// \brief Returns if we're using an inalloca struct to pass arguments in + /// Returns if we're using an inalloca struct to pass arguments in /// memory. bool isUsingInAlloca() const { return StackBase; } diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index a6915071ec17..0b9311f7771c 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -406,8 +406,8 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr, // Apply the offset. llvm::Value *Value = Builder.CreateBitCast(BaseAddr.getPointer(), Int8PtrTy); - Value = Builder.CreateGEP(Value, Builder.CreateNeg(NonVirtualOffset), - "sub.ptr"); + Value = Builder.CreateInBoundsGEP(Value, Builder.CreateNeg(NonVirtualOffset), + "sub.ptr"); // Just cast. Value = Builder.CreateBitCast(Value, DerivedPtrTy); @@ -555,10 +555,12 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, BaseClassDecl, isBaseVirtual); AggValueSlot AggSlot = - AggValueSlot::forAddr(V, Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::forAddr( + V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); CGF.EmitAggExpr(BaseInit->getInit(), AggSlot); @@ -615,7 +617,14 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, llvm::Value *ThisPtr = CGF.LoadCXXThis(); QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); - LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); + LValue LHS; + + // If a base constructor is being emitted, create an LValue that has the + // non-virtual alignment. + if (CGF.CurGD.getCtorType() == Ctor_Base) + LHS = CGF.MakeNaturalAlignPointeeAddrLValue(ThisPtr, RecordTy); + else + LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); EmitLValueForAnyFieldInitialization(CGF, MemberInit, LHS); @@ -640,7 +649,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field); // Copy the aggregate. 
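The CGCall.h hunks above replace CallArg's single stored RValue with an RValue-or-LValue union, add getRValue()/copyInto(), and split CallArgList::add() from the new addUncopiedAggregate(). As a reading aid only, not part of the diff, here is a minimal sketch of how a caller inside clang's lib/CodeGen builds an argument list after this change; the helper name and its parameters are invented, while the CodeGen types and member functions are the ones declared in the hunks above.

// Hypothetical helper for illustration only; not present in the commit.
// Assumes it lives inside clang's lib/CodeGen tree.
#include "CGCall.h"
#include "CodeGenFunction.h"

static void emitTwoArgs(clang::CodeGen::CodeGenFunction &CGF,
                        clang::CodeGen::CallArgList &Args,
                        llvm::Value *ScalarV, clang::QualType ScalarTy,
                        const clang::Expr *AggE, clang::QualType AggTy) {
  using namespace clang::CodeGen;
  // Scalars are still recorded as RValues; the old 'needscopy' flag is gone.
  Args.add(RValue::get(ScalarV), ScalarTy);
  // An aggregate lvalue can now be recorded without an eager copy. EmitCall
  // later calls CallArg::getRValue() or CallArg::copyInto() on it, and only
  // at that point is a temporary materialized (see the CGCall.cpp hunks
  // earlier in this diff).
  Args.addUncopiedAggregate(CGF.EmitLValue(AggE), AggTy);
}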
- CGF.EmitAggregateCopy(LHS.getAddress(), Src.getAddress(), FieldType, + CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field), LHS.isVolatileQualified()); // Ensure that we destroy the objects if an exception is thrown later in // the constructor. @@ -671,10 +680,12 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, break; case TEK_Aggregate: { AggValueSlot Slot = - AggValueSlot::forLValue(LHS, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::forLValue( + LHS, + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + overlapForFieldInit(Field)); EmitAggExpr(Init, Slot); break; } @@ -905,15 +916,15 @@ namespace { } CharUnits getMemcpySize(uint64_t FirstByteOffset) const { + ASTContext &Ctx = CGF.getContext(); unsigned LastFieldSize = - LastField->isBitField() ? - LastField->getBitWidthValue(CGF.getContext()) : - CGF.getContext().getTypeSize(LastField->getType()); - uint64_t MemcpySizeBits = - LastFieldOffset + LastFieldSize - FirstByteOffset + - CGF.getContext().getCharWidth() - 1; - CharUnits MemcpySize = - CGF.getContext().toCharUnitsFromBits(MemcpySizeBits); + LastField->isBitField() + ? LastField->getBitWidthValue(Ctx) + : Ctx.toBits( + Ctx.getTypeInfoDataSizeInChars(LastField->getType()).first); + uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize - + FirstByteOffset + Ctx.getCharWidth() - 1; + CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits); return MemcpySize; } @@ -1265,7 +1276,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) - CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis()); + CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); EmitBaseInitializer(*this, ClassDecl, *B, CtorType); } @@ -1282,7 +1293,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) - CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis()); + CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); EmitBaseInitializer(*this, ClassDecl, *B, CtorType); } @@ -1466,11 +1477,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // Initialize the vtable pointers before entering the body. if (!CanSkipVTablePointerInitialization(*this, Dtor)) { - // Insert the llvm.invariant.group.barrier intrinsic before initializing + // Insert the llvm.launder.invariant.group intrinsic before initializing // the vptrs to cancel any previous assumptions we might have made. if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0) - CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis()); + CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); InitializeVTablePointers(Dtor->getParent()); } @@ -1728,7 +1739,7 @@ namespace { }; } // end anonymous namespace -/// \brief Emit all code that comes at the end of class's +/// Emit all code that comes at the end of class's /// destructor. This is to call destructors on members and base classes /// in reverse order of their construction. 
/// @@ -1954,7 +1965,8 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, } EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, curAddr, E); + /*Delegating=*/false, curAddr, E, + AggValueSlot::DoesNotOverlap); } // Go to the next element. @@ -1989,7 +2001,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, Address This, - const CXXConstructExpr *E) { + const CXXConstructExpr *E, + AggValueSlot::Overlap_t Overlap) { CallArgList Args; // Push the this ptr. @@ -2002,10 +2015,10 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); const Expr *Arg = E->getArg(0); - QualType SrcTy = Arg->getType(); - Address Src = EmitLValue(Arg).getAddress(); + LValue Src = EmitLValue(Arg); QualType DestTy = getContext().getTypeDeclType(D->getParent()); - EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); + LValue Dest = MakeAddrLValue(This, DestTy); + EmitAggregateCopyCtor(Dest, Src, Overlap); return; } @@ -2017,7 +2030,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(), /*ParamsToSkip*/ 0, Order); - EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); + EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args, + Overlap, E->getExprLoc()); } static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, @@ -2049,14 +2063,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool ForVirtualBase, bool Delegating, Address This, - CallArgList &Args) { + CallArgList &Args, + AggValueSlot::Overlap_t Overlap, + SourceLocation Loc) { const CXXRecordDecl *ClassDecl = D->getParent(); // C++11 [class.mfct.non-static]p2: // If a non-static member function of a class X is called for an object that // is not of type X, or of a type derived from X, the behavior is undefined. - // FIXME: Provide a source location here. - EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(), + EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, Loc, This.getPointer(), getContext().getRecordType(ClassDecl)); if (D->isTrivial() && D->isDefaultConstructor()) { @@ -2071,9 +2086,12 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, assert(Args.size() == 2 && "unexpected argcount for trivial ctor"); QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType(); - Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy)); + Address Src(Args[1].getRValue(*this).getScalarVal(), + getNaturalTypeAlignment(SrcTy)); + LValue SrcLVal = MakeAddrLValue(Src, SrcTy); QualType DestTy = getContext().getTypeDeclType(ClassDecl); - EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); + LValue DestLVal = MakeAddrLValue(This, DestTy); + EmitAggregateCopyCtor(DestLVal, SrcLVal, Overlap); return; } @@ -2123,8 +2141,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall( const CXXConstructorDecl *D, bool ForVirtualBase, Address This, bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) { CallArgList Args; - CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()), - /*NeedsCopy=*/false); + CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext())); // Forward the parameters. 
if (InheritedFromVBase && @@ -2163,7 +2180,8 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall( } EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false, - This, Args); + This, Args, AggValueSlot::MayOverlap, + E->getLocation()); } void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( @@ -2188,7 +2206,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( assert(Args.size() >= Params.size() && "too few arguments for call"); for (unsigned I = 0, N = Args.size(); I != N; ++I) { if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) { - const RValue &RV = Args[I].RV; + const RValue &RV = Args[I].getRValue(*this); assert(!RV.isComplex() && "complex indirect params not supported"); ParamValue Val = RV.isScalar() ? ParamValue::forDirect(RV.getScalarVal()) @@ -2259,7 +2277,8 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(), /*ParamsToSkip*/ 1); - EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args); + EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args, + AggValueSlot::MayOverlap, E->getExprLoc()); } void @@ -2294,7 +2313,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, } EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false, - /*Delegating=*/true, This, DelegateArgs); + /*Delegating=*/true, This, DelegateArgs, + AggValueSlot::MayOverlap, Loc); } namespace { @@ -2325,7 +2345,8 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor AggValueSlot::forAddr(ThisPtr, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap); EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot); @@ -2667,7 +2688,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, SSK = llvm::SanStat_CFI_UnrelatedCast; break; case CFITCK_ICall: - llvm_unreachable("not expecting CFITCK_ICall"); + case CFITCK_NVMFCall: + case CFITCK_VMFCall: + llvm_unreachable("unexpected sanitizer kind"); } std::string TypeName = RD->getQualifiedNameAsString(); diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 22055b2cb902..cfd230997ed0 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -281,10 +281,10 @@ void EHScopeStack::popNullFixups() { BranchFixups.pop_back(); } -void CodeGenFunction::initFullExprCleanup() { +Address CodeGenFunction::createCleanupActiveFlag() { // Create a variable to decide whether the cleanup needs to be run. - Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), - "cleanup.cond"); + Address active = CreateTempAllocaWithoutCast( + Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond"); // Initialize it to false at a site that's guaranteed to be run // before each evaluation. @@ -293,10 +293,14 @@ void CodeGenFunction::initFullExprCleanup() { // Initialize it to true at the current location. Builder.CreateStore(Builder.getTrue(), active); + return active; +} + +void CodeGenFunction::initFullExprCleanupWithFlag(Address ActiveFlag) { // Set that as the active flag in the cleanup. 
EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin()); assert(!cleanup.hasActiveFlag() && "cleanup already has active flag?"); - cleanup.setActiveFlag(active); + cleanup.setActiveFlag(ActiveFlag); if (cleanup.isNormalCleanup()) cleanup.setTestFlagInNormalCleanup(); if (cleanup.isEHCleanup()) cleanup.setTestFlagInEHCleanup(); @@ -494,6 +498,13 @@ void CodeGenFunction::PopCleanupBlocks( &LifetimeExtendedCleanupStack[I], Header.getSize()); I += Header.getSize(); + + if (Header.isConditional()) { + Address ActiveFlag = + reinterpret_cast<Address &>(LifetimeExtendedCleanupStack[I]); + initFullExprCleanupWithFlag(ActiveFlag); + I += sizeof(ActiveFlag); + } } LifetimeExtendedCleanupStack.resize(OldLifetimeExtendedSize); } @@ -624,7 +635,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF, si->eraseFromParent(); // Destroy the load. - assert(condition->getOperand(0) == CGF.NormalCleanupDest); + assert(condition->getOperand(0) == CGF.NormalCleanupDest.getPointer()); assert(condition->use_empty()); condition->eraseFromParent(); } @@ -833,7 +844,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { if (NormalCleanupDestSlot->hasOneUse()) { NormalCleanupDestSlot->user_back()->eraseFromParent(); NormalCleanupDestSlot->eraseFromParent(); - NormalCleanupDest = nullptr; + NormalCleanupDest = Address::invalid(); } llvm::BasicBlock *BranchAfter = Scope.getBranchAfterBlock(0); @@ -971,16 +982,21 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( CurrentFuncletPad); llvm::CleanupPadInst *CPI = nullptr; - if (!EHPersonality::get(*this).usesFuncletPads()) { - EHStack.pushTerminate(); - PushedTerminate = true; - } else { + + const EHPersonality &Personality = EHPersonality::get(*this); + if (Personality.usesFuncletPads()) { llvm::Value *ParentPad = CurrentFuncletPad; if (!ParentPad) ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); CurrentFuncletPad = CPI = Builder.CreateCleanupPad(ParentPad); } + // Non-MSVC personalities need to terminate when an EH cleanup throws. + if (!Personality.isMSVCPersonality()) { + EHStack.pushTerminate(); + PushedTerminate = true; + } + // We only actually emit the cleanup code if the cleanup is either // active or was used before it was deactivated. if (EHActiveFlag.isValid() || IsActive) { @@ -1233,8 +1249,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C, EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.find(C)); assert(Scope.isActive() && "double deactivation"); - // If it's the top of the stack, just pop it. - if (C == EHStack.stable_begin()) { + // If it's the top of the stack, just pop it, but do so only if it belongs + // to the current RunCleanupsScope. + if (C == EHStack.stable_begin() && + CurrentCleanupScopeDepth.strictlyEncloses(C)) { // If it's a normal cleanup, we need to pretend that the // fallthrough is unreachable. 
CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP(); @@ -1250,10 +1268,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C, } Address CodeGenFunction::getNormalCleanupDestSlot() { - if (!NormalCleanupDest) + if (!NormalCleanupDest.isValid()) NormalCleanupDest = - CreateTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot"); - return Address(NormalCleanupDest, CharUnits::fromQuantity(4)); + CreateDefaultAlignTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot"); + return NormalCleanupDest; } /// Emits all the code to cause the given temporary to be cleaned up. diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 105c5629d50c..93be3e6c1502 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -230,7 +230,7 @@ public: }; /// A cleanup scope which generates the cleanup blocks lazily. -class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope { +class alignas(8) EHCleanupScope : public EHScope { /// The nearest normal cleanup scope enclosing this one. EHScopeStack::stable_iterator EnclosingNormal; @@ -627,16 +627,21 @@ struct EHPersonality { static const EHPersonality MSVC_except_handler; static const EHPersonality MSVC_C_specific_handler; static const EHPersonality MSVC_CxxFrameHandler3; + static const EHPersonality GNU_Wasm_CPlusPlus; /// Does this personality use landingpads or the family of pad instructions /// designed to form funclets? - bool usesFuncletPads() const { return isMSVCPersonality(); } + bool usesFuncletPads() const { + return isMSVCPersonality() || isWasmPersonality(); + } bool isMSVCPersonality() const { return this == &MSVC_except_handler || this == &MSVC_C_specific_handler || this == &MSVC_CxxFrameHandler3; } + bool isWasmPersonality() const { return this == &GNU_Wasm_CPlusPlus; } + bool isMSVCXXPersonality() const { return this == &MSVC_CxxFrameHandler3; } }; } diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 5842e7b3ff93..4f525c8aac85 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -44,6 +44,15 @@ struct clang::CodeGen::CGCoroData { // A branch to this block is emitted when coroutine needs to suspend. llvm::BasicBlock *SuspendBB = nullptr; + // The promise type's 'unhandled_exception' handler, if it defines one. + Stmt *ExceptionHandler = nullptr; + + // A temporary i1 alloca that stores whether 'await_resume' threw an + // exception. If it did, 'true' is stored in this variable, and the coroutine + // body must be skipped. If the promise type does not define an exception + // handler, this is null. + llvm::Value *ResumeEHVar = nullptr; + // Stores the jump destination just before the coroutine memory is freed. // This is the destination that every suspend point jumps to for the cleanup // branch. @@ -121,6 +130,16 @@ static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) { return Prefix; } +static bool memberCallExpressionCanThrow(const Expr *E) { + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E)) + if (const auto *Proto = + CE->getMethodDecl()->getType()->getAs<FunctionProtoType>()) + if (isNoexceptExceptionSpec(Proto->getExceptionSpecType()) && + Proto->canThrow() == CT_Cannot) + return false; + return true; +} + // Emit suspend expression which roughly looks like: // // auto && x = CommonExpr(); @@ -208,11 +227,36 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co // Emit await_resume expression. 
CGF.EmitBlock(ReadyBlock); + + // Exception handling requires additional IR. If the 'await_resume' function + // is marked as 'noexcept', we avoid generating this additional IR. + CXXTryStmt *TryStmt = nullptr; + if (Coro.ExceptionHandler && Kind == AwaitKind::Init && + memberCallExpressionCanThrow(S.getResumeExpr())) { + Coro.ResumeEHVar = + CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh")); + Builder.CreateFlagStore(true, Coro.ResumeEHVar); + + auto Loc = S.getResumeExpr()->getExprLoc(); + auto *Catch = new (CGF.getContext()) + CXXCatchStmt(Loc, /*exDecl=*/nullptr, Coro.ExceptionHandler); + auto *TryBody = + CompoundStmt::Create(CGF.getContext(), S.getResumeExpr(), Loc, Loc); + TryStmt = CXXTryStmt::Create(CGF.getContext(), Loc, TryBody, Catch); + CGF.EnterCXXTryStmt(*TryStmt); + } + LValueOrRValue Res; if (forLValue) Res.LV = CGF.EmitLValue(S.getResumeExpr()); else Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); + + if (TryStmt) { + Builder.CreateFlagStore(false, Coro.ResumeEHVar); + CGF.ExitCXXTryStmt(*TryStmt); + } + return Res; } @@ -315,7 +359,7 @@ namespace { GetParamRef Visitor; Visitor.Visit(const_cast<Expr*>(InitExpr)); assert(Visitor.Expr); - auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr); + DeclRefExpr *DREOrig = Visitor.Expr; auto *PD = DREOrig->getDecl(); auto it = LocalDeclMap.find(PD); @@ -588,19 +632,40 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { EHStack.pushCleanup<CallCoroEnd>(EHCleanup); CurCoro.Data->CurrentAwaitKind = AwaitKind::Init; + CurCoro.Data->ExceptionHandler = S.getExceptionHandler(); EmitStmt(S.getInitSuspendStmt()); CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal; - if (auto *OnException = S.getExceptionHandler()) { + if (CurCoro.Data->ExceptionHandler) { + // If we generated IR to record whether an exception was thrown from + // 'await_resume', then use that IR to determine whether the coroutine + // body should be skipped. + // If we didn't generate the IR (perhaps because 'await_resume' was marked + // as 'noexcept'), then we skip this check. 
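The CGCoroutine.cpp changes above wrap the initial suspend's await_resume() in an implicit try/catch whenever the promise type declares unhandled_exception() and await_resume() is not provably non-throwing, recording the outcome in the new coro.resumed.eh flag so that the coroutine body is skipped after an exception. A minimal source-level sketch of the situation this targets follows (coroutines-TS style, e.g. -fcoroutines-ts with libc++'s <experimental/coroutine>); all type and function names below are invented for illustration and are not taken from the diff.

#include <experimental/coroutine>

struct ThrowingInitialAwaiter {
  bool await_ready() { return false; }
  void await_suspend(std::experimental::coroutine_handle<>) {}
  // Deliberately not 'noexcept': this is what makes the frontend emit the
  // extra try/catch and the 'coro.resumed.eh' flag around the initial resume.
  void await_resume() {}
};

struct Task {
  struct promise_type {
    Task get_return_object() { return {}; }
    ThrowingInitialAwaiter initial_suspend() { return {}; }
    std::experimental::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    // Because this handler exists, an exception thrown from the initial
    // await_resume() is delivered here and the coroutine body is skipped,
    // instead of propagating out of the ramp function.
    void unhandled_exception() {}
  };
};

Task example() { co_return; }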
+ BasicBlock *ContBB = nullptr; + if (CurCoro.Data->ResumeEHVar) { + BasicBlock *BodyBB = createBasicBlock("coro.resumed.body"); + ContBB = createBasicBlock("coro.resumed.cont"); + Value *SkipBody = Builder.CreateFlagLoad(CurCoro.Data->ResumeEHVar, + "coro.resumed.eh"); + Builder.CreateCondBr(SkipBody, ContBB, BodyBB); + EmitBlock(BodyBB); + } + auto Loc = S.getLocStart(); - CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException); - auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch); + CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, + CurCoro.Data->ExceptionHandler); + auto *TryStmt = + CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch); EnterCXXTryStmt(*TryStmt); emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock()); ExitCXXTryStmt(*TryStmt); + + if (ContBB) + EmitBlock(ContBB); } else { emitBodyAndFallthrough(*this, S, S.getBody()); diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index aeed4d658a4e..097a1e043047 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -289,8 +289,7 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { << OC->getIdentifier()->getNameStart() << ')'; } } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { - OS << OCD->getClassInterface()->getName() << '(' - << OCD->getName() << ')'; + OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')'; } else if (isa<ObjCProtocolDecl>(DC)) { // We can extract the type of the class from the self pointer. if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) { @@ -361,18 +360,19 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { return StringRef(); } -llvm::DIFile::ChecksumKind +Optional<llvm::DIFile::ChecksumKind> CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { Checksum.clear(); - if (!CGM.getCodeGenOpts().EmitCodeView) - return llvm::DIFile::CSK_None; + if (!CGM.getCodeGenOpts().EmitCodeView && + CGM.getCodeGenOpts().DwarfVersion < 5) + return None; SourceManager &SM = CGM.getContext().getSourceManager(); bool Invalid; llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); if (Invalid) - return llvm::DIFile::CSK_None; + return None; llvm::MD5 Hash; llvm::MD5::MD5Result Result; @@ -384,51 +384,62 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { return llvm::DIFile::CSK_MD5; } +Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM, + FileID FID) { + if (!CGM.getCodeGenOpts().EmbedSource) + return None; + + bool SourceInvalid = false; + StringRef Source = SM.getBufferData(FID, &SourceInvalid); + + if (SourceInvalid) + return None; + + return Source; +} + llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (!Loc.isValid()) // If Location is not valid then use main input file. - return DBuilder.createFile(remapDIPath(TheCU->getFilename()), - remapDIPath(TheCU->getDirectory()), - TheCU->getFile()->getChecksumKind(), - TheCU->getFile()->getChecksum()); + return getOrCreateMainFile(); SourceManager &SM = CGM.getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty()) // If the location is not valid then use main input file. - return DBuilder.createFile(remapDIPath(TheCU->getFilename()), - remapDIPath(TheCU->getDirectory()), - TheCU->getFile()->getChecksumKind(), - TheCU->getFile()->getChecksum()); + return getOrCreateMainFile(); // Cache the results. 
const char *fname = PLoc.getFilename(); - auto it = DIFileCache.find(fname); + auto It = DIFileCache.find(fname); - if (it != DIFileCache.end()) { + if (It != DIFileCache.end()) { // Verify that the information still exists. - if (llvm::Metadata *V = it->second) + if (llvm::Metadata *V = It->second) return cast<llvm::DIFile>(V); } SmallString<32> Checksum; - llvm::DIFile::ChecksumKind CSKind = + Optional<llvm::DIFile::ChecksumKind> CSKind = computeChecksum(SM.getFileID(Loc), Checksum); + Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; + if (CSKind) + CSInfo.emplace(*CSKind, Checksum); - llvm::DIFile *F = DBuilder.createFile(remapDIPath(PLoc.getFilename()), - remapDIPath(getCurrentDirname()), - CSKind, Checksum); + llvm::DIFile *F = DBuilder.createFile( + remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo, + getSource(SM, SM.getFileID(Loc))); DIFileCache[fname].reset(F); return F; } llvm::DIFile *CGDebugInfo::getOrCreateMainFile() { - return DBuilder.createFile(remapDIPath(TheCU->getFilename()), - remapDIPath(TheCU->getDirectory()), - TheCU->getFile()->getChecksumKind(), - TheCU->getFile()->getChecksum()); + return DBuilder.createFile( + remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()), + TheCU->getFile()->getChecksum(), + CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None); } std::string CGDebugInfo::remapDIPath(StringRef Path) const { @@ -472,7 +483,8 @@ StringRef CGDebugInfo::getCurrentDirname() { void CGDebugInfo::CreateCompileUnit() { SmallString<32> Checksum; - llvm::DIFile::ChecksumKind CSKind = llvm::DIFile::CSK_None; + Optional<llvm::DIFile::ChecksumKind> CSKind; + Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; // Should we be asking the SourceManager for the main file name, instead of // accepting it as an argument? This just causes the main file name to @@ -551,14 +563,19 @@ void CGDebugInfo::CreateCompileUnit() { break; } + if (CSKind) + CSInfo.emplace(*CSKind, Checksum); + // Create new compile unit. // FIXME - Eliminate TheCU. auto &CGOpts = CGM.getCodeGenOpts(); TheCU = DBuilder.createCompileUnit( LangTag, DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname()), CSKind, Checksum), - Producer, LO.Optimize || CGOpts.PrepareForLTO || CGOpts.EmitSummaryIndex, + remapDIPath(getCurrentDirname()), CSInfo, + getSource(SM, SM.getMainFileID())), + CGOpts.EmitVersionIdentMetadata ? Producer : "", + LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO, CGOpts.DwarfDebugFlags, RuntimeVers, CGOpts.EnableSplitDwarf ? 
"" : CGOpts.SplitDwarfFile, EmissionKind, 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, @@ -620,14 +637,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return SelTy; } -#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ - case BuiltinType::Id: \ - return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \ +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \ SingletonId); #include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: - return getOrCreateStructPtrType("opencl_sampler_t", - OCLSamplerDITy); + return getOrCreateStructPtrType("opencl_sampler_t", OCLSamplerDITy); case BuiltinType::OCLEvent: return getOrCreateStructPtrType("opencl_event_t", OCLEventDITy); case BuiltinType::OCLClkEvent: @@ -645,6 +661,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::SChar: Encoding = llvm::dwarf::DW_ATE_signed_char; break; + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: Encoding = llvm::dwarf::DW_ATE_UTF; @@ -681,6 +698,34 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { // floating point types of the same size. Encoding = llvm::dwarf::DW_ATE_float; break; + case BuiltinType::ShortAccum: + case BuiltinType::Accum: + case BuiltinType::LongAccum: + case BuiltinType::ShortFract: + case BuiltinType::Fract: + case BuiltinType::LongFract: + case BuiltinType::SatShortFract: + case BuiltinType::SatFract: + case BuiltinType::SatLongFract: + case BuiltinType::SatShortAccum: + case BuiltinType::SatAccum: + case BuiltinType::SatLongAccum: + Encoding = llvm::dwarf::DW_ATE_signed_fixed; + break; + case BuiltinType::UShortAccum: + case BuiltinType::UAccum: + case BuiltinType::ULongAccum: + case BuiltinType::UShortFract: + case BuiltinType::UFract: + case BuiltinType::ULongFract: + case BuiltinType::SatUShortAccum: + case BuiltinType::SatUAccum: + case BuiltinType::SatULongAccum: + case BuiltinType::SatUShortFract: + case BuiltinType::SatUFract: + case BuiltinType::SatULongFract: + Encoding = llvm::dwarf::DW_ATE_unsigned_fixed; + break; } switch (BT->getKind()) { @@ -780,27 +825,49 @@ static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) { } } -/// In C++ mode, types have linkage, so we can rely on the ODR and -/// on their mangled names, if they're external. -static SmallString<256> getUniqueTagTypeName(const TagType *Ty, - CodeGenModule &CGM, - llvm::DICompileUnit *TheCU) { - SmallString<256> FullName; +// Determines if the tag declaration will require a type identifier. +static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM, + llvm::DICompileUnit *TheCU) { + // We only add a type identifier for types with C++ name mangling. + if (!hasCXXMangling(TD, TheCU)) + return false; + + // CodeView types with C++ mangling need a type identifier. + if (CGM.getCodeGenOpts().EmitCodeView) + return true; + + // Externally visible types with C++ mangling need a type identifier. + if (TD->isExternallyVisible()) + return true; + + return false; +} + +// When emitting CodeView debug information we need to produce a type +// identifier for all types which have a C++ mangling. Until a GUID is added +// to the identifier (not currently implemented) the result will not be unique +// across compilation units. 
+// When emitting DWARF debug information, we need to produce a type identifier +// for all externally visible types with C++ name mangling. This identifier +// should be unique across ODR-compliant compilation units. +static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM, + llvm::DICompileUnit *TheCU) { + SmallString<256> Identifier; const TagDecl *TD = Ty->getDecl(); - if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible()) - return FullName; + if (!needsTypeIdentifier(TD, CGM, TheCU)) + return Identifier; // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? - llvm::raw_svector_ostream Out(FullName); + llvm::raw_svector_ostream Out(Identifier); CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(QualType(Ty, 0), Out); - return FullName; + return Identifier; } -/// \return the approproate DWARF tag for a composite type. +/// \return the appropriate DWARF tag for a composite type. static llvm::dwarf::Tag getTagForRecord(const RecordDecl *RD) { - llvm::dwarf::Tag Tag; + llvm::dwarf::Tag Tag; if (RD->isStruct() || RD->isInterface()) Tag = llvm::dwarf::DW_TAG_structure_type; else if (RD->isUnion()) @@ -828,10 +895,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, uint32_t Align = 0; // Create the type. - SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); + SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType( getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align, - llvm::DINode::FlagFwdDecl, FullName); + llvm::DINode::FlagFwdDecl, Identifier); if (CGM.getCodeGenOpts().DebugFwdTemplateParams) if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) DBuilder.replaceArrays(RetTy, llvm::DINodeArray(), @@ -926,9 +993,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty, // DW_AT_APPLE_BLOCK attribute and are an implementation detail only // the debugger needs to know about. To allow type uniquing, emit // them without a name or a location. - EltTy = - DBuilder.createStructType(Unit, "", nullptr, LineNo, - FieldOffset, 0, Flags, nullptr, Elements); + EltTy = DBuilder.createStructType(Unit, "", nullptr, LineNo, FieldOffset, 0, + Flags, nullptr, Elements); return DBuilder.createPointerType(EltTy, Size); } @@ -943,8 +1009,9 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false); printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy()); - auto *AliasDecl = cast<TypeAliasTemplateDecl>( - Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl(); + auto *AliasDecl = + cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) + ->getTemplatedDecl(); SourceLocation Loc = AliasDecl->getLocation(); return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc), @@ -981,20 +1048,28 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_vectorcall; case CC_X86Pascal: return llvm::dwarf::DW_CC_BORLAND_pascal; - - // FIXME: Create new DW_CC_ codes for these calling conventions. 
case CC_Win64: + return llvm::dwarf::DW_CC_LLVM_Win64; case CC_X86_64SysV: + return llvm::dwarf::DW_CC_LLVM_X86_64SysV; case CC_AAPCS: + return llvm::dwarf::DW_CC_LLVM_AAPCS; case CC_AAPCS_VFP: + return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP; case CC_IntelOclBicc: + return llvm::dwarf::DW_CC_LLVM_IntelOclBicc; case CC_SpirFunction: + return llvm::dwarf::DW_CC_LLVM_SpirFunction; case CC_OpenCLKernel: + return llvm::dwarf::DW_CC_LLVM_OpenCLKernel; case CC_Swift: + return llvm::dwarf::DW_CC_LLVM_Swift; case CC_PreserveMost: + return llvm::dwarf::DW_CC_LLVM_PreserveMost; case CC_PreserveAll: + return llvm::dwarf::DW_CC_LLVM_PreserveAll; case CC_X86RegCall: - return 0; + return llvm::dwarf::DW_CC_LLVM_X86RegCall; } return 0; } @@ -1102,8 +1177,8 @@ CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, } llvm::DINode::DIFlags flags = getAccessFlag(AS, RD); - return DBuilder.createMemberType(scope, name, file, line, SizeInBits, - Align, offsetInBits, flags, debugType); + return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align, + offsetInBits, flags, debugType); } void CGDebugInfo::CollectRecordLambdaFields( @@ -1223,10 +1298,6 @@ void CGDebugInfo::CollectRecordFields( else { const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record); - // Debug info for nested types is included in the member list only for - // CodeView. - bool IncludeNestedTypes = CGM.getCodeGenOpts().EmitCodeView; - // Field number for non-static fields. unsigned fieldNo = 0; @@ -1236,6 +1307,13 @@ void CGDebugInfo::CollectRecordFields( if (const auto *V = dyn_cast<VarDecl>(I)) { if (V->hasAttr<NoDebugAttr>()) continue; + + // Skip variable template specializations when emitting CodeView. MSVC + // doesn't emit them. + if (CGM.getCodeGenOpts().EmitCodeView && + isa<VarTemplateSpecializationDecl>(V)) + continue; + // Reuse the existing static member declaration if one exists auto MI = StaticDataMemberCache.find(V->getCanonicalDecl()); if (MI != StaticDataMemberCache.end()) { @@ -1252,7 +1330,9 @@ void CGDebugInfo::CollectRecordFields( // Bump field number for next field. ++fieldNo; - } else if (IncludeNestedTypes) { + } else if (CGM.getCodeGenOpts().EmitCodeView) { + // Debug info for nested types is included in the member list only for + // CodeView. if (const auto *nestedType = dyn_cast<TypeDecl>(I)) if (!nestedType->isImplicit() && nestedType->getDeclContext() == record) @@ -1386,7 +1466,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // deleting dtor. const auto *DD = dyn_cast<CXXDestructorDecl>(Method); GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method); - MicrosoftVTableContext::MethodVFTableLocation ML = + MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); VIndex = ML.Index; @@ -1507,6 +1587,7 @@ void CGDebugInfo::CollectCXXBasesAux( auto *BaseTy = getOrCreateType(BI.getType(), Unit); llvm::DINode::DIFlags BFlags = StartingFlags; uint64_t BaseOffset; + uint32_t VBPtrOffset = 0; if (BI.isVirtual()) { if (CGM.getTarget().getCXXABI().isItaniumFamily()) { @@ -1520,6 +1601,10 @@ void CGDebugInfo::CollectCXXBasesAux( // vbase offset offset in Itanium. BaseOffset = 4 * CGM.getMicrosoftVTableContext().getVBTableIndex(RD, Base); + VBPtrOffset = CGM.getContext() + .getASTRecordLayout(RD) + .getVBPtrOffset() + .getQuantity(); } BFlags |= llvm::DINode::FlagVirtual; } else @@ -1528,8 +1613,8 @@ void CGDebugInfo::CollectCXXBasesAux( // BI->isVirtual() and bits when not. 
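The getDwarfCC() hunk above stops collapsing the non-standard calling conventions to 0 and maps each of them to a vendor DW_CC_LLVM_* code. As a small user-level illustration (assuming an x86 target, with regcall picked arbitrarily from that list), a declaration like the following now gets a meaningful DW_AT_calling_convention on its subroutine type instead of none:

// With -g on x86, the subroutine type for 'add' is now tagged with
// DW_CC_LLVM_X86RegCall rather than carrying no calling-convention attribute.
__attribute__((regcall)) int add(int a, int b) { return a + b; }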
BFlags |= getAccessFlag(BI.getAccessSpecifier(), RD); - llvm::DIType *DTy = - DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset, BFlags); + llvm::DIType *DTy = DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset, + VBPtrOffset, BFlags); EltTys.push_back(DTy); } } @@ -1603,8 +1688,8 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, V = CGM.getCXXABI().EmitNullMemberPointer(MPT); if (!V) V = llvm::ConstantInt::get(CGM.Int8Ty, 0); - TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, V)); + TemplateParams.push_back( + DBuilder.createTemplateValueParameter(TheCU, Name, TTy, V)); } break; case TemplateArgument::Template: TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( @@ -1676,9 +1761,8 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { Optional<unsigned> DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); - llvm::DIType *vtbl_ptr_type = - DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace, - "__vtbl_ptr_type"); + llvm::DIType *vtbl_ptr_type = DBuilder.createPointerType( + SubTy, Size, 0, DWARFAddressSpace, "__vtbl_ptr_type"); VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size); return VTablePtrType; } @@ -1722,9 +1806,8 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); // Create a very wide void* type and insert it directly in the element list. - llvm::DIType *VTableType = - DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace, - "__vtbl_ptr_type"); + llvm::DIType *VTableType = DBuilder.createPointerType( + nullptr, VTableWidth, 0, DWARFAddressSpace, "__vtbl_ptr_type"); EltTys.push_back(VTableType); // The vptr is a pointer to this special vtable type. @@ -1739,9 +1822,9 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, VPtrTy = getOrCreateVTablePtrType(Unit); unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); - llvm::DIType *VPtrMember = DBuilder.createMemberType( - Unit, getVTableName(RD), Unit, 0, Size, 0, 0, - llvm::DINode::FlagArtificial, VPtrTy); + llvm::DIType *VPtrMember = + DBuilder.createMemberType(Unit, getVTableName(RD), Unit, 0, Size, 0, 0, + llvm::DINode::FlagArtificial, VPtrTy); EltTys.push_back(VPtrMember); } @@ -2079,7 +2162,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, llvm::raw_svector_ostream OS(ConfigMacros); const auto &PPOpts = CGM.getPreprocessorOpts(); unsigned I = 0; - // Translate the macro definitions back into a commmand line. + // Translate the macro definitions back into a command line. for (auto &M : PPOpts.Macros) { if (++I > 1) OS << " "; @@ -2088,9 +2171,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, OS << "\"-" << (Undef ? 'U' : 'D'); for (char c : Macro) switch (c) { - case '\\' : OS << "\\\\"; break; - case '"' : OS << "\\\""; break; - default: OS << c; + case '\\': + OS << "\\\\"; + break; + case '"': + OS << "\\\""; + break; + default: + OS << c; } OS << '\"'; } @@ -2107,6 +2195,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, : ~1ULL; llvm::DIBuilder DIB(CGM.getModule()); DIB.createCompileUnit(TheCU->getSourceLanguage(), + // TODO: Support "Source" from external AST providers? 
DIB.createFile(Mod.getModuleName(), Mod.getPath()), TheCU->getProducer(), true, StringRef(), 0, Mod.getASTFile(), llvm::DICompileUnit::FullDebug, @@ -2162,7 +2251,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, if (!SClassTy) return nullptr; - llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, + llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0, llvm::DINode::FlagZero); EltTys.push_back(InhTag); } @@ -2184,7 +2273,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, EltTys.push_back(PropertyNode); }; { - llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet; + llvm::SmallPtrSet<const IdentifierInfo *, 16> PropertySet; for (const ObjCCategoryDecl *ClassExt : ID->known_extensions()) for (auto *PD : ClassExt->properties()) { PropertySet.insert(PD->getIdentifier()); @@ -2265,10 +2354,12 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, ObjCMethodDecl *Setter = PD->getSetterMethodDecl(); PropertyNode = DBuilder.createObjCProperty( PD->getName(), PUnit, PLine, - hasDefaultGetterName(PD, Getter) ? "" : getSelectorName( - PD->getGetterName()), - hasDefaultSetterName(PD, Setter) ? "" : getSelectorName( - PD->getSetterName()), + hasDefaultGetterName(PD, Getter) + ? "" + : getSelectorName(PD->getGetterName()), + hasDefaultSetterName(PD, Setter) + ? "" + : getSelectorName(PD->getSetterName()), PD->getPropertyAttributes(), getOrCreateType(PD->getType(), PUnit)); } @@ -2291,12 +2382,14 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty, llvm::DIFile *Unit) { llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit); int64_t Count = Ty->getNumElements(); - if (Count == 0) - // If number of elements are not known then this is an unbounded array. - // Use Count == -1 to express such arrays. - Count = -1; - llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(0, Count); + llvm::Metadata *Subscript; + QualType QTy(Ty, 0); + auto SizeExpr = SizeExprCache.find(QTy); + if (SizeExpr != SizeExprCache.end()) + Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond()); + else + Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1); llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript); uint64_t Size = CGM.getContext().getTypeSize(Ty); @@ -2353,8 +2446,12 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { } } - // FIXME: Verify this is right for VLAs. 
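For illustration, the vector hunk above and the array hunk that follows both consult a new per-type cache of VLA size expressions before falling back to a constant element count, where a known count of 0 becomes the "unknown bound" value -1. Below is a minimal standalone sketch of that lookup; the container, key, and node types are placeholders for this example, not the real QualType or llvm::Metadata.

#include <cstdint>
#include <map>

// Stand-ins for the real cache key and metadata node (illustrative only).
using TypeKey = const void *;
struct SizeExprNode; // would be the metadata node holding a size expression

struct SubrangeDesc {
  int64_t ConstCount = -1;                 // -1 encodes an unknown bound
  const SizeExprNode *CountExpr = nullptr; // set when a VLA size expression exists
};

// Cache filled elsewhere (see the registerVLASizeExpression change further down).
static std::map<TypeKey, const SizeExprNode *> SizeExprCache;

// Prefer a registered runtime size expression; otherwise fall back to the
// constant count, mapping a count of 0 to the "unknown" value -1.
static SubrangeDesc makeSubrange(TypeKey ElementType, int64_t ConstCount) {
  SubrangeDesc S;
  auto It = SizeExprCache.find(ElementType);
  if (It != SizeExprCache.end())
    S.CountExpr = It->second;
  else
    S.ConstCount = ConstCount ? ConstCount : -1;
  return S;
}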
- Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count)); + auto SizeNode = SizeExprCache.find(EltTy); + if (SizeNode != SizeExprCache.end()) + Subscripts.push_back( + DBuilder.getOrCreateSubrange(0, SizeNode->getSecond())); + else + Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count)); EltTy = Ty->getElementType(); } @@ -2422,8 +2519,7 @@ llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) { return DBuilder.createQualifiedType(llvm::dwarf::DW_TAG_atomic_type, FromTy); } -llvm::DIType* CGDebugInfo::CreateType(const PipeType *Ty, - llvm::DIFile *U) { +llvm::DIType *CGDebugInfo::CreateType(const PipeType *Ty, llvm::DIFile *U) { return getOrCreateType(Ty->getElementType(), U); } @@ -2437,7 +2533,7 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { Align = getDeclAlignIfRequired(ED, CGM.getContext()); } - SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); + SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); bool isImportedFromModule = DebugTypeExtRefs && ED->isFromASTFile() && ED->getDefinition(); @@ -2460,7 +2556,7 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { StringRef EDName = ED->getName(); llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType( llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line, - 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName); + 0, Size, Align, llvm::DINode::FlagFwdDecl, Identifier); ReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(Ty), @@ -2480,14 +2576,17 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { Align = getDeclAlignIfRequired(ED, CGM.getContext()); } - SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); + SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); // Create elements for each enumerator. SmallVector<llvm::Metadata *, 16> Enumerators; ED = ED->getDefinition(); + bool IsSigned = ED->getIntegerType()->isSignedIntegerType(); for (const auto *Enum : ED->enumerators()) { - Enumerators.push_back(DBuilder.createEnumerator( - Enum->getName(), Enum->getInitVal().getSExtValue())); + const auto &InitVal = Enum->getInitVal(); + auto Value = IsSigned ? InitVal.getSExtValue() : InitVal.getZExtValue(); + Enumerators.push_back( + DBuilder.createEnumerator(Enum->getName(), Value, !IsSigned)); } // Return a CompositeType for the enum itself. @@ -2496,11 +2595,10 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation()); unsigned Line = getLineNumber(ED->getLocation()); llvm::DIScope *EnumContext = getDeclContextDescriptor(ED); - llvm::DIType *ClassTy = - ED->isFixed() ? getOrCreateType(ED->getIntegerType(), DefUnit) : nullptr; + llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit); return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit, Line, Size, Align, EltArray, ClassTy, - FullName); + Identifier, ED->isFixed()); } llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent, @@ -2585,10 +2683,10 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) { // Unwrap the type as needed for debug information. Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); - auto it = TypeCache.find(Ty.getAsOpaquePtr()); - if (it != TypeCache.end()) { + auto It = TypeCache.find(Ty.getAsOpaquePtr()); + if (It != TypeCache.end()) { // Verify that the debug info still exists. 
- if (llvm::Metadata *V = it->second) + if (llvm::Metadata *V = It->second) return cast<llvm::DIType>(V); } @@ -2623,7 +2721,7 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { return T; llvm::DIType *Res = CreateTypeNode(Ty, Unit); - void* TyPtr = Ty.getAsOpaquePtr(); + void *TyPtr = Ty.getAsOpaquePtr(); // And update the type cache. TypeCache[TyPtr].reset(Res); @@ -2801,11 +2899,24 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { uint64_t Size = CGM.getContext().getTypeSize(Ty); auto Align = getDeclAlignIfRequired(D, CGM.getContext()); - SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); + SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); + + // Explicitly record the calling convention for C++ records. + auto Flags = llvm::DINode::FlagZero; + if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect) + Flags |= llvm::DINode::FlagTypePassByReference; + else + Flags |= llvm::DINode::FlagTypePassByValue; + + // Record if a C++ record is trivial type. + if (CXXRD->isTrivial()) + Flags |= llvm::DINode::FlagTrivial; + } llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, - llvm::DINode::FlagZero, FullName); + Flags, Identifier); // Elements of composite types usually have back to the type, creating // uniquing cycles. Distinct nodes are more efficient. @@ -2819,14 +2930,14 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { // so they don't tend to be involved in uniquing cycles and there is some // chance of merging them when linking together two modules. Only make // them distinct if they are ODR-uniqued. - if (FullName.empty()) + if (Identifier.empty()) break; LLVM_FALLTHROUGH; case llvm::dwarf::DW_TAG_structure_type: case llvm::dwarf::DW_TAG_union_type: case llvm::dwarf::DW_TAG_class_type: - // Immediatley resolve to a distinct node. + // Immediately resolve to a distinct node. RealDecl = llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl)); break; @@ -2901,10 +3012,10 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, if (DebugKind >= codegenoptions::LimitedDebugInfo) { if (const NamespaceDecl *NSDecl = - dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) + dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) FDContext = getOrCreateNamespace(NSDecl); else if (const RecordDecl *RDecl = - dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) { + dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) { llvm::DIScope *Mod = getParentModuleOrNull(RDecl); FDContext = getContextDescriptor(RDecl, Mod ? Mod : TheCU); } @@ -2931,8 +3042,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, llvm::APInt ConstVal(32, 1); QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); - T = CGM.getContext().getConstantArrayType(ET, ConstVal, - ArrayType::Normal, 0); + T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal, + 0); } Name = VD->getName(); @@ -2959,8 +3070,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, if (DC->isRecord()) DC = CGM.getContext().getTranslationUnitDecl(); - llvm::DIScope *Mod = getParentModuleOrNull(VD); - VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? 
Mod : TheCU); + llvm::DIScope *Mod = getParentModuleOrNull(VD); + VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU); } llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, @@ -2972,8 +3083,8 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; unsigned Line = getLineNumber(Loc); - collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, - TParamsArray, Flags); + collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray, + Flags); auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); // Build function type. @@ -2999,20 +3110,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); - const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); + const FunctionDecl *CanonDecl = FD->getCanonicalDecl(); FwdDeclReplaceMap.emplace_back(std::piecewise_construct, std::make_tuple(CanonDecl), std::make_tuple(SP)); return SP; } -llvm::DISubprogram * -CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) { +llvm::DISubprogram *CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) { return getFunctionFwdDeclOrStub(GD, /* Stub = */ false); } -llvm::DISubprogram * -CGDebugInfo::getFunctionStub(GlobalDecl GD) { +llvm::DISubprogram *CGDebugInfo::getFunctionStub(GlobalDecl GD) { return getFunctionFwdDeclOrStub(GD, /* Stub = */ true); } @@ -3136,7 +3245,8 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, if (FPT->getNumParams() > 1) SelfDeclTy = FPT->getParamType(0); if (!SelfDeclTy.isNull()) - Elts.push_back(CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F))); + Elts.push_back( + CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F))); // "_cmd" pointer is always second argument. Elts.push_back(DBuilder.createArtificialType( getOrCreateType(CGM.getContext().getObjCSelType(), F))); @@ -3172,7 +3282,8 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, SourceLocation ScopeLoc, QualType FnType, - llvm::Function *Fn, CGBuilderTy &Builder) { + llvm::Function *Fn, bool CurFuncIsThunk, + CGBuilderTy &Builder) { StringRef Name; StringRef LinkageName; @@ -3213,11 +3324,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (Name.startswith("\01")) Name = Name.substr(1); - if (!HasDecl || D->isImplicit()) { + if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>()) { Flags |= llvm::DINode::FlagArtificial; // Artificial functions should not silently reuse CurLoc. CurLoc = SourceLocation(); } + + if (CurFuncIsThunk) + Flags |= llvm::DINode::FlagThunk; + unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = getLineNumber(ScopeLoc); @@ -3238,6 +3353,27 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (HasDecl && isa<FunctionDecl>(D)) DeclCache[D->getCanonicalDecl()].reset(SP); + if (CGM.getCodeGenOpts().DwarfVersion >= 5) { + // Starting with DWARF V5 method declarations are emitted as children of + // the interface type. 
+ if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) { + const ObjCInterfaceDecl *ID = OMD->getClassInterface(); + QualType QTy(ID->getTypeForDecl(), 0); + auto It = TypeCache.find(QTy.getAsOpaquePtr()); + if (It != TypeCache.end()) { + llvm::DICompositeType *InterfaceDecl = + cast<llvm::DICompositeType>(It->second); + llvm::DISubprogram *FD = DBuilder.createFunction( + InterfaceDecl, Name, LinkageName, Unit, LineNo, + getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), + false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get()); + DBuilder.finalizeSubprogram(FD); + ObjCMethodCache[ID].push_back(FD); + } + } + } + // Push the function onto the lexical block stack. LexicalBlockStack.emplace_back(SP); @@ -3330,8 +3466,7 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { } void CGDebugInfo::AppendAddressSpaceXDeref( - unsigned AddressSpace, - SmallVectorImpl<int64_t> &Expr) const { + unsigned AddressSpace, SmallVectorImpl<int64_t> &Expr) const { Optional<unsigned> DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(AddressSpace); if (!DWARFAddressSpace) @@ -3463,13 +3598,14 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, nullptr, Elements); } -void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, - llvm::Optional<unsigned> ArgNo, - CGBuilderTy &Builder) { +llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, + llvm::Value *Storage, + llvm::Optional<unsigned> ArgNo, + CGBuilderTy &Builder) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (VD->hasAttr<NoDebugAttr>()) - return; + return nullptr; bool Unwritten = VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) && @@ -3487,7 +3623,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // If there is no debug info for this type then do not emit debug info // for this variable. if (!Ty) - return; + return nullptr; // Get location information. unsigned Line = 0; @@ -3538,15 +3674,15 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) { // If VD is an anonymous union then Storage represents value for // all union fields. - const auto *RD = cast<RecordDecl>(RT->getDecl()); + const RecordDecl *RD = RT->getDecl(); if (RD->isUnion() && RD->isAnonymousStructOrUnion()) { // GDB has trouble finding local variables in anonymous unions, so we emit - // artifical local variables for each of the members. + // artificial local variables for each of the members. // // FIXME: Remove this code as soon as GDB supports this. // The debug info verifier in LLVM operates based on the assumption that a - // variable has the same size as its storage and we had to disable the check - // for artificial variables. + // variable has the same size as its storage and we had to disable the + // check for artificial variables. for (const auto *Field : RD->fields()) { llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit); StringRef FieldName = Field->getName(); @@ -3571,25 +3707,26 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, } // Create the descriptor for the variable. - auto *D = ArgNo - ? 
DBuilder.createParameterVariable( - Scope, Name, *ArgNo, Unit, Line, Ty, - CGM.getLangOpts().Optimize, Flags) - : DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty, - CGM.getLangOpts().Optimize, Flags, - Align); + auto *D = ArgNo ? DBuilder.createParameterVariable( + Scope, Name, *ArgNo, Unit, Line, Ty, + CGM.getLangOpts().Optimize, Flags) + : DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty, + CGM.getLangOpts().Optimize, + Flags, Align); // Insert an llvm.dbg.declare into the current block. DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); + + return D; } -void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, - llvm::Value *Storage, - CGBuilderTy &Builder) { +llvm::DILocalVariable * +CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, + CGBuilderTy &Builder) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); - EmitDeclare(VD, Storage, llvm::None, Builder); + return EmitDeclare(VD, Storage, llvm::None, Builder); } llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, @@ -3686,7 +3823,7 @@ struct BlockLayoutChunk { bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) { return l.OffsetInBits < r.OffsetInBits; } -} +} // namespace void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, StringRef Name, @@ -3725,9 +3862,10 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, blockLayout->getElementOffsetInBits(3), tunit, tunit)); fields.push_back(createFieldType( - "__descriptor", C.getPointerType(block.NeedsCopyDispose - ? C.getBlockDescriptorExtendedType() - : C.getBlockDescriptorType()), + "__descriptor", + C.getPointerType(block.NeedsCopyDispose + ? C.getBlockDescriptorExtendedType() + : C.getBlockDescriptorType()), loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); // We want to sort the captures by offset, not because DWARF @@ -3806,8 +3944,8 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, } SmallString<36> typeName; - llvm::raw_svector_ostream(typeName) << "__block_literal_" - << CGM.getUniqueBlockCount(); + llvm::raw_svector_ostream(typeName) + << "__block_literal_" << CGM.getUniqueBlockCount(); llvm::DINodeArray fieldsArray = DBuilder.getOrCreateArray(fields); @@ -3823,8 +3961,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Create the descriptor for the parameter. auto *debugVar = DBuilder.createParameterVariable( - scope, Name, ArgNo, tunit, line, type, - CGM.getLangOpts().Optimize, flags); + scope, Name, ArgNo, tunit, line, type, CGM.getLangOpts().Optimize, flags); // Insert an llvm.dbg.declare into the current block. DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(), @@ -3863,7 +4000,7 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls( if (FieldName.empty()) { if (const auto *RT = dyn_cast<RecordType>(Field->getType())) GVE = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, - Var, DContext); + Var, DContext); continue; } // Use VarDecl's Tag, Scope and Line number. @@ -4090,7 +4227,6 @@ void CGDebugInfo::setDwoId(uint64_t Signature) { TheCU->setDWOId(Signature); } - void CGDebugInfo::finalize() { // Creating types might create further types - invalidating the current // element and the size(), so don't cache/reference them. 
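For illustration, the EmitDeclare hunks above change the return type from void to the created DILocalVariable pointer, choosing a parameter descriptor when an argument number is present and an auto-variable descriptor otherwise, so callers can reuse the node; the VLA size-expression registration later in the patch depends on exactly that. A simplified standalone sketch of the shape follows; every name and type here is a placeholder, not the Clang API.

#include <memory>
#include <string>
#include <vector>

// Placeholder for the local-variable debug node.
struct LocalVarNode {
  std::string Name;
  bool IsParameter;
};

// Simplified emitDeclare: build the right kind of descriptor and, crucially,
// return it instead of discarding it, so callers can hold on to the node.
static LocalVarNode *
emitDeclare(std::vector<std::unique_ptr<LocalVarNode>> &Pool,
            const std::string &Name, bool HasArgNo) {
  Pool.push_back(std::make_unique<LocalVarNode>());
  LocalVarNode *Var = Pool.back().get();
  Var->Name = Name;
  Var->IsParameter = HasArgNo; // parameter variable vs. auto variable
  return Var;
}

// A caller in the spirit of the VLA dimension handling added by this patch:
// describe a runtime array extent as an artificial variable named after its
// "__vla_expr" temporary and keep the node so the array type can refer to it.
static LocalVarNode *
describeVlaExtent(std::vector<std::unique_ptr<LocalVarNode>> &Pool,
                  unsigned DimIndex) {
  return emitDeclare(Pool, "__vla_expr" + std::to_string(DimIndex),
                     /*HasArgNo=*/false);
}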
@@ -4102,32 +4238,55 @@ void CGDebugInfo::finalize() { DBuilder.replaceTemporary(llvm::TempDIType(E.Decl), Ty); } - for (auto p : ReplaceMap) { - assert(p.second); - auto *Ty = cast<llvm::DIType>(p.second); + if (CGM.getCodeGenOpts().DwarfVersion >= 5) { + // Add methods to interface. + for (const auto &P : ObjCMethodCache) { + if (P.second.empty()) + continue; + + QualType QTy(P.first->getTypeForDecl(), 0); + auto It = TypeCache.find(QTy.getAsOpaquePtr()); + assert(It != TypeCache.end()); + + llvm::DICompositeType *InterfaceDecl = + cast<llvm::DICompositeType>(It->second); + + SmallVector<llvm::Metadata *, 16> EltTys; + auto CurrenetElts = InterfaceDecl->getElements(); + EltTys.append(CurrenetElts.begin(), CurrenetElts.end()); + for (auto &MD : P.second) + EltTys.push_back(MD); + llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys); + DBuilder.replaceArrays(InterfaceDecl, Elements); + } + } + + for (const auto &P : ReplaceMap) { + assert(P.second); + auto *Ty = cast<llvm::DIType>(P.second); assert(Ty->isForwardDecl()); - auto it = TypeCache.find(p.first); - assert(it != TypeCache.end()); - assert(it->second); + auto It = TypeCache.find(P.first); + assert(It != TypeCache.end()); + assert(It->second); DBuilder.replaceTemporary(llvm::TempDIType(Ty), - cast<llvm::DIType>(it->second)); + cast<llvm::DIType>(It->second)); } - for (const auto &p : FwdDeclReplaceMap) { - assert(p.second); - llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(p.second)); + for (const auto &P : FwdDeclReplaceMap) { + assert(P.second); + llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(P.second)); llvm::Metadata *Repl; - auto it = DeclCache.find(p.first); + auto It = DeclCache.find(P.first); // If there has been no definition for the declaration, call RAUW // with ourselves, that will destroy the temporary MDNode and // replace it with a standard one, avoiding leaking memory. - if (it == DeclCache.end()) - Repl = p.second; + if (It == DeclCache.end()) + Repl = P.second; else - Repl = it->second; + Repl = It->second; if (auto *GVE = dyn_cast_or_null<llvm::DIGlobalVariableExpression>(Repl)) Repl = GVE->getVariable(); @@ -4157,6 +4316,5 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) { return llvm::DebugLoc(); llvm::MDNode *Scope = LexicalBlockStack.back(); - return llvm::DebugLoc::get( - getLineNumber(Loc), getColumnNumber(Loc), Scope); + return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope); } diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 4f7b7f2a0d9c..e632806138f0 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/Type.h" +#include "clang/AST/TypeOrdering.h" #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/DenseMap.h" @@ -66,7 +67,7 @@ class CGDebugInfo { llvm::DIType *ClassTy = nullptr; llvm::DICompositeType *ObjTy = nullptr; llvm::DIType *SelTy = nullptr; -#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ llvm::DIType *SingletonId = nullptr; #include "clang/Basic/OpenCLImageTypes.def" llvm::DIType *OCLSamplerDITy = nullptr; @@ -81,6 +82,10 @@ class CGDebugInfo { llvm::SmallDenseMap<llvm::StringRef, llvm::StringRef> DebugPrefixMap; + /// Cache that maps VLA types to size expressions for that type, + /// represented by instantiated Metadata nodes. 
+ llvm::SmallDenseMap<QualType, llvm::Metadata *> SizeExprCache; + struct ObjCInterfaceCacheEntry { const ObjCInterfaceType *Type; llvm::DIType *Decl; @@ -93,6 +98,10 @@ class CGDebugInfo { /// Cache of previously constructed interfaces which may change. llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache; + /// Cache of forward declarations for methods belonging to the interface. + llvm::DenseMap<const ObjCInterfaceDecl *, std::vector<llvm::DISubprogram *>> + ObjCMethodCache; + /// Cache of references to clang modules and precompiled headers. llvm::DenseMap<const Module *, llvm::TrackingMDRef> ModuleCache; @@ -223,12 +232,12 @@ class CGDebugInfo { /// Helper function for CollectCXXBases. /// Adds debug info entries for types in Bases that are not in SeenTypes. - void CollectCXXBasesAux(const CXXRecordDecl *RD, llvm::DIFile *Unit, - SmallVectorImpl<llvm::Metadata *> &EltTys, - llvm::DIType *RecordTy, - const CXXRecordDecl::base_class_const_range &Bases, - llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes, - llvm::DINode::DIFlags StartingFlags); + void CollectCXXBasesAux( + const CXXRecordDecl *RD, llvm::DIFile *Unit, + SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType *RecordTy, + const CXXRecordDecl::base_class_const_range &Bases, + llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes, + llvm::DINode::DIFlags StartingFlags); /// A helper function to collect template parameters. llvm::DINodeArray CollectTemplateParams(const TemplateParameterList *TPList, @@ -247,8 +256,7 @@ class CGDebugInfo { llvm::DIType *createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, - uint64_t offsetInBits, - uint32_t AlignInBits, + uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD = nullptr); @@ -309,6 +317,11 @@ public: void finalize(); + /// Register VLA size expression debug node with the qualified type. + void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) { + SizeExprCache[Ty] = SizeExpr; + } + /// Module debugging: Support for building PCMs. /// @{ /// Set the main CU's DwoId field to \p Signature. @@ -356,7 +369,8 @@ public: /// \param ScopeLoc The location of the function body. void EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, SourceLocation ScopeLoc, QualType FnType, - llvm::Function *Fn, CGBuilderTy &Builder); + llvm::Function *Fn, bool CurFnIsThunk, + CGBuilderTy &Builder); /// Start a new scope for an inlined function. void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD); @@ -379,16 +393,17 @@ public: /// Emit call to \c llvm.dbg.declare for an automatic variable /// declaration. - void EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI, - CGBuilderTy &Builder); + /// Returns a pointer to the DILocalVariable associated with the + /// llvm.dbg.declare, or nullptr otherwise. + llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl, + llvm::Value *AI, + CGBuilderTy &Builder); /// Emit call to \c llvm.dbg.declare for an imported variable /// declaration in a block. 
- void EmitDeclareOfBlockDeclRefVariable(const VarDecl *variable, - llvm::Value *storage, - CGBuilderTy &Builder, - const CGBlockInfo &blockInfo, - llvm::Instruction *InsertPoint = nullptr); + void EmitDeclareOfBlockDeclRefVariable( + const VarDecl *variable, llvm::Value *storage, CGBuilderTy &Builder, + const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint = nullptr); /// Emit call to \c llvm.dbg.declare for an argument variable /// declaration. @@ -451,10 +466,14 @@ public: llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent, SourceLocation LineLoc, SourceLocation FileLoc); + private: /// Emit call to llvm.dbg.declare for a variable declaration. - void EmitDeclare(const VarDecl *decl, llvm::Value *AI, - llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder); + /// Returns a pointer to the DILocalVariable associated with the + /// llvm.dbg.declare, or nullptr otherwise. + llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI, + llvm::Optional<unsigned> ArgNo, + CGBuilderTy &Builder); /// Build up structure info for the byref. See \a BuildByRefType. llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD, @@ -482,8 +501,11 @@ private: std::string remapDIPath(StringRef) const; /// Compute the file checksum debug info for input file ID. - llvm::DIFile::ChecksumKind computeChecksum(FileID FID, - SmallString<32> &Checksum) const; + Optional<llvm::DIFile::ChecksumKind> + computeChecksum(FileID FID, SmallString<32> &Checksum) const; + + /// Get the source of the given file ID. + Optional<StringRef> getSource(const SourceManager &SM, FileID FID); /// Get the file debug info descriptor for the input location. llvm::DIFile *getOrCreateFile(SourceLocation Loc); @@ -637,7 +659,7 @@ public: ~ApplyDebugLocation(); - /// \brief Apply TemporaryLocation if it is valid. Otherwise switch + /// Apply TemporaryLocation if it is valid. Otherwise switch /// to an artificial debug location that has a valid scope, but no /// line information. /// @@ -651,7 +673,7 @@ public: static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF) { return ApplyDebugLocation(CGF, false, SourceLocation()); } - /// \brief Apply TemporaryLocation if it is valid. Otherwise switch + /// Apply TemporaryLocation if it is valid. Otherwise switch /// to an artificial debug location that has a valid scope, but no /// line information. static ApplyDebugLocation @@ -668,7 +690,6 @@ public: static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF) { return ApplyDebugLocation(CGF, true, SourceLocation()); } - }; /// A scoped helper to set the current debug location to an inlined location. diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 04585a8afbb6..57b2fbadbeec 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -229,18 +229,19 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( LangAS AS = GetGlobalVarAddressSpace(&D); unsigned TargetAS = getContext().getTargetAddressSpace(AS); - // Local address space cannot have an initializer. + // OpenCL variables in local address space and CUDA shared + // variables cannot have an initializer. 
llvm::Constant *Init = nullptr; - if (Ty.getAddressSpace() != LangAS::opencl_local) - Init = EmitNullConstant(Ty); - else + if (Ty.getAddressSpace() == LangAS::opencl_local || + D.hasAttr<CUDASharedAttr>()) Init = llvm::UndefValue::get(LTy); + else + Init = EmitNullConstant(Ty); llvm::GlobalVariable *GV = new llvm::GlobalVariable( getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name, nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); - setGlobalVisibility(GV, &D, ForDefinition); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); @@ -248,12 +249,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( if (D.getTLSKind()) setTLSMode(GV, D); - if (D.isExternallyVisible()) { - if (D.hasAttr<DLLImportAttr>()) - GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); - else if (D.hasAttr<DLLExportAttr>()) - GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); - } + setGVProperties(GV, &D); // Make sure the result is of the correct type. LangAS ExpectedAS = Ty.getAddressSpace(); @@ -291,8 +287,11 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( // never defer them. assert(isa<ObjCMethodDecl>(DC) && "unexpected parent code decl"); } - if (GD.getDecl()) + if (GD.getDecl()) { + // Disable emission of the parent function for the OpenMP device codegen. + CGOpenMPRuntime::DisableAutoDeclareTargetRAII NoDeclTarget(*this); (void)GetAddrOfGlobal(GD); + } return Addr; } @@ -344,6 +343,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, OldGV->getThreadLocalMode(), CGM.getContext().getTargetAddressSpace(D.getType())); GV->setVisibility(OldGV->getVisibility()); + GV->setDSOLocal(OldGV->isDSOLocal()); GV->setComdat(OldGV->getComdat()); // Steal the name of the old global @@ -469,13 +469,11 @@ namespace { } }; - struct DestroyNRVOVariable final : EHScopeStack::Cleanup { - DestroyNRVOVariable(Address addr, - const CXXDestructorDecl *Dtor, - llvm::Value *NRVOFlag) - : Dtor(Dtor), NRVOFlag(NRVOFlag), Loc(addr) {} + template <class Derived> + struct DestroyNRVOVariable : EHScopeStack::Cleanup { + DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag) + : NRVOFlag(NRVOFlag), Loc(addr) {} - const CXXDestructorDecl *Dtor; llvm::Value *NRVOFlag; Address Loc; @@ -494,12 +492,39 @@ namespace { CGF.EmitBlock(RunDtorBB); } + static_cast<Derived *>(this)->emitDestructorCall(CGF); + + if (NRVO) CGF.EmitBlock(SkipDtorBB); + } + + virtual ~DestroyNRVOVariable() = default; + }; + + struct DestroyNRVOVariableCXX final + : DestroyNRVOVariable<DestroyNRVOVariableCXX> { + DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor, + llvm::Value *NRVOFlag) + : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag), + Dtor(Dtor) {} + + const CXXDestructorDecl *Dtor; + + void emitDestructorCall(CodeGenFunction &CGF) { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, - Loc); + /*Delegating=*/false, Loc); + } + }; - if (NRVO) CGF.EmitBlock(SkipDtorBB); + struct DestroyNRVOVariableC final + : DestroyNRVOVariable<DestroyNRVOVariableC> { + DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty) + : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {} + + QualType Ty; + + void emitDestructorCall(CodeGenFunction &CGF) { + CGF.destroyNonTrivialCStruct(CGF, Loc, Ty); } }; @@ -821,11 +846,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D, EmitStoreOfScalar(value, lvalue, /* isInitialization */ true); } -/// canEmitInitWithFewStoresAfterMemset - Decide whether we can emit the -/// non-zero parts of the specified initializer with equal or fewer than -/// NumStores scalar stores. -static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, - unsigned &NumStores) { +/// Decide whether we can emit the non-zero parts of the specified initializer +/// with equal or fewer than NumStores scalar stores. +static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init, + unsigned &NumStores) { // Zero and Undef never requires any extra stores. if (isa<llvm::ConstantAggregateZero>(Init) || isa<llvm::ConstantPointerNull>(Init) || @@ -840,7 +864,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) { for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i)); - if (!canEmitInitWithFewStoresAfterMemset(Elt, NumStores)) + if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores)) return false; } return true; @@ -850,7 +874,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { llvm::Constant *Elt = CDS->getElementAsConstant(i); - if (!canEmitInitWithFewStoresAfterMemset(Elt, NumStores)) + if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores)) return false; } return true; @@ -860,18 +884,18 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, return false; } -/// emitStoresForInitAfterMemset - For inits that -/// canEmitInitWithFewStoresAfterMemset returned true for, emit the scalar -/// stores that would be required. -static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, - bool isVolatile, CGBuilderTy &Builder) { +/// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit +/// the scalar stores that would be required. +static void emitStoresForInitAfterBZero(CodeGenModule &CGM, + llvm::Constant *Init, Address Loc, + bool isVolatile, CGBuilderTy &Builder) { assert(!Init->isNullValue() && !isa<llvm::UndefValue>(Init) && - "called emitStoresForInitAfterMemset for zero or undef value."); + "called emitStoresForInitAfterBZero for zero or undef value."); if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) || isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) || isa<llvm::ConstantExpr>(Init)) { - Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile); + Builder.CreateStore(Init, Loc, isVolatile); return; } @@ -882,8 +906,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, // If necessary, get a pointer to the element and emit it. if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) - emitStoresForInitAfterMemset( - Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i), + emitStoresForInitAfterBZero( + CGM, Elt, + Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), isVolatile, Builder); } return; @@ -897,19 +922,19 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, // If necessary, get a pointer to the element and emit it. 
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) - emitStoresForInitAfterMemset( - Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i), + emitStoresForInitAfterBZero( + CGM, Elt, + Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), isVolatile, Builder); } } -/// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset -/// plus some stores to initialize a local variable instead of using a memcpy -/// from a constant global. It is beneficial to use memset if the global is all -/// zeros, or mostly zeros and large. -static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init, - uint64_t GlobalSize) { - // If a global is all zeros, always use a memset. +/// Decide whether we should use bzero plus some stores to initialize a local +/// variable instead of using a memcpy from a constant global. It is beneficial +/// to use bzero if the global is all zeros, or mostly zeros and large. +static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init, + uint64_t GlobalSize) { + // If a global is all zeros, always use a bzero. if (isa<llvm::ConstantAggregateZero>(Init)) return true; // If a non-zero global is <= 32 bytes, always use a memcpy. If it is large, @@ -920,7 +945,114 @@ static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init, uint64_t SizeLimit = 32; return GlobalSize > SizeLimit && - canEmitInitWithFewStoresAfterMemset(Init, StoreBudget); + canEmitInitWithFewStoresAfterBZero(Init, StoreBudget); +} + +/// A byte pattern. +/// +/// Can be "any" pattern if the value was padding or known to be undef. +/// Can be "none" pattern if a sequence doesn't exist. +class BytePattern { + uint8_t Val; + enum class ValueType : uint8_t { Specific, Any, None } Type; + BytePattern(ValueType Type) : Type(Type) {} + +public: + BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {} + static BytePattern Any() { return BytePattern(ValueType::Any); } + static BytePattern None() { return BytePattern(ValueType::None); } + bool isAny() const { return Type == ValueType::Any; } + bool isNone() const { return Type == ValueType::None; } + bool isValued() const { return Type == ValueType::Specific; } + uint8_t getValue() const { + assert(isValued()); + return Val; + } + BytePattern merge(const BytePattern Other) const { + if (isNone() || Other.isNone()) + return None(); + if (isAny()) + return Other; + if (Other.isAny()) + return *this; + if (getValue() == Other.getValue()) + return *this; + return None(); + } +}; + +/// Figures out whether the constant can be initialized with memset. 
+static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) { + if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C)) + return BytePattern(0x00); + if (isa<llvm::UndefValue>(C)) + return BytePattern::Any(); + + if (isa<llvm::ConstantInt>(C)) { + auto *Int = cast<llvm::ConstantInt>(C); + if (Int->getBitWidth() % 8 != 0) + return BytePattern::None(); + const llvm::APInt &Value = Int->getValue(); + if (Value.isSplat(8)) + return BytePattern(Value.getLoBits(8).getLimitedValue()); + return BytePattern::None(); + } + + if (isa<llvm::ConstantFP>(C)) { + auto *FP = cast<llvm::ConstantFP>(C); + llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt(); + if (Bits.getBitWidth() % 8 != 0) + return BytePattern::None(); + if (!Bits.isSplat(8)) + return BytePattern::None(); + return BytePattern(Bits.getLimitedValue() & 0xFF); + } + + if (isa<llvm::ConstantVector>(C)) { + llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue(); + if (Splat) + return constantIsRepeatedBytePattern(Splat); + return BytePattern::None(); + } + + if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) { + BytePattern Pattern(BytePattern::Any()); + for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) { + llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I)); + Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt)); + if (Pattern.isNone()) + return Pattern; + } + return Pattern; + } + + if (llvm::ConstantDataSequential *CDS = + dyn_cast<llvm::ConstantDataSequential>(C)) { + BytePattern Pattern(BytePattern::Any()); + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { + llvm::Constant *Elt = CDS->getElementAsConstant(I); + Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt)); + if (Pattern.isNone()) + return Pattern; + } + return Pattern; + } + + // BlockAddress, ConstantExpr, and everything else is scary. + return BytePattern::None(); +} + +/// Decide whether we should use memset to initialize a local variable instead +/// of using a memcpy from a constant global. Assumes we've already decided to +/// not user bzero. +/// FIXME We could be more clever, as we are for bzero above, and generate +/// memset followed by stores. It's unclear that's worth the effort. 
+static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init, + uint64_t GlobalSize) { + uint64_t SizeLimit = 32; + if (GlobalSize <= SizeLimit) + return BytePattern::None(); + return constantIsRepeatedBytePattern(Init); } /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a @@ -940,6 +1072,9 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, if (!ShouldEmitLifetimeMarkers) return nullptr; + assert(Addr->getType()->getPointerAddressSpace() == + CGM.getDataLayout().getAllocaAddrSpace() && + "Pointer should be in alloca address space"); llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size); Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = @@ -949,12 +1084,68 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, } void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { + assert(Addr->getType()->getPointerAddressSpace() == + CGM.getDataLayout().getAllocaAddrSpace() && + "Pointer should be in alloca address space"); Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr}); C->setDoesNotThrow(); } +void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( + CGDebugInfo *DI, const VarDecl &D, bool EmitDebugInfo) { + // For each dimension stores its QualType and corresponding + // size-expression Value. + SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions; + + // Break down the array into individual dimensions. + QualType Type1D = D.getType(); + while (getContext().getAsVariableArrayType(Type1D)) { + auto VlaSize = getVLAElements1D(Type1D); + if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts)) + Dimensions.emplace_back(C, Type1D.getUnqualifiedType()); + else { + auto SizeExprAddr = CreateDefaultAlignTempAlloca( + VlaSize.NumElts->getType(), "__vla_expr"); + Builder.CreateStore(VlaSize.NumElts, SizeExprAddr); + Dimensions.emplace_back(SizeExprAddr.getPointer(), + Type1D.getUnqualifiedType()); + } + Type1D = VlaSize.Type; + } + + if (!EmitDebugInfo) + return; + + // Register each dimension's size-expression with a DILocalVariable, + // so that it can be used by CGDebugInfo when instantiating a DISubrange + // to describe this array. + for (auto &VlaSize : Dimensions) { + llvm::Metadata *MD; + if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts)) + MD = llvm::ConstantAsMetadata::get(C); + else { + // Create an artificial VarDecl to generate debug info for. + IdentifierInfo &NameIdent = getContext().Idents.getOwn( + cast<llvm::AllocaInst>(VlaSize.NumElts)->getName()); + auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType(); + auto QT = getContext().getIntTypeForBitwidth( + VlaExprTy->getScalarSizeInBits(), false); + auto *ArtificialDecl = VarDecl::Create( + getContext(), const_cast<DeclContext *>(D.getDeclContext()), + D.getLocation(), D.getLocation(), &NameIdent, QT, + getContext().CreateTypeSourceInfo(QT), SC_Auto); + ArtificialDecl->setImplicit(); + + MD = DI->EmitDeclareOfAutoVariable(ArtificialDecl, VlaSize.NumElts, + Builder); + } + assert(MD && "No Size expression debug node created"); + DI->registerVLASizeExpression(VlaSize.Type, MD); + } +} + /// EmitAutoVarAlloca - Emit the alloca and debug information for a /// local variable. Does not emit initialization or destruction. 
CodeGenFunction::AutoVarEmission @@ -975,7 +1166,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (Ty->isVariablyModifiedType()) EmitVariablyModifiedType(Ty); + auto *DI = getDebugInfo(); + bool EmitDebugInfo = DI && CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo; + Address address = Address::invalid(); + Address AllocaAddr = Address::invalid(); if (Ty->isConstantSizeType()) { bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable(); @@ -1016,16 +1212,27 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { } // A normal fixed sized variable becomes an alloca in the entry block, - // unless it's an NRVO variable. - - if (NRVO) { + // unless: + // - it's an NRVO variable. + // - we are compiling OpenMP and it's an OpenMP local variable. + + Address OpenMPLocalAddr = + getLangOpts().OpenMP + ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) + : Address::invalid(); + if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { + address = OpenMPLocalAddr; + } else if (NRVO) { // The named return value optimization: allocate this variable in the // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). address = ReturnValue; if (const RecordType *RecordTy = Ty->getAs<RecordType>()) { - if (!cast<CXXRecordDecl>(RecordTy->getDecl())->hasTrivialDestructor()) { + const auto *RD = RecordTy->getDecl(); + const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD); + if ((CXXRD && !CXXRD->hasTrivialDestructor()) || + RD->isNonTrivialToPrimitiveDestroy()) { // Create a flag that is used to indicate when the NRVO was applied // to this variable. Set it to zero to indicate that NRVO was not // applied. @@ -1055,7 +1262,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName()); + address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(), + /*ArraySize=*/nullptr, &AllocaAddr); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't @@ -1083,7 +1291,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy); emission.SizeForLifetimeMarkers = - EmitLifetimeStart(size, address.getPointer()); + EmitLifetimeStart(size, AllocaAddr.getPointer()); } } else { assert(!emission.useLifetimeMarkers()); @@ -1108,28 +1316,28 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { pushStackRestore(NormalCleanup, Stack); } - llvm::Value *elementCount; - QualType elementType; - std::tie(elementCount, elementType) = getVLASize(Ty); - - llvm::Type *llvmTy = ConvertTypeForMem(elementType); + auto VlaSize = getVLASize(Ty); + llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type); // Allocate memory for the array. - address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount); + address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts, + &AllocaAddr); + + // If we have debug info enabled, properly describe the VLA dimensions for + // this type by registering the vla size expression for each of the + // dimensions. 
+ EmitAndRegisterVariableArrayDimensions(DI, D, EmitDebugInfo); } setAddrOfLocalVar(&D, address); emission.Addr = address; + emission.AllocaAddr = AllocaAddr; // Emit debug info for local var declaration. - if (HaveInsertPoint()) - if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo) { - DI->setLocation(D.getLocation()); - DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); - } - } + if (EmitDebugInfo && HaveInsertPoint()) { + DI->setLocation(D.getLocation()); + (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); + } if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, address.getPointer()); @@ -1137,23 +1345,36 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Make sure we call @llvm.lifetime.end. if (emission.useLifetimeMarkers()) EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, - emission.getAllocatedAddress(), + emission.getOriginalAllocatedAddress(), emission.getSizeForLifetimeMarkers()); return emission; } +static bool isCapturedBy(const VarDecl &, const Expr *); + +/// Determines whether the given __block variable is potentially +/// captured by the given statement. +static bool isCapturedBy(const VarDecl &Var, const Stmt *S) { + if (const Expr *E = dyn_cast<Expr>(S)) + return isCapturedBy(Var, E); + for (const Stmt *SubStmt : S->children()) + if (isCapturedBy(Var, SubStmt)) + return true; + return false; +} + /// Determines whether the given __block variable is potentially /// captured by the given expression. -static bool isCapturedBy(const VarDecl &var, const Expr *e) { +static bool isCapturedBy(const VarDecl &Var, const Expr *E) { // Skip the most common kinds of expressions that make // hierarchy-walking expensive. - e = e->IgnoreParenCasts(); + E = E->IgnoreParenCasts(); - if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) { - const BlockDecl *block = be->getBlockDecl(); - for (const auto &I : block->captures()) { - if (I.getVariable() == &var) + if (const BlockExpr *BE = dyn_cast<BlockExpr>(E)) { + const BlockDecl *Block = BE->getBlockDecl(); + for (const auto &I : Block->captures()) { + if (I.getVariable() == &Var) return true; } @@ -1161,19 +1382,19 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) { return false; } - if (const StmtExpr *SE = dyn_cast<StmtExpr>(e)) { + if (const StmtExpr *SE = dyn_cast<StmtExpr>(E)) { const CompoundStmt *CS = SE->getSubStmt(); for (const auto *BI : CS->body()) - if (const auto *E = dyn_cast<Expr>(BI)) { - if (isCapturedBy(var, E)) - return true; + if (const auto *BIE = dyn_cast<Expr>(BI)) { + if (isCapturedBy(Var, BIE)) + return true; } else if (const auto *DS = dyn_cast<DeclStmt>(BI)) { // special case declarations for (const auto *I : DS->decls()) { if (const auto *VD = dyn_cast<VarDecl>((I))) { const Expr *Init = VD->getInit(); - if (Init && isCapturedBy(var, Init)) + if (Init && isCapturedBy(Var, Init)) return true; } } @@ -1185,14 +1406,14 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) { return false; } - for (const Stmt *SubStmt : e->children()) - if (isCapturedBy(var, cast<Expr>(SubStmt))) + for (const Stmt *SubStmt : E->children()) + if (isCapturedBy(Var, SubStmt)) return true; return false; } -/// \brief Determine whether the given initializer is trivial in the sense +/// Determine whether the given initializer is trivial in the sense /// that it requires no code to be generated. 
bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { if (!Init) @@ -1232,6 +1453,19 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { if (emission.IsByRef) emitByrefStructureInit(emission); + // Initialize the variable here if it doesn't have a initializer and it is a + // C struct that is non-trivial to initialize or an array containing such a + // struct. + if (!Init && + type.isNonTrivialToPrimitiveDefaultInitialize() == + QualType::PDIK_Struct) { + LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type); + if (emission.IsByRef) + drillIntoBlockVariable(*this, Dst, &D); + defaultInitNonTrivialCStructVar(Dst); + return; + } + if (isTrivialInitializer(Init)) return; @@ -1270,58 +1504,66 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::ConstantInt::get(IntPtrTy, getContext().getTypeSizeInChars(type).getQuantity()); - llvm::Type *BP = AllocaInt8PtrTy; + llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); if (Loc.getType() != BP) Loc = Builder.CreateBitCast(Loc, BP); - // If the initializer is all or mostly zeros, codegen with memset then do - // a few stores afterward. - if (shouldUseMemSetPlusStoresToInitialize(constant, - CGM.getDataLayout().getTypeAllocSize(constant->getType()))) { + // If the initializer is all or mostly the same, codegen with bzero / memset + // then do a few stores afterward. + uint64_t ConstantSize = + CGM.getDataLayout().getTypeAllocSize(constant->getType()); + if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, isVolatile); // Zero and undef don't require a stores. if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) { - Loc = Builder.CreateBitCast(Loc, constant->getType()->getPointerTo()); - emitStoresForInitAfterMemset(constant, Loc.getPointer(), - isVolatile, Builder); - } - } else { - // Otherwise, create a temporary global with the initializer then - // memcpy from the global to the alloca. - std::string Name = getStaticDeclName(CGM, D); - unsigned AS = 0; - if (getLangOpts().OpenCL) { - AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); - BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); + Loc = Builder.CreateBitCast(Loc, + constant->getType()->getPointerTo(Loc.getAddressSpace())); + emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder); } - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true, - llvm::GlobalValue::PrivateLinkage, - constant, Name, nullptr, - llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Loc.getAlignment().getQuantity()); - GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - Address SrcPtr = Address(GV, Loc.getAlignment()); - if (SrcPtr.getType() != BP) - SrcPtr = Builder.CreateBitCast(SrcPtr, BP); + return; + } - Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile); + BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); + if (!Pattern.isNone()) { + uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue(); + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal, + isVolatile); + return; } + + // Otherwise, create a temporary global with the initializer then + // memcpy from the global to the alloca. 
+ std::string Name = getStaticDeclName(CGM, D); + unsigned AS = CGM.getContext().getTargetAddressSpace( + CGM.getStringLiteralAddressSpace()); + BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); + + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + CGM.getModule(), constant->getType(), true, + llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr, + llvm::GlobalValue::NotThreadLocal, AS); + GV->setAlignment(Loc.getAlignment().getQuantity()); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + Address SrcPtr = Address(GV, Loc.getAlignment()); + if (SrcPtr.getType() != BP) + SrcPtr = Builder.CreateBitCast(SrcPtr, BP); + + Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile); } -/// Emit an expression as an initializer for a variable at the given -/// location. The expression is not necessarily the normal -/// initializer for the variable, and the address is not necessarily +/// Emit an expression as an initializer for an object (variable, field, etc.) +/// at the given location. The expression is not necessarily the normal +/// initializer for the object, and the address is not necessarily /// its normal location. /// /// \param init the initializing expression -/// \param var the variable to act as if we're initializing +/// \param D the object to act as if we're initializing /// \param loc the address to initialize; its type is a pointer -/// to the LLVM mapping of the variable's type +/// to the LLVM mapping of the object's type /// \param alignment the alignment of the address -/// \param capturedByInit true if the variable is a __block variable +/// \param capturedByInit true if \p D is a __block variable /// whose address is potentially changed by the initializer void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit) { @@ -1349,11 +1591,17 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, if (type->isAtomicType()) { EmitAtomicInit(const_cast<Expr*>(init), lvalue); } else { + AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap; + if (isa<VarDecl>(D)) + Overlap = AggValueSlot::DoesNotOverlap; + else if (auto *FD = dyn_cast<FieldDecl>(D)) + Overlap = overlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + Overlap)); } return; } @@ -1386,8 +1634,8 @@ void CodeGenFunction::emitAutoVarTypeCleanup( if (emission.NRVOFlag) { assert(!type->isArrayType()); CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); - EHStack.pushCleanup<DestroyNRVOVariable>(cleanupKind, addr, - dtor, emission.NRVOFlag); + EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor, + emission.NRVOFlag); return; } break; @@ -1406,6 +1654,16 @@ void CodeGenFunction::emitAutoVarTypeCleanup( case QualType::DK_objc_weak_lifetime: break; + + case QualType::DK_nontrivial_c_struct: + destroyer = CodeGenFunction::destroyNonTrivialCStruct; + if (emission.NRVOFlag) { + assert(!type->isArrayType()); + EHStack.pushCleanup<DestroyNRVOVariableC>(cleanupKind, addr, + emission.NRVOFlag, type); + return; + } + break; } // If we haven't chosen a more specific destroyer, use the default. 
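For illustration, the CGDecl.cpp hunks above split local constant initialization three ways: a memset of zero plus a few scalar stores when the initializer is all or mostly zeros, a single pattern memset when a large initializer reduces to one repeated byte (the BytePattern lattice of Specific/Any/None values), and otherwise the existing memcpy from a private constant global. A compact standalone sketch of that decision follows; the helper results are modeled as plain booleans, since the real code derives them from the constant initializer.

#include <cstdint>

enum class LocalInitStrategy { BZeroPlusStores, PatternMemSet, MemCpyFromGlobal };

// Facts the patched EmitAutoVarInit derives from the constant initializer.
struct ConstantInitInfo {
  uint64_t SizeInBytes;        // total object size
  bool MostlyZero;             // result of the bzero-plus-stores check
  bool HasRepeatedBytePattern; // a single repeated byte pattern was found
};

// Same priority order as the patch: zeros first, then a repeated byte pattern
// (only considered above the 32-byte threshold), then the memcpy fallback.
static LocalInitStrategy chooseLocalInitStrategy(const ConstantInitInfo &Info) {
  if (Info.MostlyZero)
    return LocalInitStrategy::BZeroPlusStores;
  if (Info.SizeInBytes > 32 && Info.HasRepeatedBytePattern)
    return LocalInitStrategy::PatternMemSet;
  return LocalInitStrategy::MemCpyFromGlobal;
}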
@@ -1452,9 +1710,15 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { } // If this is a block variable, call _Block_object_destroy - // (on the unforwarded address). - if (emission.IsByRef) - enterByrefCleanup(emission); + // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC + // mode. + if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) { + BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; + if (emission.Variable->getType().isObjCGCWeak()) + Flags |= BLOCK_FIELD_IS_WEAK; + enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags, + /*LoadBlockVarAddr*/ false); + } } CodeGenFunction::Destroyer * @@ -1467,6 +1731,8 @@ CodeGenFunction::getDestroyer(QualType::DestructionKind kind) { return destroyARCStrongPrecise; case QualType::DK_objc_weak_lifetime: return destroyARCWeak; + case QualType::DK_nontrivial_c_struct: + return destroyNonTrivialCStruct; } llvm_unreachable("Unknown DestructionKind"); } @@ -1506,9 +1772,6 @@ void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) { void CodeGenFunction::pushLifetimeExtendedDestroy( CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray) { - assert(!isInConditionalBranch() && - "performing lifetime extension from within conditional"); - // Push an EH-only cleanup for the object now. // FIXME: When popping normal cleanups, we need to keep this EH cleanup // around in case a temporary's destructor throws an exception. @@ -1791,9 +2054,12 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Use better IR generation for certain implicit parameters. if (auto IPD = dyn_cast<ImplicitParamDecl>(&D)) { // The only implicit argument a block has is its literal. - // We assume this is always passed directly. + // This may be passed as an inalloca'ed value on Windows x86. if (BlockInfo) { - setBlockContextParameter(IPD, ArgNo, Arg.getDirectValue()); + llvm::Value *V = Arg.isIndirect() + ? Builder.CreateLoad(Arg.getIndirectAddress()) + : Arg.getDirectValue(); + setBlockContextParameter(IPD, ArgNo, V); return; } } @@ -1809,20 +2075,50 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS); if (DeclPtr.getType() != IRTy) DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName()); + // Indirect argument is in alloca address space, which may be different + // from the default address space. + auto AllocaAS = CGM.getASTAllocaAddressSpace(); + auto *V = DeclPtr.getPointer(); + auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS; + auto DestLangAS = + getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default; + if (SrcLangAS != DestLangAS) { + assert(getContext().getTargetAddressSpace(SrcLangAS) == + CGM.getDataLayout().getAllocaAddrSpace()); + auto DestAS = getContext().getTargetAddressSpace(DestLangAS); + auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS); + DeclPtr = Address(getTargetHooks().performAddrSpaceCast( + *this, V, SrcLangAS, DestLangAS, T, true), + DeclPtr.getAlignment()); + } // Push a destructor cleanup for this parameter if the ABI requires it. // Don't push a cleanup in a thunk for a method that will also emit a // cleanup. 
- if (!IsScalar && !CurFuncIsThunk && - getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { - const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - if (RD && RD->hasNonTrivialDestructor()) - pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty); + if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk && + Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { + if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) { + assert((DtorKind == QualType::DK_cxx_destructor || + DtorKind == QualType::DK_nontrivial_c_struct) && + "unexpected destructor type"); + pushDestroy(DtorKind, DeclPtr, Ty); + CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] = + EHStack.stable_begin(); + } } } else { - // Otherwise, create a temporary to hold the value. - DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D), - D.getName() + ".addr"); + // Check if the parameter address is controlled by OpenMP runtime. + Address OpenMPLocalAddr = + getLangOpts().OpenMP + ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) + : Address::invalid(); + if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { + DeclPtr = OpenMPLocalAddr; + } else { + // Otherwise, create a temporary to hold the value. + DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D), + D.getName() + ".addr"); + } DoStore = true; } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 042997831702..5e237d7e0b69 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -53,7 +53,8 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, case TEK_Aggregate: CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); return; } llvm_unreachable("bad evaluation kind"); @@ -79,6 +80,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, case QualType::DK_objc_strong_lifetime: case QualType::DK_objc_weak_lifetime: + case QualType::DK_nontrivial_c_struct: // We don't care about releasing objects during process teardown. assert(!D.getTLSKind() && "should have rejected this"); return; @@ -173,10 +175,12 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, ConstantAddress DeclAddr(DeclPtr, getContext().getDeclAlign(&D)); if (!T->isReferenceType()) { - if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>()) + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && + D.hasAttr<OMPThreadPrivateDeclAttr>()) { (void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition( &D, DeclAddr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(), PerformInit, this); + } if (PerformInit) EmitDeclInit(*this, D, DeclAddr); if (CGM.isTypeConstant(D.getType(), true)) @@ -232,7 +236,10 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, llvm::Constant *addr) { // Create a function which calls the destructor. 
llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr); + registerGlobalDtorWithAtExit(dtorStub); +} +void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { // extern "C" int atexit(void (*f)(void)); llvm::FunctionType *atexitTy = llvm::FunctionType::get(IntTy, dtorStub->getType(), false); @@ -309,7 +316,7 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( Fn->setSection(Section); } - SetInternalFunctionAttributes(nullptr, Fn, FI); + SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); Fn->setCallingConv(getRuntimeCC()); @@ -328,6 +335,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::HWAddress, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); + if (getLangOpts().Sanitize.has(SanitizerKind::KernelHWAddress) && + !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); + if (getLangOpts().Sanitize.has(SanitizerKind::Thread) && !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); @@ -340,6 +351,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SafeStack); + if (getLangOpts().Sanitize.has(SanitizerKind::ShadowCallStack) && + !isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::ShadowCallStack); + return Fn; } @@ -376,6 +391,10 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, D->hasAttr<CUDASharedAttr>())) return; + if (getLangOpts().OpenMP && + getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit)) + return; + // Check if we've already initialized this decl. 
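// Illustrative sketch for the registerGlobalDtorWithAtExit split earlier in
// this hunk: the new overload accepts an already-built stub, but the
// registration it performs still corresponds to the source-level pattern
// below. All names here are example-only.
#include <cstdio>
#include <cstdlib>

struct Logger {
  ~Logger() { std::puts("Logger destroyed at exit"); }
};

static Logger *GlobalLogger = nullptr;

// The "dtor stub": a void(void) function with the signature atexit() expects,
// which simply runs the destructor for the global's storage.
static void destroyGlobalLogger() { delete GlobalLogger; }

int main() {
  GlobalLogger = new Logger;            // dynamic initialization of the global
  std::atexit(&destroyGlobalLogger);    // what the emitted atexit call does
  return 0;                             // the stub runs during process exit
}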
auto I = DelayedCXXInitPosition.find(D); if (I != DelayedCXXInitPosition.end() && I->second == ~0U) diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 1ec084ff3f5b..c9820c242554 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -65,7 +65,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() { if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015)) name = "__std_terminate"; else - name = "\01?terminate@@YAXXZ"; + name = "?terminate@@YAXXZ"; } else if (getLangOpts().ObjC1 && getLangOpts().ObjCRuntime.hasTerminate()) name = "objc_terminate"; @@ -111,21 +111,32 @@ const EHPersonality EHPersonality::MSVC_C_specific_handler = { "__C_specific_handler", nullptr }; const EHPersonality EHPersonality::MSVC_CxxFrameHandler3 = { "__CxxFrameHandler3", nullptr }; +const EHPersonality +EHPersonality::GNU_Wasm_CPlusPlus = { "__gxx_wasm_personality_v0", nullptr }; -static const EHPersonality &getCPersonality(const llvm::Triple &T, +static const EHPersonality &getCPersonality(const TargetInfo &Target, const LangOptions &L) { + const llvm::Triple &T = Target.getTriple(); + if (T.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; if (L.SjLjExceptions) return EHPersonality::GNU_C_SJLJ; + if (L.DWARFExceptions) + return EHPersonality::GNU_C; if (L.SEHExceptions) return EHPersonality::GNU_C_SEH; return EHPersonality::GNU_C; } -static const EHPersonality &getObjCPersonality(const llvm::Triple &T, +static const EHPersonality &getObjCPersonality(const TargetInfo &Target, const LangOptions &L) { + const llvm::Triple &T = Target.getTriple(); + if (T.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + switch (L.ObjCRuntime.getKind()) { case ObjCRuntime::FragileMacOSX: - return getCPersonality(T, L); + return getCPersonality(Target, L); case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: @@ -145,24 +156,37 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T, llvm_unreachable("bad runtime kind"); } -static const EHPersonality &getCXXPersonality(const llvm::Triple &T, +static const EHPersonality &getCXXPersonality(const TargetInfo &Target, const LangOptions &L) { + const llvm::Triple &T = Target.getTriple(); + if (T.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; if (L.SjLjExceptions) return EHPersonality::GNU_CPlusPlus_SJLJ; + if (L.DWARFExceptions) + return EHPersonality::GNU_CPlusPlus; if (L.SEHExceptions) return EHPersonality::GNU_CPlusPlus_SEH; + // Wasm EH is a non-MVP feature for now. + if (Target.hasFeature("exception-handling") && + (T.getArch() == llvm::Triple::wasm32 || + T.getArch() == llvm::Triple::wasm64)) + return EHPersonality::GNU_Wasm_CPlusPlus; return EHPersonality::GNU_CPlusPlus; } /// Determines the personality function to use when both C++ /// and Objective-C exceptions are being caught. -static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, +static const EHPersonality &getObjCXXPersonality(const TargetInfo &Target, const LangOptions &L) { + if (Target.getTriple().isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + switch (L.ObjCRuntime.getKind()) { // In the fragile ABI, just use C++ exception handling and hope // they're not doing crazy exception mixing. case ObjCRuntime::FragileMacOSX: - return getCXXPersonality(T, L); + return getCXXPersonality(Target, L); // The ObjC personality defers to the C++ personality for non-ObjC // handlers. 
Unlike the C++ case, we use the same personality @@ -170,7 +194,7 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: - return getObjCPersonality(T, L); + return getObjCPersonality(Target, L); case ObjCRuntime::GNUstep: return EHPersonality::GNU_ObjCXX; @@ -179,7 +203,7 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, // mixed EH. Use the ObjC personality just to avoid returning null. case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: - return getObjCPersonality(T, L); + return getObjCPersonality(Target, L); } llvm_unreachable("bad runtime kind"); } @@ -194,30 +218,17 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM, const FunctionDecl *FD) { const llvm::Triple &T = CGM.getTarget().getTriple(); const LangOptions &L = CGM.getLangOpts(); + const TargetInfo &Target = CGM.getTarget(); // Functions using SEH get an SEH personality. if (FD && FD->usesSEHTry()) return getSEHPersonalityMSVC(T); - // Try to pick a personality function that is compatible with MSVC if we're - // not compiling Obj-C. Obj-C users better have an Obj-C runtime that supports - // the GCC-style personality function. - if (T.isWindowsMSVCEnvironment() && !L.ObjC1) { - if (L.SjLjExceptions) - return EHPersonality::GNU_CPlusPlus_SJLJ; - if (L.DWARFExceptions) - return EHPersonality::GNU_CPlusPlus; - return EHPersonality::MSVC_CxxFrameHandler3; - } - - if (L.CPlusPlus && L.ObjC1) - return getObjCXXPersonality(T, L); - else if (L.CPlusPlus) - return getCXXPersonality(T, L); - else if (L.ObjC1) - return getObjCPersonality(T, L); - else - return getCPersonality(T, L); + if (L.ObjC1) + return L.CPlusPlus ? getObjCXXPersonality(Target, L) + : getObjCPersonality(Target, L); + return L.CPlusPlus ? getCXXPersonality(Target, L) + : getCPersonality(Target, L); } const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { @@ -313,8 +324,7 @@ void CodeGenModule::SimplifyPersonality() { return; const EHPersonality &ObjCXX = EHPersonality::get(*this, /*FD=*/nullptr); - const EHPersonality &CXX = - getCXXPersonality(getTarget().getTriple(), LangOpts); + const EHPersonality &CXX = getCXXPersonality(getTarget(), LangOpts); if (&ObjCXX == &CXX) return; @@ -448,11 +458,9 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) { return; ExceptionSpecificationType EST = Proto->getExceptionSpecType(); - if (isNoexceptExceptionSpec(EST)) { - if (Proto->getNoexceptSpec(getContext()) == FunctionProtoType::NR_Nothrow) { - // noexcept functions are simple terminate scopes. - EHStack.pushTerminate(); - } + if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) { + // noexcept functions are simple terminate scopes. + EHStack.pushTerminate(); } else if (EST == EST_Dynamic || EST == EST_DynamicNone) { // TODO: Revisit exception specifications for the MS ABI. There is a way to // encode these in an object file but MSVC doesn't do anything with it. @@ -527,10 +535,8 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { return; ExceptionSpecificationType EST = Proto->getExceptionSpecType(); - if (isNoexceptExceptionSpec(EST)) { - if (Proto->getNoexceptSpec(getContext()) == FunctionProtoType::NR_Nothrow) { - EHStack.popTerminate(); - } + if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) { + EHStack.popTerminate(); } else if (EST == EST_Dynamic || EST == EST_DynamicNone) { // TODO: Revisit exception specifications for the MS ABI. 
There is a way to // encode these in an object file but MSVC doesn't do anything with it. @@ -584,7 +590,7 @@ void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { llvm::BasicBlock * CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) { if (EHPersonality::get(*this).usesFuncletPads()) - return getMSVCDispatchBlock(si); + return getFuncletEHDispatchBlock(si); // The dispatch block for the end of the scope chain is a block that // just resumes unwinding. @@ -632,7 +638,7 @@ CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) { } llvm::BasicBlock * -CodeGenFunction::getMSVCDispatchBlock(EHScopeStack::stable_iterator SI) { +CodeGenFunction::getFuncletEHDispatchBlock(EHScopeStack::stable_iterator SI) { // Returning nullptr indicates that the previous dispatch block should unwind // to caller. if (SI == EHStack.stable_end()) @@ -646,7 +652,7 @@ CodeGenFunction::getMSVCDispatchBlock(EHScopeStack::stable_iterator SI) { return DispatchBlock; if (EHS.getKind() == EHScope::Terminate) - DispatchBlock = getTerminateHandler(); + DispatchBlock = getTerminateFunclet(); else DispatchBlock = createBasicBlock(); CGBuilderTy Builder(*this, DispatchBlock); @@ -926,10 +932,121 @@ static void emitCatchPadBlock(CodeGenFunction &CGF, EHCatchScope &CatchScope) { CGF.Builder.restoreIP(SavedIP); } +// Wasm uses Windows-style EH instructions, but it merges all catch clauses into +// one big catchpad, within which we use Itanium's landingpad-style selector +// comparison instructions. +static void emitWasmCatchPadBlock(CodeGenFunction &CGF, + EHCatchScope &CatchScope) { + llvm::BasicBlock *DispatchBlock = CatchScope.getCachedEHDispatchBlock(); + assert(DispatchBlock); + + CGBuilderTy::InsertPoint SavedIP = CGF.Builder.saveIP(); + CGF.EmitBlockAfterUses(DispatchBlock); + + llvm::Value *ParentPad = CGF.CurrentFuncletPad; + if (!ParentPad) + ParentPad = llvm::ConstantTokenNone::get(CGF.getLLVMContext()); + llvm::BasicBlock *UnwindBB = + CGF.getEHDispatchBlock(CatchScope.getEnclosingEHScope()); + + unsigned NumHandlers = CatchScope.getNumHandlers(); + llvm::CatchSwitchInst *CatchSwitch = + CGF.Builder.CreateCatchSwitch(ParentPad, UnwindBB, NumHandlers); + + // We don't use a landingpad instruction, so generate intrinsic calls to + // provide exception and selector values. + llvm::BasicBlock *WasmCatchStartBlock = CGF.createBasicBlock("catch.start"); + CatchSwitch->addHandler(WasmCatchStartBlock); + CGF.EmitBlockAfterUses(WasmCatchStartBlock); + + // Create a catchpad instruction. + SmallVector<llvm::Value *, 4> CatchTypes; + for (unsigned I = 0, E = NumHandlers; I < E; ++I) { + const EHCatchScope::Handler &Handler = CatchScope.getHandler(I); + CatchTypeInfo TypeInfo = Handler.Type; + if (!TypeInfo.RTTI) + TypeInfo.RTTI = llvm::Constant::getNullValue(CGF.VoidPtrTy); + CatchTypes.push_back(TypeInfo.RTTI); + } + auto *CPI = CGF.Builder.CreateCatchPad(CatchSwitch, CatchTypes); + + // Create calls to wasm.get.exception and wasm.get.ehselector intrinsics. + // Before they are lowered appropriately later, they provide values for the + // exception and selector. 
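// Illustrative sketch (plain C++, not the emitted IR) of the dispatch chain
// emitWasmCatchPadBlock builds: one shared catchpad, then the selector from
// wasm.get.ehselector is compared against each clause's eh_typeid_for value in
// order, falling through to a trailing rethrow block when nothing matches.
#include <cstdio>
#include <vector>

// Returns the matching handler index, or -1 to stand in for the rethrow block.
static int dispatchToHandler(int Selector,
                             const std::vector<int> &ClauseTypeIndices) {
  for (size_t I = 0; I < ClauseTypeIndices.size(); ++I)
    if (Selector == ClauseTypeIndices[I])    // icmp eq %selector, %typeid
      return static_cast<int>(I);            // branch to that catch handler
  return -1;                                 // unwind to the enclosing EH scope
}

int main() {
  std::printf("%d\n", dispatchToHandler(7, {3, 7}));   // second clause matches
  std::printf("%d\n", dispatchToHandler(5, {3, 7}));   // no match: rethrow
}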
+ llvm::Value *GetExnFn = + CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); + llvm::Value *GetSelectorFn = + CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_ehselector); + llvm::CallInst *Exn = CGF.Builder.CreateCall(GetExnFn, CPI); + CGF.Builder.CreateStore(Exn, CGF.getExceptionSlot()); + llvm::CallInst *Selector = CGF.Builder.CreateCall(GetSelectorFn, CPI); + + llvm::Value *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); + + // If there's only a single catch-all, branch directly to its handler. + if (CatchScope.getNumHandlers() == 1 && + CatchScope.getHandler(0).isCatchAll()) { + CGF.Builder.CreateBr(CatchScope.getHandler(0).Block); + CGF.Builder.restoreIP(SavedIP); + return; + } + + // Test against each of the exception types we claim to catch. + for (unsigned I = 0, E = NumHandlers;; ++I) { + assert(I < E && "ran off end of handlers!"); + const EHCatchScope::Handler &Handler = CatchScope.getHandler(I); + CatchTypeInfo TypeInfo = Handler.Type; + if (!TypeInfo.RTTI) + TypeInfo.RTTI = llvm::Constant::getNullValue(CGF.VoidPtrTy); + + // Figure out the next block. + llvm::BasicBlock *NextBlock; + + bool EmitNextBlock = false, NextIsEnd = false; + + // If this is the last handler, we're at the end, and the next block is a + // block that contains a call to the rethrow function, so we can unwind to + // the enclosing EH scope. The call itself will be generated later. + if (I + 1 == E) { + NextBlock = CGF.createBasicBlock("rethrow"); + EmitNextBlock = true; + NextIsEnd = true; + + // If the next handler is a catch-all, we're at the end, and the + // next block is that handler. + } else if (CatchScope.getHandler(I + 1).isCatchAll()) { + NextBlock = CatchScope.getHandler(I + 1).Block; + NextIsEnd = true; + + // Otherwise, we're not at the end and we need a new block. + } else { + NextBlock = CGF.createBasicBlock("catch.fallthrough"); + EmitNextBlock = true; + } + + // Figure out the catch type's index in the LSDA's type table. + llvm::CallInst *TypeIndex = CGF.Builder.CreateCall(TypeIDFn, TypeInfo.RTTI); + TypeIndex->setDoesNotThrow(); + + llvm::Value *MatchesTypeIndex = + CGF.Builder.CreateICmpEQ(Selector, TypeIndex, "matches"); + CGF.Builder.CreateCondBr(MatchesTypeIndex, Handler.Block, NextBlock); + + if (EmitNextBlock) + CGF.EmitBlock(NextBlock); + if (NextIsEnd) + break; + } + + CGF.Builder.restoreIP(SavedIP); +} + /// Emit the structure of the dispatch block for the given catch scope. /// It is an invariant that the dispatch block already exists. static void emitCatchDispatchBlock(CodeGenFunction &CGF, EHCatchScope &catchScope) { + if (EHPersonality::get(CGF).isWasmPersonality()) + return emitWasmCatchPadBlock(CGF, catchScope); if (EHPersonality::get(CGF).usesFuncletPads()) return emitCatchPadBlock(CGF, catchScope); @@ -1017,6 +1134,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { unsigned NumHandlers = S.getNumHandlers(); EHCatchScope &CatchScope = cast<EHCatchScope>(*EHStack.begin()); assert(CatchScope.getNumHandlers() == NumHandlers); + llvm::BasicBlock *DispatchBlock = CatchScope.getCachedEHDispatchBlock(); // If the catch was not required, bail out now. if (!CatchScope.hasEHBranches()) { @@ -1049,6 +1167,22 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { doImplicitRethrow = isa<CXXDestructorDecl>(CurCodeDecl) || isa<CXXConstructorDecl>(CurCodeDecl); + // Wasm uses Windows-style EH instructions, but merges all catch clauses into + // one big catchpad. 
So we save the old funclet pad here before we traverse + // each catch handler. + SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( + CurrentFuncletPad); + llvm::BasicBlock *WasmCatchStartBlock = nullptr; + if (EHPersonality::get(*this).isWasmPersonality()) { + auto *CatchSwitch = + cast<llvm::CatchSwitchInst>(DispatchBlock->getFirstNonPHI()); + WasmCatchStartBlock = CatchSwitch->hasUnwindDest() + ? CatchSwitch->getSuccessor(1) + : CatchSwitch->getSuccessor(0); + auto *CPI = cast<llvm::CatchPadInst>(WasmCatchStartBlock->getFirstNonPHI()); + CurrentFuncletPad = CPI; + } + // Perversely, we emit the handlers backwards precisely because we // want them to appear in source order. In all of these cases, the // catch block will have exactly one predecessor, which will be a @@ -1056,7 +1190,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // a catch-all, one of the dispatch blocks will branch to two // different handlers, and EmitBlockAfterUses will cause the second // handler to be moved before the first. + bool HasCatchAll = false; for (unsigned I = NumHandlers; I != 0; --I) { + HasCatchAll |= Handlers[I - 1].isCatchAll(); llvm::BasicBlock *CatchBlock = Handlers[I-1].Block; EmitBlockAfterUses(CatchBlock); @@ -1101,6 +1237,27 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { Builder.CreateBr(ContBB); } + // Because in wasm we merge all catch clauses into one big catchpad, in case + // none of the types in catch handlers matches after we test against each of + // them, we should unwind to the next EH enclosing scope. We generate a call + // to rethrow function here to do that. + if (EHPersonality::get(*this).isWasmPersonality() && !HasCatchAll) { + assert(WasmCatchStartBlock); + // Navigate for the "rethrow" block we created in emitWasmCatchPadBlock(). + // Wasm uses landingpad-style conditional branches to compare selectors, so + // we follow the false destination for each of the cond branches to reach + // the rethrow block. + llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock; + while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) { + auto *BI = cast<llvm::BranchInst>(TI); + assert(BI->isConditional()); + RethrowBlock = BI->getSuccessor(1); + } + assert(RethrowBlock != WasmCatchStartBlock && RethrowBlock->empty()); + Builder.SetInsertPoint(RethrowBlock); + CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true); + } + EmitBlock(ContBB); incrementProfileCounter(&S); } @@ -1334,23 +1491,59 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() { if (TerminateHandler) return TerminateHandler; - CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP(); - // Set up the terminate handler. This block is inserted at the very // end of the function by FinishFunction. TerminateHandler = createBasicBlock("terminate.handler"); + CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP(); Builder.SetInsertPoint(TerminateHandler); + llvm::Value *Exn = nullptr; + if (getLangOpts().CPlusPlus) + Exn = getExceptionFromSlot(); + llvm::CallInst *terminateCall = + CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn); + terminateCall->setDoesNotReturn(); + Builder.CreateUnreachable(); + + // Restore the saved insertion state. 
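// Illustrative source-level example of what the terminate handling here
// guarantees (see the EmitStartEHSpec/EmitEndEHSpec change above, which now
// keys on the exception specification evaluating to CT_Cannot): an exception
// escaping a noexcept function must reach std::terminate instead of unwinding.
#include <cstdio>
#include <stdexcept>

static void mayThrow(bool B) {
  if (B)
    throw std::runtime_error("boom");
}

// Codegen wraps this body in a terminate scope because the (non-dependent)
// exception specification evaluates to "cannot throw".
static void wrapped(bool B) noexcept { mayThrow(B); }

int main() {
  wrapped(false);
  std::puts("still alive");
  wrapped(true);   // escaping exception -> std::terminate()
}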
+ Builder.restoreIP(SavedIP); + + return TerminateHandler; +} + +llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() { + assert(EHPersonality::get(*this).usesFuncletPads() && + "use getTerminateLandingPad for non-funclet EH"); + + llvm::BasicBlock *&TerminateFunclet = TerminateFunclets[CurrentFuncletPad]; + if (TerminateFunclet) + return TerminateFunclet; + + CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP(); + + // Set up the terminate handler. This block is inserted at the very + // end of the function by FinishFunction. + TerminateFunclet = createBasicBlock("terminate.handler"); + Builder.SetInsertPoint(TerminateFunclet); + + // Create the cleanuppad using the current parent pad as its token. Use 'none' + // if this is a top-level terminate scope, which is the common case. SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( CurrentFuncletPad); - if (EHPersonality::get(*this).usesFuncletPads()) { - llvm::Value *ParentPad = CurrentFuncletPad; - if (!ParentPad) - ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); - CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad); - } else { - if (getLangOpts().CPlusPlus) - Exn = getExceptionFromSlot(); + llvm::Value *ParentPad = CurrentFuncletPad; + if (!ParentPad) + ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); + CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad); + + // Emit the __std_terminate call. + llvm::Value *Exn = nullptr; + // In case of wasm personality, we need to pass the exception value to + // __clang_call_terminate function. + if (getLangOpts().CPlusPlus && + EHPersonality::get(*this).isWasmPersonality()) { + llvm::Value *GetExnFn = + CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); + Exn = Builder.CreateCall(GetExnFn, CurrentFuncletPad); } llvm::CallInst *terminateCall = CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn); @@ -1360,7 +1553,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() { // Restore the saved insertion state. Builder.restoreIP(SavedIP); - return TerminateHandler; + return TerminateFunclet; } llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index c7dc8337e19e..3097caacb31c 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -61,18 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. +Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, + CharUnits Align, + const Twine &Name, + llvm::Value *ArraySize) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); + Alloca->setAlignment(Align.getQuantity()); + return Address(Alloca, Align); +} + +/// CreateTempAlloca - This creates a alloca and inserts it into the entry +/// block. The alloca is casted to default address space if necessary. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, const Twine &Name, llvm::Value *ArraySize, - bool CastToDefaultAddrSpace) { - auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); - Alloca->setAlignment(Align.getQuantity()); - llvm::Value *V = Alloca; + Address *AllocaAddr) { + auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); + if (AllocaAddr) + *AllocaAddr = Alloca; + llvm::Value *V = Alloca.getPointer(); // Alloca always returns a pointer in alloca address space, which may // be different from the type defined by the language. 
For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. - if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { + if (getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); llvm::IRBuilderBase::InsertPointGuard IPG(Builder); // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, @@ -125,17 +137,26 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) { } Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, - bool CastToDefaultAddrSpace) { + Address *Alloca) { // FIXME: Should we prefer the preferred type alignment here? - return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, - CastToDefaultAddrSpace); + return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca); } Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, - const Twine &Name, - bool CastToDefaultAddrSpace) { - return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr, - CastToDefaultAddrSpace); + const Twine &Name, Address *Alloca) { + return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, + /*ArraySize=*/nullptr, Alloca); +} + +Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty, CharUnits Align, + const Twine &Name) { + return CreateTempAllocaWithoutCast(ConvertTypeForMem(Ty), Align, Name); +} + +Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty, + const Twine &Name) { + return CreateMemTempWithoutCast(Ty, getContext().getTypeAlignInChars(Ty), + Name); } /// EvaluateExprAsBool - Perform the usual unary conversions on the specified @@ -187,7 +208,7 @@ RValue CodeGenFunction::EmitAnyExpr(const Expr *E, llvm_unreachable("bad evaluation kind"); } -/// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will +/// EmitAnyExprToTemp - Similar to EmitAnyExpr(), however, the result will /// always be accessible even if no aggregate location is provided. RValue CodeGenFunction::EmitAnyExprToTemp(const Expr *E) { AggValueSlot AggSlot = AggValueSlot::ignored(); @@ -214,7 +235,8 @@ void CodeGenFunction::EmitAnyExprToMem(const Expr *E, EmitAggExpr(E, AggValueSlot::forAddr(Location, Quals, AggValueSlot::IsDestructed_t(IsInit), AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsAliased_t(!IsInit))); + AggValueSlot::IsAliased_t(!IsInit), + AggValueSlot::MayOverlap)); return; } @@ -347,7 +369,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, static Address createReferenceTemporary(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, - const Expr *Inner) { + const Expr *Inner, + Address *Alloca = nullptr) { auto &TCG = CGF.getTargetHooks(); switch (M->getStorageDuration()) { case SD_FullExpression: @@ -380,7 +403,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, return Address(C, alignment); } } - return CGF.CreateMemTemp(Ty, "ref.tmp"); + return CGF.CreateMemTemp(Ty, "ref.tmp", Alloca); } case SD_Thread: case SD_Static: @@ -432,7 +455,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { E->getType().getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); break; } } @@ -456,7 +480,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { } // Create and initialize the reference temporary. 
- Address Object = createReferenceTemporary(*this, M, E); + Address Alloca = Address::invalid(); + Address Object = createReferenceTemporary(*this, M, E, &Alloca); if (auto *Var = dyn_cast<llvm::GlobalVariable>( Object.getPointer()->stripPointerCasts())) { Object = Address(llvm::ConstantExpr::getBitCast( @@ -475,13 +500,13 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { case SD_Automatic: case SD_FullExpression: if (auto *Size = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Object.getElementType()), - Object.getPointer())) { + CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), + Alloca.getPointer())) { if (M->getStorageDuration() == SD_Automatic) pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, - Object, Size); + Alloca, Size); else - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object, + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca, Size); } break; @@ -873,7 +898,7 @@ static llvm::Value *getArrayIndexingBound( if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) return CGF.Builder.getInt(CAT->getSize()); else if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) - return CGF.getVLASize(VAT).first; + return CGF.getVLASize(VAT).NumElts; // Ignore pass_object_size here. It's not applicable on decayed pointers. } } @@ -1034,8 +1059,12 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Derived-to-base conversions. case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo, - TBAAInfo); + // TODO: Support accesses to members of base classes in TBAA. For now, we + // conservatively pretend that the complete object is of the base class + // type. + if (TBAAInfo) + *TBAAInfo = CGM.getTBAAAccessInfo(E->getType()); + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo); auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); return GetAddressOfBaseClass(Addr, Derived, CE->path_begin(), CE->path_end(), @@ -1785,7 +1814,7 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { return RValue::get(Vec); } -/// @brief Generates lvalue for partial ext_vector access. +/// Generates lvalue for partial ext_vector access. Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { Address VectorAddress = LV.getExtVectorAddress(); const VectorType *ExprVT = LV.getType()->getAs<VectorType>(); @@ -1807,7 +1836,7 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { return VectorBasePtrPlusIx; } -/// @brief Load of global gamed gegisters are always calls to intrinsics. +/// Load of global gamed gegisters are always calls to intrinsics. RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) { assert((LV.getType()->isIntegerType() || LV.getType()->isPointerType()) && "Bad type for register variable"); @@ -2067,7 +2096,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, Dst.isVolatileQualified()); } -/// @brief Store of global named registers are always calls to intrinsics. +/// Store of global named registers are always calls to intrinsics. 
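// Illustrative source construct for the named-register load/store helpers in
// this area: a global register variable has no memory home, so reads and
// writes are emitted as llvm.read_register / llvm.write_register intrinsic
// calls. This is a GNU extension; clang accepts only a few registers (e.g.
// "sp") and only on suitable targets and language modes, so treat this purely
// as a sketch.
register unsigned long CurrentSP asm("sp");

unsigned long readStackPointer() { return CurrentSP; }    // llvm.read_register
void setStackPointer(unsigned long V) { CurrentSP = V; }  // llvm.write_register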
void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) { assert((Dst.getType()->isIntegerType() || Dst.getType()->isPointerType()) && "Bad type for register variable"); @@ -2206,6 +2235,22 @@ static LValue EmitThreadPrivateVarDeclLValue( return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } +static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, QualType T) { + for (const auto *D : VD->redecls()) { + if (!VD->hasAttrs()) + continue; + if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) + if (Attr->getMapType() == OMPDeclareTargetDeclAttr::MT_Link) { + QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); + Address Addr = + CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>()); + } + } + return Address::invalid(); +} + Address CodeGenFunction::EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo, @@ -2255,6 +2300,13 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, if (VD->getTLSKind() == VarDecl::TLS_Dynamic && CGF.CGM.getCXXABI().usesThreadWrapperFunction()) return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T); + // Check if the variable is marked as declare target with link clause in + // device codegen. + if (CGF.getLangOpts().OpenMPIsDevice) { + Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T); + if (Addr.isValid()) + return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); + } llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); @@ -2263,9 +2315,11 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, Address Addr(V, Alignment); // Emit reference to the private copy of the variable if it is an OpenMP // threadprivate variable. - if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) + if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd && + VD->hasAttr<OMPThreadPrivateDeclAttr>()) { return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy, E->getExprLoc()); + } LValue LV = VD->getType()->isReferenceType() ? CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), AlignmentSource::Decl) : @@ -2446,7 +2500,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // Check for OpenMP threadprivate variables. - if (getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) { + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && + VD->hasAttr<OMPThreadPrivateDeclAttr>()) { return EmitThreadPrivateVarDeclLValue( *this, VD, T, addr, getTypes().ConvertTypeForMem(VD->getType()), E->getExprLoc()); @@ -2579,7 +2634,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { StringRef NameItems[] = { PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName}; std::string GVName = llvm::join(NameItems, NameItems + 2, "."); - if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) { + if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) { std::string Name = SL->getString(); if (!Name.empty()) { unsigned Discriminator = @@ -2678,7 +2733,7 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { return Builder.CreatePtrToInt(V, TargetTy); } -/// \brief Emit a representation of a SourceLocation for passing to a handler +/// Emit a representation of a SourceLocation for passing to a handler /// in a sanitizer runtime library. 
The format for this data is: /// \code /// struct SourceLocation { @@ -2737,7 +2792,7 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { } namespace { -/// \brief Specify under what conditions this check can be recovered +/// Specify under what conditions this check can be recovered enum class CheckRecoverableKind { /// Always terminate program execution if this check fails. Unrecoverable, @@ -2945,6 +3000,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck( bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); llvm::CallInst *CheckCall; + llvm::Constant *SlowPathFn; if (WithDiag) { llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); auto *InfoPtr = @@ -2953,20 +3009,20 @@ void CodeGenFunction::EmitCfiSlowPathCheck( InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction( + SlowPathFn = CGM.getModule().getOrInsertFunction( "__cfi_slowpath_diag", llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false)); CheckCall = Builder.CreateCall( - SlowPathDiagFn, - {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)}); + SlowPathFn, {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)}); } else { - llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( + SlowPathFn = CGM.getModule().getOrInsertFunction( "__cfi_slowpath", llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false)); CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); } + CGM.setDSOLocal(cast<llvm::GlobalValue>(SlowPathFn->stripPointerCasts())); CheckCall->setDoesNotThrow(); EmitBlock(Cont); @@ -2980,6 +3036,7 @@ void CodeGenFunction::EmitCfiCheckStub() { llvm::Function *F = llvm::Function::Create( llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false), llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M); + CGM.setDSOLocal(F); llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F); // FIXME: consider emitting an intrinsic call like // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2) @@ -3018,6 +3075,11 @@ void CodeGenFunction::EmitCfiCheckFail() { StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args, SourceLocation()); + // This function should not be affected by blacklist. This function does + // not have a source location, but "src:*" would still apply. Revert any + // changes to SanOpts made in StartFunction. + SanOpts = CGM.getLangOpts().Sanitize; + llvm::Value *Data = EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false, CGM.getContext().VoidPtrTy, ArgData.getLocation()); @@ -3306,7 +3368,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, auto *Idx = EmitIdxAfterBase(/*Promote*/true); // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = getVLASize(vla).first; + llvm::Value *numElements = getVLASize(vla).NumElts; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to @@ -3540,7 +3602,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, TBAAInfo, BaseTy, VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. 
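// For reference, the runtime entry points targeted by the CFI slow-path
// emission above, with the signatures as constructed in EmitCfiSlowPathCheck
// (the DiagData argument is the address of the static check-data blob):
#include <cstdint>
extern "C" void __cfi_slowpath(uint64_t CallSiteTypeId, void *Ptr);
extern "C" void __cfi_slowpath_diag(uint64_t CallSiteTypeId, void *Ptr,
                                    void *DiagData);
// The new setDSOLocal call marks the callee dso_local, presumably so the
// reference can be direct rather than GOT/PLT-indirect in PIC builds.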
- llvm::Value *NumElements = getVLASize(VLA).first; + llvm::Value *NumElements = getVLASize(VLA).NumElts; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to @@ -3808,6 +3870,18 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, } Address addr = base.getAddress(); + if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) { + if (CGM.getCodeGenOpts().StrictVTablePointers && + ClassDef->isDynamicClass()) { + // Getting to any field of dynamic object requires stripping dynamic + // information provided by invariant.group. This is because accessing + // fields may leak the real address of dynamic object, which could result + // in miscompilation when leaked pointer would be compared. + auto *stripped = Builder.CreateStripInvariantGroup(addr.getPointer()); + addr = Address(stripped, addr.getAlignment()); + } + } + unsigned RecordCVR = base.getVRQualifiers(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. @@ -3816,7 +3890,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, hasAnyVptr(FieldType, getContext())) // Because unions can easily skip invariant.barriers, we need to add // a barrier every time CXXRecord field with vptr is referenced. - addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()), + addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()), addr.getAlignment()); } else { // For structs, we GEP to the field that the record layout suggests. @@ -4160,7 +4234,35 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) { assert(OpaqueValueMappingData::shouldBindAsLValue(e)); - return getOpaqueLValueMapping(e); + return getOrCreateOpaqueLValueMapping(e); +} + +LValue +CodeGenFunction::getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e) { + assert(OpaqueValueMapping::shouldBindAsLValue(e)); + + llvm::DenseMap<const OpaqueValueExpr*,LValue>::iterator + it = OpaqueLValues.find(e); + + if (it != OpaqueLValues.end()) + return it->second; + + assert(e->isUnique() && "LValue for a nonunique OVE hasn't been emitted"); + return EmitLValue(e->getSourceExpr()); +} + +RValue +CodeGenFunction::getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e) { + assert(!OpaqueValueMapping::shouldBindAsLValue(e)); + + llvm::DenseMap<const OpaqueValueExpr*,RValue>::iterator + it = OpaqueRValues.find(e); + + if (it != OpaqueRValues.end()) + return it->second; + + assert(e->isUnique() && "RValue for a nonunique OVE hasn't been emitted"); + return EmitAnyExpr(e->getSourceExpr()); } RValue CodeGenFunction::EmitRValueForField(LValue LV, @@ -4476,8 +4578,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee CalleeType = getContext().getCanonicalType(CalleeType); - const auto *FnType = - cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType()); + auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType(); CGCallee Callee = OrigCallee; @@ -4486,8 +4587,12 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee if (llvm::Constant *PrefixSig = CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { SanitizerScope SanScope(this); + // Remove any (C++17) exception specifications, to allow calling e.g. a + // noexcept function through a non-noexcept pointer. 
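// Source-level case the exception-spec stripping above addresses: since C++17
// the noexcept specification is part of the function type, but a noexcept
// function may still be called through a non-noexcept pointer, and
// -fsanitize=function should not flag that call as a type mismatch.
static void target() noexcept {}

void (*FnPtr)() = target;   // valid conversion: noexcept(true) -> noexcept(false)

int main() {
  FnPtr();                  // signature check compares types w/o exception specs
  return 0;
}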
+ auto ProtoTy = + getContext().getFunctionTypeWithExceptionSpec(PointeeType, EST_None); llvm::Constant *FTRTTIConst = - CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true); + CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true); llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty}; llvm::StructType *PrefixStructTy = llvm::StructType::get( CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true); @@ -4527,6 +4632,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee } } + const auto *FnType = cast<FunctionType>(PointeeType); + // If we are checking indirect calls and this call is indirect, check that the // function pointer is a member of the bit set for the function type. if (SanOpts.has(SanitizerKind::CFIICall) && @@ -4707,6 +4814,12 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, // If this semantic expression is an opaque value, bind it // to the result of its source expression. if (const auto *ov = dyn_cast<OpaqueValueExpr>(semantic)) { + // Skip unique OVEs. + if (ov->isUnique()) { + assert(ov != resultExpr && + "A unique OVE cannot be used as the result expression"); + continue; + } // If this is the result expression, we may need to evaluate // directly into the slot. diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 0f05cab66d7e..291740478329 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" +#include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" @@ -22,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" using namespace clang; using namespace CodeGen; @@ -36,23 +39,6 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> { AggValueSlot Dest; bool IsResultUnused; - /// We want to use 'dest' as the return slot except under two - /// conditions: - /// - The destination slot requires garbage collection, so we - /// need to use the GC API. - /// - The destination slot is potentially aliased. - bool shouldUseDestForReturnSlot() const { - return !(Dest.requiresGCollection() || Dest.isPotentiallyAliased()); - } - - ReturnValueSlot getReturnValueSlot() const { - if (!shouldUseDestForReturnSlot()) - return ReturnValueSlot(); - - return ReturnValueSlot(Dest.getAddress(), Dest.isVolatile(), - IsResultUnused); - } - AggValueSlot EnsureSlot(QualType T) { if (!Dest.isIgnored()) return Dest; return CGF.CreateAggTemp(T, "agg.tmp.ensured"); @@ -62,6 +48,15 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> { Dest = CGF.CreateAggTemp(T, "agg.tmp.ensured"); } + // Calls `Fn` with a valid return value slot, potentially creating a temporary + // to do so. If a temporary is created, an appropriate copy into `Dest` will + // be emitted, as will lifetime markers. + // + // The given function should take a ReturnValueSlot, and return an RValue that + // points to said slot. + void withReturnValueSlot(const Expr *E, + llvm::function_ref<RValue(ReturnValueSlot)> Fn); + public: AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused) : CGF(cgf), Builder(CGF.Builder), Dest(Dest), @@ -76,8 +71,15 @@ public: /// then loads the result into DestPtr. 
void EmitAggLoadOfLValue(const Expr *E); + enum ExprValueKind { + EVK_RValue, + EVK_NonRValue + }; + /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. - void EmitFinalDestCopy(QualType type, const LValue &src); + /// SrcIsRValue is true if source comes from an RValue. + void EmitFinalDestCopy(QualType type, const LValue &src, + ExprValueKind SrcValueKind = EVK_NonRValue); void EmitFinalDestCopy(QualType type, RValue src); void EmitCopy(QualType type, const AggValueSlot &dest, const AggValueSlot &src); @@ -85,7 +87,7 @@ public: void EmitMoveFromReturnSlot(const Expr *E, RValue Src); void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, - QualType elementType, InitListExpr *E); + QualType ArrayQTy, InitListExpr *E); AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) { if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T)) @@ -144,6 +146,7 @@ public: void VisitPointerToDataMemberBinaryOperator(const BinaryOperator *BO); void VisitBinAssign(const BinaryOperator *E); void VisitBinComma(const BinaryOperator *E); + void VisitBinCmp(const BinaryOperator *E); void VisitObjCMessageExpr(ObjCMessageExpr *E); void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) { @@ -217,7 +220,7 @@ void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) { EmitFinalDestCopy(E->getType(), LV); } -/// \brief True if the given aggregate type requires special GC API calls. +/// True if the given aggregate type requires special GC API calls. bool AggExprEmitter::TypeRequiresGCollection(QualType T) { // Only record types have members that might require garbage collection. const RecordType *RecordTy = T->getAs<RecordType>(); @@ -234,38 +237,78 @@ bool AggExprEmitter::TypeRequiresGCollection(QualType T) { return Record->hasObjectMember(); } -/// \brief Perform the final move to DestPtr if for some reason -/// getReturnValueSlot() didn't use it directly. -/// -/// The idea is that you do something like this: -/// RValue Result = EmitSomething(..., getReturnValueSlot()); -/// EmitMoveFromReturnSlot(E, Result); -/// -/// If nothing interferes, this will cause the result to be emitted -/// directly into the return value slot. Otherwise, a final move -/// will be performed. -void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue src) { - if (shouldUseDestForReturnSlot()) { - // Logically, Dest.getAddr() should equal Src.getAggregateAddr(). - // The possibility of undef rvalues complicates that a lot, - // though, so we can't really assert. - return; +void AggExprEmitter::withReturnValueSlot( + const Expr *E, llvm::function_ref<RValue(ReturnValueSlot)> EmitCall) { + QualType RetTy = E->getType(); + bool RequiresDestruction = + Dest.isIgnored() && + RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct; + + // If it makes no observable difference, save a memcpy + temporary. + // + // We need to always provide our own temporary if destruction is required. + // Otherwise, EmitCall will emit its own, notice that it's "unused", and end + // its lifetime before we have the chance to emit a proper destructor call. 
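// Rough model (ordinary C++, invented names) of the flow withReturnValueSlot
// introduces: reuse the caller-provided slot when nothing can observe the
// difference, otherwise evaluate into a temporary whose lifetime is bounded
// around the call and the final copy back into the destination.
#include <functional>
#include <vector>

struct DestSlot {
  std::vector<int> *Storage = nullptr;
  bool PotentiallyAliased = false;
};

static void emitCallInto(DestSlot Dest,
                         const std::function<void(std::vector<int> &)> &Call) {
  if (Dest.Storage && !Dest.PotentiallyAliased) {
    Call(*Dest.Storage);        // the call writes straight into the destination
    return;
  }
  std::vector<int> Temp;        // "alloca" + llvm.lifetime.start on the alloca
  Call(Temp);                   // evaluate the call into the temporary
  if (Dest.Storage)
    *Dest.Storage = Temp;       // EmitFinalDestCopy back into the destination
}                               // leaving scope ~ llvm.lifetime.end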
+ bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() || + (RequiresDestruction && !Dest.getAddress().isValid()); + + Address RetAddr = Address::invalid(); + Address RetAllocaAddr = Address::invalid(); + + EHScopeStack::stable_iterator LifetimeEndBlock; + llvm::Value *LifetimeSizePtr = nullptr; + llvm::IntrinsicInst *LifetimeStartInst = nullptr; + if (!UseTemp) { + RetAddr = Dest.getAddress(); + } else { + RetAddr = CGF.CreateMemTemp(RetTy, "tmp", &RetAllocaAddr); + uint64_t Size = + CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy)); + LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAllocaAddr.getPointer()); + if (LifetimeSizePtr) { + LifetimeStartInst = + cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint())); + assert(LifetimeStartInst->getIntrinsicID() == + llvm::Intrinsic::lifetime_start && + "Last insertion wasn't a lifetime.start?"); + + CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>( + NormalEHLifetimeMarker, RetAllocaAddr, LifetimeSizePtr); + LifetimeEndBlock = CGF.EHStack.stable_begin(); + } } - // Otherwise, copy from there to the destination. - assert(Dest.getPointer() != src.getAggregatePointer()); - EmitFinalDestCopy(E->getType(), src); + RValue Src = + EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused)); + + if (RequiresDestruction) + CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy); + + if (!UseTemp) + return; + + assert(Dest.getPointer() != Src.getAggregatePointer()); + EmitFinalDestCopy(E->getType(), Src); + + if (!RequiresDestruction && LifetimeStartInst) { + // If there's no dtor to run, the copy was the last use of our temporary. + // Since we're not guaranteed to be in an ExprWithCleanups, clean up + // eagerly. + CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst); + CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAllocaAddr.getPointer()); + } } /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. void AggExprEmitter::EmitFinalDestCopy(QualType type, RValue src) { assert(src.isAggregate() && "value must be aggregate value!"); LValue srcLV = CGF.MakeAddrLValue(src.getAggregateAddress(), type); - EmitFinalDestCopy(type, srcLV); + EmitFinalDestCopy(type, srcLV, EVK_RValue); } /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. -void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) { +void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src, + ExprValueKind SrcValueKind) { // If Dest is ignored, then we're evaluating an aggregate expression // in a context that doesn't care about the result. Note that loads // from volatile l-values force the existence of a non-ignored @@ -273,9 +316,32 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) { if (Dest.isIgnored()) return; + // Copy non-trivial C structs here. + LValue DstLV = CGF.MakeAddrLValue( + Dest.getAddress(), Dest.isVolatile() ? 
type.withVolatile() : type); + + if (SrcValueKind == EVK_RValue) { + if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct) { + if (Dest.isPotentiallyAliased()) + CGF.callCStructMoveAssignmentOperator(DstLV, src); + else + CGF.callCStructMoveConstructor(DstLV, src); + return; + } + } else { + if (type.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) { + if (Dest.isPotentiallyAliased()) + CGF.callCStructCopyAssignmentOperator(DstLV, src); + else + CGF.callCStructCopyConstructor(DstLV, src); + return; + } + } + AggValueSlot srcAgg = AggValueSlot::forLValue(src, AggValueSlot::IsDestructed, - needsGC(type), AggValueSlot::IsAliased); + needsGC(type), AggValueSlot::IsAliased, + AggValueSlot::MayOverlap); EmitCopy(type, Dest, srcAgg); } @@ -286,7 +352,7 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) { void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest, const AggValueSlot &src) { if (dest.requiresGCollection()) { - CharUnits sz = CGF.getContext().getTypeSizeInChars(type); + CharUnits sz = dest.getPreferredSize(CGF.getContext(), type); llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity()); CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF, dest.getAddress(), @@ -298,11 +364,13 @@ void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest, // If the result of the assignment is used, copy the LHS there also. // It's volatile if either side is. Use the minimum alignment of // the two sides. - CGF.EmitAggregateCopy(dest.getAddress(), src.getAddress(), type, + LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type); + LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type); + CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(), dest.isVolatile() || src.isVolatile()); } -/// \brief Emit the initializer for a std::initializer_list initialized with a +/// Emit the initializer for a std::initializer_list initialized with a /// real initializer list. void AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) { @@ -367,7 +435,7 @@ AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) { } } -/// \brief Determine if E is a trivial array filler, that is, one that is +/// Determine if E is a trivial array filler, that is, one that is /// equivalent to zero-initialization. static bool isTrivialFiller(Expr *E) { if (!E) @@ -390,14 +458,17 @@ static bool isTrivialFiller(Expr *E) { return false; } -/// \brief Emit initialization of an array from an initializer list. +/// Emit initialization of an array from an initializer list. void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, - QualType elementType, InitListExpr *E) { + QualType ArrayQTy, InitListExpr *E) { uint64_t NumInitElements = E->getNumInits(); uint64_t NumArrayElements = AType->getNumElements(); assert(NumInitElements <= NumArrayElements); + QualType elementType = + CGF.getContext().getAsArrayType(ArrayQTy)->getElementType(); + // DestPtr is an array*. Construct an elementType* by drilling // down a level. llvm::Value *zero = llvm::ConstantInt::get(CGF.SizeTy, 0); @@ -409,6 +480,29 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, CharUnits elementAlign = DestPtr.getAlignment().alignmentOfArrayElement(elementSize); + // Consider initializing the array by copying from a global. For this to be + // more efficient than per-element initialization, the size of the elements + // with explicit initializers should be large enough. 
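// Source-level case the new fast path below covers: a sufficiently large,
// trivially copyable initializer list is emitted once as a private constant
// global ("constinit" in the code) and the local array is then initialized
// with a single memcpy instead of per-element stores.
int sumOfTable() {
  int Table[8] = {10, 20, 30, 40, 50, 60, 70, 80};  // 32 bytes of explicit init
  int Sum = 0;
  for (int V : Table)
    Sum += V;
  return Sum;
}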
+ if (NumInitElements * elementSize.getQuantity() > 16 && + elementType.isTriviallyCopyableType(CGF.getContext())) { + CodeGen::CodeGenModule &CGM = CGF.CGM; + ConstantEmitter Emitter(CGM); + LangAS AS = ArrayQTy.getAddressSpace(); + if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) { + auto GV = new llvm::GlobalVariable( + CGM.getModule(), C->getType(), + CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true), + llvm::GlobalValue::PrivateLinkage, C, "constinit", + /* InsertBefore= */ nullptr, llvm::GlobalVariable::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(AS)); + Emitter.finalize(GV); + CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy); + GV->setAlignment(Align.getQuantity()); + EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align)); + return; + } + } + // Exception safety requires us to destroy all the // already-constructed members if an initializer throws. // For that, we'll need an EH cleanup. @@ -540,7 +634,11 @@ void AggExprEmitter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E){ } void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) { - EmitFinalDestCopy(e->getType(), CGF.getOpaqueLValueMapping(e)); + // If this is a unique OVE, just visit its source expression. + if (e->isUnique()) + Visit(e->getSourceExpr()); + else + EmitFinalDestCopy(e->getType(), CGF.getOrCreateOpaqueLValueMapping(e)); } void @@ -586,12 +684,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { CGF.EmitDynamicCast(LV.getAddress(), cast<CXXDynamicCastExpr>(E)); else CGF.CGM.ErrorUnsupported(E, "non-simple lvalue dynamic_cast"); - + if (!Dest.isIgnored()) CGF.CGM.ErrorUnsupported(E, "lvalue dynamic_cast with a destination"); break; } - + case CK_ToUnion: { // Evaluate even if the destination is ignored. if (Dest.isIgnored()) { @@ -651,7 +749,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { if (isToAtomic) { AggValueSlot valueDest = Dest; if (!valueDest.isIgnored() && CGF.CGM.isPaddedAtomicType(atomicType)) { - // Zero-initialize. (Strictly speaking, we only need to intialize + // Zero-initialize. (Strictly speaking, we only need to initialize // the padding at the end, but this is simpler.) 
if (!Dest.isZeroed()) CGF.EmitNullInitialization(Dest.getAddress(), atomicType); @@ -665,6 +763,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { valueDest.isExternallyDestructed(), valueDest.requiresGCollection(), valueDest.isPotentiallyAliased(), + AggValueSlot::DoesNotOverlap, AggValueSlot::IsZeroed); } @@ -762,13 +861,15 @@ void AggExprEmitter::VisitCallExpr(const CallExpr *E) { return; } - RValue RV = CGF.EmitCallExpr(E, getReturnValueSlot()); - EmitMoveFromReturnSlot(E, RV); + withReturnValueSlot(E, [&](ReturnValueSlot Slot) { + return CGF.EmitCallExpr(E, Slot); + }); } void AggExprEmitter::VisitObjCMessageExpr(ObjCMessageExpr *E) { - RValue RV = CGF.EmitObjCMessageExpr(E, getReturnValueSlot()); - EmitMoveFromReturnSlot(E, RV); + withReturnValueSlot(E, [&](ReturnValueSlot Slot) { + return CGF.EmitObjCMessageExpr(E, Slot); + }); } void AggExprEmitter::VisitBinComma(const BinaryOperator *E) { @@ -781,6 +882,150 @@ void AggExprEmitter::VisitStmtExpr(const StmtExpr *E) { CGF.EmitCompoundStmt(*E->getSubStmt(), true, Dest); } +enum CompareKind { + CK_Less, + CK_Greater, + CK_Equal, +}; + +static llvm::Value *EmitCompare(CGBuilderTy &Builder, CodeGenFunction &CGF, + const BinaryOperator *E, llvm::Value *LHS, + llvm::Value *RHS, CompareKind Kind, + const char *NameSuffix = "") { + QualType ArgTy = E->getLHS()->getType(); + if (const ComplexType *CT = ArgTy->getAs<ComplexType>()) + ArgTy = CT->getElementType(); + + if (const auto *MPT = ArgTy->getAs<MemberPointerType>()) { + assert(Kind == CK_Equal && + "member pointers may only be compared for equality"); + return CGF.CGM.getCXXABI().EmitMemberPointerComparison( + CGF, LHS, RHS, MPT, /*IsInequality*/ false); + } + + // Compute the comparison instructions for the specified comparison kind. + struct CmpInstInfo { + const char *Name; + llvm::CmpInst::Predicate FCmp; + llvm::CmpInst::Predicate SCmp; + llvm::CmpInst::Predicate UCmp; + }; + CmpInstInfo InstInfo = [&]() -> CmpInstInfo { + using FI = llvm::FCmpInst; + using II = llvm::ICmpInst; + switch (Kind) { + case CK_Less: + return {"cmp.lt", FI::FCMP_OLT, II::ICMP_SLT, II::ICMP_ULT}; + case CK_Greater: + return {"cmp.gt", FI::FCMP_OGT, II::ICMP_SGT, II::ICMP_UGT}; + case CK_Equal: + return {"cmp.eq", FI::FCMP_OEQ, II::ICMP_EQ, II::ICMP_EQ}; + } + llvm_unreachable("Unrecognised CompareKind enum"); + }(); + + if (ArgTy->hasFloatingRepresentation()) + return Builder.CreateFCmp(InstInfo.FCmp, LHS, RHS, + llvm::Twine(InstInfo.Name) + NameSuffix); + if (ArgTy->isIntegralOrEnumerationType() || ArgTy->isPointerType()) { + auto Inst = + ArgTy->hasSignedIntegerRepresentation() ? InstInfo.SCmp : InstInfo.UCmp; + return Builder.CreateICmp(Inst, LHS, RHS, + llvm::Twine(InstInfo.Name) + NameSuffix); + } + + llvm_unreachable("unsupported aggregate binary expression should have " + "already been handled"); +} + +void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) { + using llvm::BasicBlock; + using llvm::PHINode; + using llvm::Value; + assert(CGF.getContext().hasSameType(E->getLHS()->getType(), + E->getRHS()->getType())); + const ComparisonCategoryInfo &CmpInfo = + CGF.getContext().CompCategories.getInfoForType(E->getType()); + assert(CmpInfo.Record->isTriviallyCopyable() && + "cannot copy non-trivially copyable aggregate"); + + QualType ArgTy = E->getLHS()->getType(); + + // TODO: Handle comparing these types. 
+ if (ArgTy->isVectorType()) + return CGF.ErrorUnsupported( + E, "aggregate three-way comparison with vector arguments"); + if (!ArgTy->isIntegralOrEnumerationType() && !ArgTy->isRealFloatingType() && + !ArgTy->isNullPtrType() && !ArgTy->isPointerType() && + !ArgTy->isMemberPointerType() && !ArgTy->isAnyComplexType()) { + return CGF.ErrorUnsupported(E, "aggregate three-way comparison"); + } + bool IsComplex = ArgTy->isAnyComplexType(); + + // Evaluate the operands to the expression and extract their values. + auto EmitOperand = [&](Expr *E) -> std::pair<Value *, Value *> { + RValue RV = CGF.EmitAnyExpr(E); + if (RV.isScalar()) + return {RV.getScalarVal(), nullptr}; + if (RV.isAggregate()) + return {RV.getAggregatePointer(), nullptr}; + assert(RV.isComplex()); + return RV.getComplexVal(); + }; + auto LHSValues = EmitOperand(E->getLHS()), + RHSValues = EmitOperand(E->getRHS()); + + auto EmitCmp = [&](CompareKind K) { + Value *Cmp = EmitCompare(Builder, CGF, E, LHSValues.first, RHSValues.first, + K, IsComplex ? ".r" : ""); + if (!IsComplex) + return Cmp; + assert(K == CompareKind::CK_Equal); + Value *CmpImag = EmitCompare(Builder, CGF, E, LHSValues.second, + RHSValues.second, K, ".i"); + return Builder.CreateAnd(Cmp, CmpImag, "and.eq"); + }; + auto EmitCmpRes = [&](const ComparisonCategoryInfo::ValueInfo *VInfo) { + return Builder.getInt(VInfo->getIntValue()); + }; + + Value *Select; + if (ArgTy->isNullPtrType()) { + Select = EmitCmpRes(CmpInfo.getEqualOrEquiv()); + } else if (CmpInfo.isEquality()) { + Select = Builder.CreateSelect( + EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()), + EmitCmpRes(CmpInfo.getNonequalOrNonequiv()), "sel.eq"); + } else if (!CmpInfo.isPartial()) { + Value *SelectOne = + Builder.CreateSelect(EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()), + EmitCmpRes(CmpInfo.getGreater()), "sel.lt"); + Select = Builder.CreateSelect(EmitCmp(CK_Equal), + EmitCmpRes(CmpInfo.getEqualOrEquiv()), + SelectOne, "sel.eq"); + } else { + Value *SelectEq = Builder.CreateSelect( + EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()), + EmitCmpRes(CmpInfo.getUnordered()), "sel.eq"); + Value *SelectGT = Builder.CreateSelect(EmitCmp(CK_Greater), + EmitCmpRes(CmpInfo.getGreater()), + SelectEq, "sel.gt"); + Select = Builder.CreateSelect( + EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()), SelectGT, "sel.lt"); + } + // Create the return value in the destination slot. + EnsureDest(E->getType()); + LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType()); + + // Emit the address of the first (and only) field in the comparison category + // type, and initialize it from the constant integer value selected above. + LValue FieldLV = CGF.EmitLValueForFieldInitialization( + DestLV, *CmpInfo.Record->field_begin()); + CGF.EmitStoreThroughLValue(RValue::get(Select), FieldLV, /*IsInit*/ true); + + // All done! The result is in the Dest slot. 
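For a non-partial ordering over scalar operands, the select chain built above computes, in effect, the following hand-written equivalent (assuming the common -1/0/1 values for the category's less/equal/greater constants; the real values are taken from ComparisonCategoryInfo::ValueInfo):

    // "sel.lt" picks less vs. greater, then "sel.eq" overrides with equal;
    // the chosen value is stored into the comparison category's single field.
    int spaceship_like(int lhs, int rhs) {
      int sel_lt = (lhs < rhs) ? -1 : 1;
      return (lhs == rhs) ? 0 : sel_lt;
    }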
+} + void AggExprEmitter::VisitBinaryOperator(const BinaryOperator *E) { if (E->getOpcode() == BO_PtrMemD || E->getOpcode() == BO_PtrMemI) VisitPointerToDataMemberBinaryOperator(E); @@ -890,7 +1135,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { EmitCopy(E->getLHS()->getType(), AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased), + AggValueSlot::IsAliased, + AggValueSlot::MayOverlap), Dest); return; } @@ -911,7 +1157,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { AggValueSlot LHSSlot = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased); + AggValueSlot::IsAliased, + AggValueSlot::MayOverlap); // A non-volatile aggregate destination might have volatile member. if (!LHSSlot.isVolatile() && CGF.hasVolatileMember(E->getLHS()->getType())) @@ -1089,6 +1336,7 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) { AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap, Dest.isZeroed())); return; case TEK_Scalar: @@ -1156,11 +1404,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Handle initialization of an array. if (E->getType()->isArrayType()) { - QualType elementType = - CGF.getContext().getAsArrayType(E->getType())->getElementType(); - auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType()); - EmitArrayInit(Dest.getAddress(), AType, elementType, E); + EmitArrayInit(Dest.getAddress(), AType, E->getType(), E); return; } @@ -1190,11 +1435,12 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { Address V = CGF.GetAddressOfDirectBaseInCompleteClass( Dest.getAddress(), CXXRD, BaseRD, /*isBaseVirtual*/ false); - AggValueSlot AggSlot = - AggValueSlot::forAddr(V, Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot AggSlot = AggValueSlot::forAddr( + V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); if (QualType::DestructionKind dtorKind = @@ -1375,7 +1621,9 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, // If the subexpression is an ArrayInitLoopExpr, share its cleanup. auto elementSlot = AggValueSlot::forLValue( elementLV, AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased); + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); AggExprEmitter(CGF, elementSlot, false) .VisitArrayInitLoopExpr(InnerLoop, outerBegin); } else @@ -1425,6 +1673,8 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { // If this is an initlist expr, sum up the size of sizes of the (present) // elements. If this is something weird, assume the whole thing is non-zero. const InitListExpr *ILE = dyn_cast<InitListExpr>(E); + while (ILE && ILE->isTransparent()) + ILE = dyn_cast<InitListExpr>(ILE->getInit(0)); if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType())) return CGF.getContext().getTypeSizeInChars(E->getType()); @@ -1491,7 +1741,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, } // If the type is 16-bytes or smaller, prefer individual stores over memset. 
- CharUnits Size = CGF.getContext().getTypeSizeInChars(E->getType()); + CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType()); if (Size <= CharUnits::fromQuantity(16)) return; @@ -1537,16 +1787,42 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { LValue LV = MakeAddrLValue(Temp, E->getType()); EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); return LV; } -void CodeGenFunction::EmitAggregateCopy(Address DestPtr, - Address SrcPtr, QualType Ty, - bool isVolatile, - bool isAssignment) { +AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit( + const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) { + // Virtual bases are initialized first, in address order, so there's never + // any overlap during their initialization. + // + // FIXME: Under P0840, this is no longer true: the tail padding of a vbase + // of a field could be reused by a vbase of a containing class. + if (IsVirtual) + return AggValueSlot::DoesNotOverlap; + + // If the base class is laid out entirely within the nvsize of the derived + // class, its tail padding cannot yet be initialized, so we can issue + // stores at the full width of the base class. + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + if (Layout.getBaseClassOffset(BaseRD) + + getContext().getASTRecordLayout(BaseRD).getSize() <= + Layout.getNonVirtualSize()) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. + return AggValueSlot::MayOverlap; +} + +void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, + AggValueSlot::Overlap_t MayOverlap, + bool isVolatile) { assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); + Address DestPtr = Dest.getAddress(); + Address SrcPtr = Src.getAddress(); + if (getLangOpts().CPlusPlus) { if (const RecordType *RT = Ty->getAs<RecordType>()) { CXXRecordDecl *Record = cast<CXXRecordDecl>(RT->getDecl()); @@ -1562,7 +1838,7 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr, return; } } - + // Aggregate assignment turns into llvm.memcpy. This is almost valid per // C99 6.5.16.1p3, which states "If the value being stored in an object is // read from another object that overlaps in anyway the storage of the first @@ -1574,12 +1850,11 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr, // implementation handles this case safely. If there is a libc that does not // safely handle this, we can add a target hook. - // Get data size info for this aggregate. If this is an assignment, - // don't copy the tail padding, because we might be assigning into a - // base subobject where the tail padding is claimed. Otherwise, - // copying it is fine. + // Get data size info for this aggregate. Don't copy the tail padding if this + // might be a potentially-overlapping subobject, since the tail padding might + // be occupied by a different object. Otherwise, copying it is fine. 
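The tail-padding hazard described in the comment above is easy to reproduce; the offsets below assume a typical Itanium C++ ABI layout and are only meant to motivate why the data size, not the full size, is used for a potentially overlapping destination:

    struct B { int i; char c; };   // sizeof(B) == 8, but only 5 bytes of real data
    struct D : B { char c2; };     // c2 is typically placed in B's tail padding; sizeof(D) == 8
    // Copying sizeof(B) bytes into the B subobject of a D would clobber d.c2,
    // so an overlapping copy must be limited to B's data size.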
std::pair<CharUnits, CharUnits> TypeInfo; - if (isAssignment) + if (MayOverlap) TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty); else TypeInfo = getContext().getTypeInfoInChars(Ty); @@ -1591,22 +1866,11 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr, getContext().getAsArrayType(Ty))) { QualType BaseEltTy; SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr); - TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy); - std::pair<CharUnits, CharUnits> LastElementTypeInfo; - if (!isAssignment) - LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy); + TypeInfo = getContext().getTypeInfoInChars(BaseEltTy); assert(!TypeInfo.first.isZero()); SizeVal = Builder.CreateNUWMul( SizeVal, llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity())); - if (!isAssignment) { - SizeVal = Builder.CreateNUWSub( - SizeVal, - llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity())); - SizeVal = Builder.CreateNUWAdd( - SizeVal, llvm::ConstantInt::get( - SizeTy, LastElementTypeInfo.first.getQuantity())); - } } } if (!SizeVal) { @@ -1657,4 +1921,10 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr, // the optimizer wishes to expand it in to scalar memory operations. if (llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty)) Inst->setMetadata(llvm::LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (CGM.getCodeGenOpts().NewStructPathTBAA) { + TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForMemoryTransfer( + Dest.getTBAAInfo(), Src.getTBAAInfo()); + CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); + } } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index c32f1e5415da..8955d8a4a83c 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -242,11 +242,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( } } - Address This = Address::invalid(); - if (IsArrow) - This = EmitPointerWithAlignment(Base); - else - This = EmitLValue(Base).getAddress(); + LValue This; + if (IsArrow) { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address ThisValue = EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); + This = MakeAddrLValue(ThisValue, Base->getType(), BaseInfo, TBAAInfo); + } else { + This = EmitLValue(Base); + } if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) { @@ -261,10 +265,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( // when it isn't necessary; just produce the proper effect here. LValue RHS = isa<CXXOperatorCallExpr>(CE) ? MakeNaturalAlignAddrLValue( - (*RtlArgs)[0].RV.getScalarVal(), + (*RtlArgs)[0].getRValue(*this).getScalarVal(), (*(CE->arg_begin() + 1))->getType()) : EmitLValue(*CE->arg_begin()); - EmitAggregateAssign(This, RHS.getAddress(), CE->getType()); + EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This.getPointer()); } @@ -272,8 +276,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) { // Trivial move and copy ctor are the same. assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - Address RHS = EmitLValue(*CE->arg_begin()).getAddress(); - EmitAggregateCopy(This, RHS, (*CE->arg_begin())->getType()); + const Expr *Arg = *CE->arg_begin(); + LValue RHS = EmitLValue(Arg); + LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType()); + // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's + // constructing a new complete object of type Ctor. 
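The source construct behind that comment is the Microsoft explicit-constructor-call extension; a minimal sketch, accepted by clang only in its Microsoft extensions mode (the struct and function names are illustrative):

    struct S { int x, y; };   // trivial copy constructor
    void reconstruct(S *p, const S &src) {
      // MSVC p->Ctor::Ctor(...) form; with a trivial copy constructor
      // it lowers to the aggregate copy emitted above.
      p->S::S(src);
    }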
+ EmitAggregateCopy(Dest, RHS, Arg->getType(), + AggValueSlot::DoesNotOverlap); return RValue::get(This.getPointer()); } llvm_unreachable("unknown trivial member function"); @@ -335,7 +344,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); if (UseVirtualCall) { CGM.getCXXABI().EmitVirtualDestructorCall( - *this, Dtor, Dtor_Complete, This, cast<CXXMemberCallExpr>(CE)); + *this, Dtor, Dtor_Complete, This.getAddress(), + cast<CXXMemberCallExpr>(CE)); } else { CGCallee Callee; if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) @@ -364,15 +374,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), Ctor); } else if (UseVirtualCall) { - Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty, - CE->getLocStart()); + Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); } else { if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { llvm::Value *VTable; const CXXRecordDecl *RD; std::tie(VTable, RD) = - CGM.getCXXABI().LoadVTablePtr(*this, This, MD->getParent()); + CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(), + MD->getParent()); EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart()); } @@ -388,8 +398,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( } if (MD->isVirtual()) { - This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( - *this, CalleeDecl, This, UseVirtualCall); + Address NewThisAddr = + CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( + *this, CalleeDecl, This.getAddress(), UseVirtualCall); + This.setAddress(NewThisAddr); } return EmitCXXMemberOrOperatorCall( @@ -622,7 +634,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, // Call the constructor. EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, - Dest.getAddress(), E); + Dest.getAddress(), E, Dest.mayOverlap()); } } @@ -924,7 +936,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, } static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, - QualType AllocType, Address NewPtr) { + QualType AllocType, Address NewPtr, + AggValueSlot::Overlap_t MayOverlap) { // FIXME: Refactor with EmitExprAsInit. switch (CGF.getEvaluationKind(AllocType)) { case TEK_Scalar: @@ -940,7 +953,8 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, = AggValueSlot::forAddr(NewPtr, AllocType.getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + MayOverlap); CGF.EmitAggExpr(Init, Slot); return; } @@ -1009,7 +1023,8 @@ void CodeGenFunction::EmitNewArrayInitializer( AggValueSlot::forAddr(CurPtr, ElementType.getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); EmitAggExpr(ILE->getInit(0), Slot); // Move past these elements. @@ -1074,7 +1089,8 @@ void CodeGenFunction::EmitNewArrayInitializer( // an array, and we have an array filler, we can fold together the two // initialization loops. 
StoreAnyExprIntoOneUnit(*this, ILE->getInit(i), - ILE->getInit(i)->getType(), CurPtr); + ILE->getInit(i)->getType(), CurPtr, + AggValueSlot::DoesNotOverlap); CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getPointer(), Builder.getSize(1), "array.exp.next"), @@ -1227,7 +1243,8 @@ void CodeGenFunction::EmitNewArrayInitializer( } // Emit the initializer into this element. - StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr); + StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr, + AggValueSlot::DoesNotOverlap); // Leave the Cleanup if we entered one. if (CleanupDominator) { @@ -1258,7 +1275,8 @@ static void EmitNewInitializer(CodeGenFunction &CGF, const CXXNewExpr *E, CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements, AllocSizeWithoutCookie); else if (const Expr *Init = E->getInitializer()) - StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr); + StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr, + AggValueSlot::DoesNotOverlap); } /// Emit a call to an operator new or operator delete function, as implicitly @@ -1298,19 +1316,19 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, } RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, - const Expr *Arg, + const CallExpr *TheCall, bool IsDelete) { CallArgList Args; - const Stmt *ArgS = Arg; - EmitCallArgs(Args, *Type->param_type_begin(), llvm::makeArrayRef(ArgS)); + EmitCallArgs(Args, Type->getParamTypes(), TheCall->arguments()); // Find the allocation or deallocation function that we're calling. ASTContext &Ctx = getContext(); DeclarationName Name = Ctx.DeclarationNames .getCXXOperatorName(IsDelete ? OO_Delete : OO_New); + for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name)) if (auto *FD = dyn_cast<FunctionDecl>(Decl)) if (Ctx.hasSameType(FD->getType(), QualType(Type, 0))) - return EmitNewDeleteCall(*this, cast<FunctionDecl>(Decl), Type, Args); + return EmitNewDeleteCall(*this, FD, Type, Args); llvm_unreachable("predeclared global operator new/delete is missing"); } @@ -1481,7 +1499,7 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF, AllocAlign); for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) { auto &Arg = NewArgs[I + NumNonPlacementArgs]; - Cleanup->setPlacementArg(I, Arg.RV, Arg.Ty); + Cleanup->setPlacementArg(I, Arg.getRValue(CGF), Arg.Ty); } return; @@ -1512,8 +1530,8 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF, AllocAlign); for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) { auto &Arg = NewArgs[I + NumNonPlacementArgs]; - Cleanup->setPlacementArg(I, DominatingValue<RValue>::save(CGF, Arg.RV), - Arg.Ty); + Cleanup->setPlacementArg( + I, DominatingValue<RValue>::save(CGF, Arg.getRValue(CGF)), Arg.Ty); } CGF.initFullExprCleanup(); @@ -1678,13 +1696,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { llvm::Type *elementTy = ConvertTypeForMem(allocType); Address result = Builder.CreateElementBitCast(allocation, elementTy); - // Passing pointer through invariant.group.barrier to avoid propagation of + // Passing pointer through launder.invariant.group to avoid propagation of // vptrs information which may be included in previous type. // To not break LTO with different optimizations levels, we do it regardless // of optimization level. 
if (CGM.getCodeGenOpts().StrictVTablePointers && allocator->isReservedGlobalPlacementOperator()) - result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()), + result = Address(Builder.CreateLaunderInvariantGroup(result.getPointer()), result.getAlignment()); EmitNewInitializer(*this, E, allocType, elementTy, result, numElements, diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 9094d3f8a91c..fb176093a741 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -155,8 +155,9 @@ public: } ComplexPairTy VisitOpaqueValueExpr(OpaqueValueExpr *E) { if (E->isGLValue()) - return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc()); - return CGF.getOpaqueRValueMapping(E).getComplexVal(); + return EmitLoadOfLValue(CGF.getOrCreateOpaqueLValueMapping(E), + E->getExprLoc()); + return CGF.getOrCreateOpaqueRValueMapping(E).getComplexVal(); } ComplexPairTy VisitPseudoObjectExpr(PseudoObjectExpr *E) { @@ -594,7 +595,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) { return ComplexPairTy(ResR, ResI); } -/// \brief Emit a libcall for a binary operation on complex types. +/// Emit a libcall for a binary operation on complex types. ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op) { CallArgList Args; @@ -628,11 +629,11 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName, llvm::Instruction *Call; RValue Res = CGF.EmitCall(FuncInfo, Callee, ReturnValueSlot(), Args, &Call); - cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getBuiltinCC()); + cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getRuntimeCC()); return Res.getComplexVal(); } -/// \brief Lookup the libcall name for a given floating point type complex +/// Lookup the libcall name for a given floating point type complex /// multiply. static StringRef getComplexMultiplyLibCallName(llvm::Type *Ty) { switch (Ty->getTypeID()) { @@ -1055,7 +1056,7 @@ ComplexPairTy ComplexExprEmitter::VisitInitListExpr(InitListExpr *E) { return Visit(E->getInit(0)); } - // Empty init list intializes to null + // Empty init list initializes to null assert(E->getNumInits() == 0 && "Unexpected number of inits"); QualType Ty = E->getType()->castAs<ComplexType>()->getElementType(); llvm::Type* LTy = CGF.ConvertType(Ty); diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index d1b9e13a6f93..cfd0b859233a 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -635,6 +635,72 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, return ConstantAddress(GV, Align); } +static llvm::Constant * +EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, + llvm::Type *CommonElementType, unsigned ArrayBound, + SmallVectorImpl<llvm::Constant *> &Elements, + llvm::Constant *Filler) { + // Figure out how long the initial prefix of non-zero elements is. + unsigned NonzeroLength = ArrayBound; + if (Elements.size() < NonzeroLength && Filler->isNullValue()) + NonzeroLength = Elements.size(); + if (NonzeroLength == Elements.size()) { + while (NonzeroLength > 0 && Elements[NonzeroLength - 1]->isNullValue()) + --NonzeroLength; + } + + if (NonzeroLength == 0) { + return llvm::ConstantAggregateZero::get( + CGM.getTypes().ConvertType(QualType(DestType, 0))); + } + + // Add a zeroinitializer array filler if we have lots of trailing zeroes. 
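To see what the trailing-zero handling below buys, consider a large, mostly-zero constant array; the thresholds (at least 8 trailing zeroes, at least 8 same-typed leading elements) come from the code that follows, while the exact IR spelling is illustrative:

    int arr[1000] = {1, 2, 3, 4, 5, 6, 7, 8};
    // can now be emitted roughly as a packed struct of two arrays,
    //   <{ [8 x i32] [...], [992 x i32] zeroinitializer }>,
    // instead of a [1000 x i32] constant that spells out every element.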
+ unsigned TrailingZeroes = ArrayBound - NonzeroLength; + if (TrailingZeroes >= 8) { + assert(Elements.size() >= NonzeroLength && + "missing initializer for non-zero element"); + + // If all the elements had the same type up to the trailing zeroes, emit a + // struct of two arrays (the nonzero data and the zeroinitializer). + if (CommonElementType && NonzeroLength >= 8) { + llvm::Constant *Initial = llvm::ConstantArray::get( + llvm::ArrayType::get(CommonElementType, NonzeroLength), + makeArrayRef(Elements).take_front(NonzeroLength)); + Elements.resize(2); + Elements[0] = Initial; + } else { + Elements.resize(NonzeroLength + 1); + } + + auto *FillerType = + CommonElementType + ? CommonElementType + : CGM.getTypes().ConvertType(DestType->getElementType()); + FillerType = llvm::ArrayType::get(FillerType, TrailingZeroes); + Elements.back() = llvm::ConstantAggregateZero::get(FillerType); + CommonElementType = nullptr; + } else if (Elements.size() != ArrayBound) { + // Otherwise pad to the right size with the filler if necessary. + Elements.resize(ArrayBound, Filler); + if (Filler->getType() != CommonElementType) + CommonElementType = nullptr; + } + + // If all elements have the same type, just emit an array constant. + if (CommonElementType) + return llvm::ConstantArray::get( + llvm::ArrayType::get(CommonElementType, ArrayBound), Elements); + + // We have mixed types. Use a packed struct. + llvm::SmallVector<llvm::Type *, 16> Types; + Types.reserve(Elements.size()); + for (llvm::Constant *Elt : Elements) + Types.push_back(Elt->getType()); + llvm::StructType *SType = + llvm::StructType::get(CGM.getLLVMContext(), Types, true); + return llvm::ConstantStruct::get(SType, Elements); +} + /// This class only needs to handle two cases: /// 1) Literals (this is used by APValue emission to emit literals). /// 2) Arrays, structs and unions (outside C++11 mode, we don't currently @@ -832,60 +898,47 @@ public: } llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) { - llvm::ArrayType *AType = - cast<llvm::ArrayType>(ConvertType(ILE->getType())); - llvm::Type *ElemTy = AType->getElementType(); + auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType()); + assert(CAT && "can't emit array init for non-constant-bound array"); unsigned NumInitElements = ILE->getNumInits(); - unsigned NumElements = AType->getNumElements(); + unsigned NumElements = CAT->getSize().getZExtValue(); // Initialising an array requires us to automatically // initialise any elements that have not been initialised explicitly unsigned NumInitableElts = std::min(NumInitElements, NumElements); - QualType EltType = CGM.getContext().getAsArrayType(T)->getElementType(); + QualType EltType = CAT->getElementType(); // Initialize remaining array elements. - llvm::Constant *fillC; - if (Expr *filler = ILE->getArrayFiller()) + llvm::Constant *fillC = nullptr; + if (Expr *filler = ILE->getArrayFiller()) { fillC = Emitter.tryEmitAbstractForMemory(filler, EltType); - else - fillC = Emitter.emitNullForMemory(EltType); - if (!fillC) - return nullptr; - - // Try to use a ConstantAggregateZero if we can. - if (fillC->isNullValue() && !NumInitableElts) - return llvm::ConstantAggregateZero::get(AType); + if (!fillC) + return nullptr; + } // Copy initializer elements. 
SmallVector<llvm::Constant*, 16> Elts; - Elts.reserve(NumInitableElts + NumElements); + if (fillC && fillC->isNullValue()) + Elts.reserve(NumInitableElts + 1); + else + Elts.reserve(NumElements); - bool RewriteType = false; + llvm::Type *CommonElementType = nullptr; for (unsigned i = 0; i < NumInitableElts; ++i) { Expr *Init = ILE->getInit(i); llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType); if (!C) return nullptr; - RewriteType |= (C->getType() != ElemTy); + if (i == 0) + CommonElementType = C->getType(); + else if (C->getType() != CommonElementType) + CommonElementType = nullptr; Elts.push_back(C); } - RewriteType |= (fillC->getType() != ElemTy); - Elts.resize(NumElements, fillC); - - if (RewriteType) { - // FIXME: Try to avoid packing the array - std::vector<llvm::Type*> Types; - Types.reserve(NumInitableElts + NumElements); - for (unsigned i = 0, e = Elts.size(); i < e; ++i) - Types.push_back(Elts[i]->getType()); - llvm::StructType *SType = llvm::StructType::get(AType->getContext(), - Types, true); - return llvm::ConstantStruct::get(SType, Elts); - } - - return llvm::ConstantArray::get(AType, Elts); + return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, + fillC); } llvm::Constant *EmitRecordInitialization(InitListExpr *ILE, QualType T) { @@ -1881,40 +1934,31 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, case APValue::Union: return ConstStructBuilder::BuildStruct(*this, Value, DestType); case APValue::Array: { - const ArrayType *CAT = CGM.getContext().getAsArrayType(DestType); + const ConstantArrayType *CAT = + CGM.getContext().getAsConstantArrayType(DestType); unsigned NumElements = Value.getArraySize(); unsigned NumInitElts = Value.getArrayInitializedElts(); // Emit array filler, if there is one. llvm::Constant *Filler = nullptr; - if (Value.hasArrayFiller()) + if (Value.hasArrayFiller()) { Filler = tryEmitAbstractForMemory(Value.getArrayFiller(), CAT->getElementType()); - - // Emit initializer elements. - llvm::Type *CommonElementType = - CGM.getTypes().ConvertType(CAT->getElementType()); - - // Try to use a ConstantAggregateZero if we can. - if (Filler && Filler->isNullValue() && !NumInitElts) { - llvm::ArrayType *AType = - llvm::ArrayType::get(CommonElementType, NumElements); - return llvm::ConstantAggregateZero::get(AType); + if (!Filler) + return nullptr; } + // Emit initializer elements. 
SmallVector<llvm::Constant*, 16> Elts; - Elts.reserve(NumElements); - for (unsigned I = 0; I < NumElements; ++I) { - llvm::Constant *C = Filler; - if (I < NumInitElts) { - C = tryEmitPrivateForMemory(Value.getArrayInitializedElt(I), - CAT->getElementType()); - } else if (!Filler) { - assert(Value.hasArrayFiller() && - "Missing filler for implicit elements of initializer"); - C = tryEmitPrivateForMemory(Value.getArrayFiller(), - CAT->getElementType()); - } + if (Filler && Filler->isNullValue()) + Elts.reserve(NumInitElts + 1); + else + Elts.reserve(NumElements); + + llvm::Type *CommonElementType = nullptr; + for (unsigned I = 0; I < NumInitElts; ++I) { + llvm::Constant *C = tryEmitPrivateForMemory( + Value.getArrayInitializedElt(I), CAT->getElementType()); if (!C) return nullptr; if (I == 0) @@ -1924,20 +1968,8 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, Elts.push_back(C); } - if (!CommonElementType) { - // FIXME: Try to avoid packing the array - std::vector<llvm::Type*> Types; - Types.reserve(NumElements); - for (unsigned i = 0, e = Elts.size(); i < e; ++i) - Types.push_back(Elts[i]->getType()); - llvm::StructType *SType = - llvm::StructType::get(CGM.getLLVMContext(), Types, true); - return llvm::ConstantStruct::get(SType, Elts); - } - - llvm::ArrayType *AType = - llvm::ArrayType::get(CommonElementType, NumElements); - return llvm::ConstantArray::get(AType, Elts); + return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, + Filler); } case APValue::MemberPointer: return CGM.getCXXABI().EmitMemberPointer(Value, DestType); diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index c46215067a68..783f74c5026d 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -165,7 +165,7 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { // If a unary op has a widened operand, the op cannot overflow. if (const auto *UO = dyn_cast<UnaryOperator>(Op.E)) - return IsWidenedIntegerOp(Ctx, UO->getSubExpr()); + return !UO->canOverflow(); // We usually don't need overflow checks for binops with widened operands. // Multiplication with promoted unsigned operands is a special case. @@ -387,6 +387,9 @@ public: Value *VisitIntegerLiteral(const IntegerLiteral *E) { return Builder.getInt(E->getValue()); } + Value *VisitFixedPointLiteral(const FixedPointLiteral *E) { + return Builder.getInt(E->getValue()); + } Value *VisitFloatingLiteral(const FloatingLiteral *E) { return llvm::ConstantFP::get(VMContext, E->getValue()); } @@ -422,10 +425,11 @@ public: Value *VisitOpaqueValueExpr(OpaqueValueExpr *E) { if (E->isGLValue()) - return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc()); + return EmitLoadOfLValue(CGF.getOrCreateOpaqueLValueMapping(E), + E->getExprLoc()); // Otherwise, assume the mapping is the scalar directly. - return CGF.getOpaqueRValueMapping(E).getScalarVal(); + return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal(); } Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant, @@ -1144,7 +1148,7 @@ Value *ScalarExprEmitter::EmitNullValue(QualType Ty) { return CGF.EmitFromMemory(CGF.CGM.EmitNullConstant(Ty), Ty); } -/// \brief Emit a sanitization check for the given "binary" operation (which +/// Emit a sanitization check for the given "binary" operation (which /// might actually be a unary increment which has been lowered to a binary /// operation). The check passes if all values in \p Checks (which are \c i1), /// are \c true. 
@@ -1617,6 +1621,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { CE->getLocStart()); } + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) { + const QualType SrcType = E->getType(); + + if (SrcType.mayBeNotDynamicClass() && DestTy.mayBeDynamicClass()) { + // Casting to pointer that could carry dynamic information (provided by + // invariant.group) requires launder. + Src = Builder.CreateLaunderInvariantGroup(Src); + } else if (SrcType.mayBeDynamicClass() && DestTy.mayBeNotDynamicClass()) { + // Casting to pointer that does not carry dynamic information (provided + // by invariant.group) requires stripping it. Note that we don't do it + // if the source could not be dynamic type and destination could be + // dynamic because dynamic information is already laundered. It is + // because launder(strip(src)) == launder(src), so there is no need to + // add extra strip before launder. + Src = Builder.CreateStripInvariantGroup(Src); + } + } + return Builder.CreateBitCast(Src, DstTy); } case CK_AddressSpaceConversion: { @@ -1753,12 +1775,31 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Value* IntResult = Builder.CreateIntCast(Src, MiddleTy, InputSigned, "conv"); - return Builder.CreateIntToPtr(IntResult, DestLLVMTy); + auto *IntToPtr = Builder.CreateIntToPtr(IntResult, DestLLVMTy); + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) { + // Going from integer to pointer that could be dynamic requires reloading + // dynamic information from invariant.group. + if (DestTy.mayBeDynamicClass()) + IntToPtr = Builder.CreateLaunderInvariantGroup(IntToPtr); + } + return IntToPtr; } - case CK_PointerToIntegral: + case CK_PointerToIntegral: { assert(!DestTy->isBooleanType() && "bool should use PointerToBool"); - return Builder.CreatePtrToInt(Visit(E), ConvertType(DestTy)); + auto *PtrExpr = Visit(E); + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) { + const QualType SrcType = E->getType(); + + // Casting to integer requires stripping dynamic information as it does + // not carries it. + if (SrcType.mayBeDynamicClass()) + PtrExpr = Builder.CreateStripInvariantGroup(PtrExpr); + } + return Builder.CreatePtrToInt(PtrExpr, ConvertType(DestTy)); + } case CK_ToVoid: { CGF.EmitIgnoredExpr(E); return nullptr; @@ -1873,7 +1914,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( return Builder.CreateNSWAdd(InVal, Amount, Name); // Fall through. case LangOptions::SOB_Trapping: - if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr())) + if (!E->canOverflow()) return Builder.CreateNSWAdd(InVal, Amount, Name); return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc)); } @@ -1955,11 +1996,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (type->isIntegerType()) { // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps here. 
- bool CanOverflow = value->getType()->getIntegerBitWidth() >= - CGF.IntTy->getIntegerBitWidth(); - if (CanOverflow && type->isSignedIntegerOrEnumerationType()) { + if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); - } else if (CanOverflow && type->isUnsignedIntegerType() && + } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc)); @@ -1975,7 +2014,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // VLA types don't have constant size. if (const VariableArrayType *vla = CGF.getContext().getAsVariableArrayType(type)) { - llvm::Value *numElts = CGF.getVLASize(vla).first; + llvm::Value *numElts = CGF.getVLASize(vla).NumElts; if (!isInc) numElts = Builder.CreateNSWNeg(numElts, "vla.negsize"); if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); @@ -2273,16 +2312,13 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( CGF.EmitIgnoredExpr(E->getArgumentExpr()); } - QualType eltType; - llvm::Value *numElts; - std::tie(numElts, eltType) = CGF.getVLASize(VAT); - - llvm::Value *size = numElts; + auto VlaSize = CGF.getVLASize(VAT); + llvm::Value *size = VlaSize.NumElts; // Scale the number of non-VLA elements by the non-VLA element size. - CharUnits eltSize = CGF.getContext().getTypeSizeInChars(eltType); + CharUnits eltSize = CGF.getContext().getTypeSizeInChars(VlaSize.Type); if (!eltSize.isOne()) - size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), numElts); + size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), size); return size; } @@ -2769,7 +2805,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (const VariableArrayType *vla = CGF.getContext().getAsVariableArrayType(elementType)) { // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = CGF.getVLASize(vla).first; + llvm::Value *numElements = CGF.getVLASize(vla).NumElts; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to @@ -2964,10 +3000,9 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // For a variable-length array, this is going to be non-constant. if (const VariableArrayType *vla = CGF.getContext().getAsVariableArrayType(elementType)) { - llvm::Value *numElements; - std::tie(numElements, elementType) = CGF.getVLASize(vla); - - divisor = numElements; + auto VlaSize = CGF.getVLASize(vla); + elementType = VlaSize.Type; + divisor = VlaSize.NumElts; // Scale the number of non-VLA elements by the non-VLA element size. CharUnits eltSize = CGF.getContext().getTypeSizeInChars(elementType); @@ -3243,6 +3278,23 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp"); } else { // Unsigned integers and pointers. + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers && + !isa<llvm::ConstantPointerNull>(LHS) && + !isa<llvm::ConstantPointerNull>(RHS)) { + + // Dynamic information is required to be stripped for comparisons, + // because it could leak the dynamic information. Based on comparisons + // of pointers to dynamic objects, the optimizer can replace one pointer + // with another, which might be incorrect in presence of invariant + // groups. Comparison with null is safe because null does not carry any + // dynamic information. 
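Taken together, the -fstrict-vtable-pointers changes in this file strip invariant.group information where it could leak (pointer comparisons, pointer-to-integer casts) and launder it where it must be re-established (integer-to-pointer casts). A sketch of source constructs that now receive the intrinsics, based on the cases handled above; the exact instruction placement is illustrative:

    #include <cstdint>
    struct Dyn { virtual ~Dyn(); };
    std::uintptr_t save(Dyn *p) {
      return reinterpret_cast<std::uintptr_t>(p);   // p is passed through llvm.strip.invariant.group
    }
    Dyn *restore(std::uintptr_t v) {
      return reinterpret_cast<Dyn *>(v);            // result is passed through llvm.launder.invariant.group
    }
    bool same(Dyn *a, Dyn *b) {
      return a == b;                                // both operands are stripped before the icmp
    }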
+ if (LHSTy.mayBeDynamicClass()) + LHS = Builder.CreateStripInvariantGroup(LHS); + if (RHSTy.mayBeDynamicClass()) + RHS = Builder.CreateStripInvariantGroup(RHS); + } + Result = Builder.CreateICmp(UICmpOpc, LHS, RHS, "cmp"); } @@ -3433,6 +3485,12 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { // Insert an entry into the phi node for the edge with the value of RHSCond. PN->addIncoming(RHSCond, RHSBlock); + // Artificial location to preserve the scope information + { + auto NL = ApplyDebugLocation::CreateArtificial(CGF); + PN->setDebugLoc(Builder.getCurrentDebugLocation()); + } + // ZExt result to int. return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext"); } diff --git a/lib/CodeGen/CGGPUBuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp index 48156b1b26b7..b5375ffb8db7 100644 --- a/lib/CodeGen/CGGPUBuiltin.cpp +++ b/lib/CodeGen/CGGPUBuiltin.cpp @@ -83,8 +83,9 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, /* ParamsToSkip = */ 0); // We don't know how to emit non-scalar varargs. - if (std::any_of(Args.begin() + 1, Args.end(), - [](const CallArg &A) { return !A.RV.isScalar(); })) { + if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) { + return !A.getRValue(*this).isScalar(); + })) { CGM.ErrorUnsupported(E, "non-scalar arg to printf"); return RValue::get(llvm::ConstantInt::get(IntTy, 0)); } @@ -97,7 +98,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, } else { llvm::SmallVector<llvm::Type *, 8> ArgTypes; for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) - ArgTypes.push_back(Args[I].RV.getScalarVal()->getType()); + ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType()); // Using llvm::StructType is correct only because printf doesn't accept // aggregates. If we had to handle aggregates here, we'd have to manually @@ -109,7 +110,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); - llvm::Value *Arg = Args[I].RV.getScalarVal(); + llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal(); Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType())); } BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); @@ -117,6 +118,6 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, // Invoke vprintf and return. llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); - return RValue::get( - Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr})); + return RValue::get(Builder.CreateCall( + VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr})); } diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index 15608c105dc7..9d5f23ff9a2a 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -32,62 +32,62 @@ class Attr; class ASTContext; namespace CodeGen { -/// \brief Attributes that may be specified on loops. +/// Attributes that may be specified on loops. struct LoopAttributes { explicit LoopAttributes(bool IsParallel = false); void clear(); - /// \brief Generate llvm.loop.parallel metadata for loads and stores. + /// Generate llvm.loop.parallel metadata for loads and stores. bool IsParallel; - /// \brief State of loop vectorization or unrolling. + /// State of loop vectorization or unrolling. enum LVEnableState { Unspecified, Enable, Disable, Full }; - /// \brief Value for llvm.loop.vectorize.enable metadata. 
+ /// Value for llvm.loop.vectorize.enable metadata. LVEnableState VectorizeEnable; - /// \brief Value for llvm.loop.unroll.* metadata (enable, disable, or full). + /// Value for llvm.loop.unroll.* metadata (enable, disable, or full). LVEnableState UnrollEnable; - /// \brief Value for llvm.loop.vectorize.width metadata. + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; - /// \brief Value for llvm.loop.interleave.count metadata. + /// Value for llvm.loop.interleave.count metadata. unsigned InterleaveCount; - /// \brief llvm.unroll. + /// llvm.unroll. unsigned UnrollCount; - /// \brief Value for llvm.loop.distribute.enable metadata. + /// Value for llvm.loop.distribute.enable metadata. LVEnableState DistributeEnable; }; -/// \brief Information used when generating a structured loop. +/// Information used when generating a structured loop. class LoopInfo { public: - /// \brief Construct a new LoopInfo for the loop with entry Header. + /// Construct a new LoopInfo for the loop with entry Header. LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); - /// \brief Get the loop id metadata for this loop. + /// Get the loop id metadata for this loop. llvm::MDNode *getLoopID() const { return LoopID; } - /// \brief Get the header block of this loop. + /// Get the header block of this loop. llvm::BasicBlock *getHeader() const { return Header; } - /// \brief Get the set of attributes active for this loop. + /// Get the set of attributes active for this loop. const LoopAttributes &getAttributes() const { return Attrs; } private: - /// \brief Loop ID metadata. + /// Loop ID metadata. llvm::MDNode *LoopID; - /// \brief Header block of this loop. + /// Header block of this loop. llvm::BasicBlock *Header; - /// \brief The attributes for this loop. + /// The attributes for this loop. LoopAttributes Attrs; }; -/// \brief A stack of loop information corresponding to loop nesting levels. +/// A stack of loop information corresponding to loop nesting levels. /// This stack can be used to prepare attributes which are applied when a loop /// is emitted. class LoopInfoStack { @@ -97,70 +97,70 @@ class LoopInfoStack { public: LoopInfoStack() {} - /// \brief Begin a new structured loop. The set of staged attributes will be + /// Begin a new structured loop. The set of staged attributes will be /// applied to the loop and then cleared. void push(llvm::BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); - /// \brief Begin a new structured loop. Stage attributes from the Attrs list. + /// Begin a new structured loop. Stage attributes from the Attrs list. /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, llvm::ArrayRef<const Attr *> Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); - /// \brief End the current loop. + /// End the current loop. void pop(); - /// \brief Return the top loop id metadata. + /// Return the top loop id metadata. llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); } - /// \brief Return true if the top loop is parallel. + /// Return true if the top loop is parallel. bool getCurLoopParallel() const { return hasInfo() ? getInfo().getAttributes().IsParallel : false; } - /// \brief Function called by the CodeGenFunction when an instruction is + /// Function called by the CodeGenFunction when an instruction is /// created. 
void InsertHelper(llvm::Instruction *I) const; - /// \brief Set the next pushed loop as parallel. + /// Set the next pushed loop as parallel. void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; } - /// \brief Set the next pushed loop 'vectorize.enable' + /// Set the next pushed loop 'vectorize.enable' void setVectorizeEnable(bool Enable = true) { StagedAttrs.VectorizeEnable = Enable ? LoopAttributes::Enable : LoopAttributes::Disable; } - /// \brief Set the next pushed loop as a distribution candidate. + /// Set the next pushed loop as a distribution candidate. void setDistributeState(bool Enable = true) { StagedAttrs.DistributeEnable = Enable ? LoopAttributes::Enable : LoopAttributes::Disable; } - /// \brief Set the next pushed loop unroll state. + /// Set the next pushed loop unroll state. void setUnrollState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollEnable = State; } - /// \brief Set the vectorize width for the next loop pushed. + /// Set the vectorize width for the next loop pushed. void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; } - /// \brief Set the interleave count for the next loop pushed. + /// Set the interleave count for the next loop pushed. void setInterleaveCount(unsigned C) { StagedAttrs.InterleaveCount = C; } - /// \brief Set the unroll count for the next loop pushed. + /// Set the unroll count for the next loop pushed. void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; } private: - /// \brief Returns true if there is LoopInfo on the stack. + /// Returns true if there is LoopInfo on the stack. bool hasInfo() const { return !Active.empty(); } - /// \brief Return the LoopInfo for the current loop. HasInfo should be called + /// Return the LoopInfo for the current loop. HasInfo should be called /// first to ensure LoopInfo is present. const LoopInfo &getInfo() const { return Active.back(); } - /// \brief The set of attributes that will be applied to the next pushed loop. + /// The set of attributes that will be applied to the next pushed loop. LoopAttributes StagedAttrs; - /// \brief Stack of active loops. + /// Stack of active loops. llvm::SmallVector<LoopInfo, 4> Active; }; diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp new file mode 100644 index 000000000000..922e0934b866 --- /dev/null +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -0,0 +1,885 @@ +//===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions to generate various special functions for C +// structs. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/NonTrivialTypeVisitor.h" +#include "llvm/Support/ScopedPrinter.h" +#include <array> + +using namespace clang; +using namespace CodeGen; + +// Return the size of a field in number of bits. 
+static uint64_t getFieldSize(const FieldDecl *FD, QualType FT, + ASTContext &Ctx) { + if (FD && FD->isBitField()) + return FD->getBitWidthValue(Ctx); + return Ctx.getTypeSize(FT); +} + +namespace { +enum { DstIdx = 0, SrcIdx = 1 }; +const char *ValNameStr[2] = {"dst", "src"}; + +template <class Derived> struct StructVisitor { + StructVisitor(ASTContext &Ctx) : Ctx(Ctx) {} + + template <class... Ts> + void visitStructFields(QualType QT, CharUnits CurStructOffset, Ts... Args) { + const RecordDecl *RD = QT->castAs<RecordType>()->getDecl(); + + // Iterate over the fields of the struct. + for (const FieldDecl *FD : RD->fields()) { + QualType FT = FD->getType(); + FT = QT.isVolatileQualified() ? FT.withVolatile() : FT; + asDerived().visit(FT, FD, CurStructOffset, Args...); + } + + asDerived().flushTrivialFields(Args...); + } + + template <class... Ts> void visitTrivial(Ts... Args) {} + + template <class... Ts> void visitCXXDestructor(Ts... Args) { + llvm_unreachable("field of a C++ struct type is not expected"); + } + + template <class... Ts> void flushTrivialFields(Ts... Args) {} + + uint64_t getFieldOffsetInBits(const FieldDecl *FD) { + return FD ? Ctx.getASTRecordLayout(FD->getParent()) + .getFieldOffset(FD->getFieldIndex()) + : 0; + } + + CharUnits getFieldOffset(const FieldDecl *FD) { + return Ctx.toCharUnitsFromBits(getFieldOffsetInBits(FD)); + } + + Derived &asDerived() { return static_cast<Derived &>(*this); } + + ASTContext &getContext() { return Ctx; } + ASTContext &Ctx; +}; + +template <class Derived, bool IsMove> +struct CopyStructVisitor : StructVisitor<Derived>, + CopiedTypeVisitor<Derived, IsMove> { + using StructVisitor<Derived>::asDerived; + using Super = CopiedTypeVisitor<Derived, IsMove>; + + CopyStructVisitor(ASTContext &Ctx) : StructVisitor<Derived>(Ctx) {} + + template <class... Ts> + void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT, + const FieldDecl *FD, CharUnits CurStructOffsset, + Ts &&... Args) { + if (PCK) + asDerived().flushTrivialFields(std::forward<Ts>(Args)...); + } + + template <class... Ts> + void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType FT, + const FieldDecl *FD, CharUnits CurStructOffsset, + Ts &&... Args) { + if (const auto *AT = asDerived().getContext().getAsArrayType(FT)) { + asDerived().visitArray(PCK, AT, FT.isVolatileQualified(), FD, + CurStructOffsset, std::forward<Ts>(Args)...); + return; + } + + Super::visitWithKind(PCK, FT, FD, CurStructOffsset, + std::forward<Ts>(Args)...); + } + + template <class... Ts> + void visitTrivial(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset, + Ts... Args) { + assert(!FT.isVolatileQualified() && "volatile field not expected"); + ASTContext &Ctx = asDerived().getContext(); + uint64_t FieldSize = getFieldSize(FD, FT, Ctx); + + // Ignore zero-sized fields. + if (FieldSize == 0) + return; + + uint64_t FStartInBits = asDerived().getFieldOffsetInBits(FD); + uint64_t FEndInBits = FStartInBits + FieldSize; + uint64_t RoundedFEnd = llvm::alignTo(FEndInBits, Ctx.getCharWidth()); + + // Set Start if this is the first field of a sequence of trivial fields. + if (Start == End) + Start = CurStructOffset + Ctx.toCharUnitsFromBits(FStartInBits); + End = CurStructOffset + Ctx.toCharUnitsFromBits(RoundedFEnd); + } + + CharUnits Start = CharUnits::Zero(), End = CharUnits::Zero(); +}; + +// This function creates the mangled name of a special function of a non-trivial +// C struct. Since there is no ODR in C, the function is mangled based on the +// struct contents and not the name. 
The mangled name has the following +// structure: +// +// <function-name> ::= <prefix> <alignment-info> "_" <struct-field-info> +// <prefix> ::= "__destructor_" | "__default_constructor_" | +// "__copy_constructor_" | "__move_constructor_" | +// "__copy_assignment_" | "__move_assignment_" +// <alignment-info> ::= <dst-alignment> ["_" <src-alignment>] +// <struct-field-info> ::= <field-info>+ +// <field-info> ::= <struct-or-scalar-field-info> | <array-field-info> +// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> | +// <trivial-field-info> +// <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n" +// <num-elements> <innermost-element-info> "_AE" +// <innermost-element-info> ::= <struct-or-scalar-field-info> +// <strong-field-info> ::= "_s" ["b"] ["v"] <field-offset> +// <trivial-field-info> ::= "_t" ["v"] <field-offset> "_" <field-size> + +template <class Derived> struct GenFuncNameBase { + std::string getVolatileOffsetStr(bool IsVolatile, CharUnits Offset) { + std::string S; + if (IsVolatile) + S = "v"; + S += llvm::to_string(Offset.getQuantity()); + return S; + } + + void visitARCStrong(QualType FT, const FieldDecl *FD, + CharUnits CurStructOffset) { + appendStr("_s"); + if (FT->isBlockPointerType()) + appendStr("b"); + CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset)); + } + + void visitARCWeak(QualType FT, const FieldDecl *FD, + CharUnits CurStructOffset) { + appendStr("_w"); + CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset)); + } + + void visitStruct(QualType QT, const FieldDecl *FD, + CharUnits CurStructOffset) { + CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + asDerived().visitStructFields(QT, FieldOffset); + } + + template <class FieldKind> + void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, + const FieldDecl *FD, CharUnits CurStructOffset) { + // String for non-volatile trivial fields is emitted when + // flushTrivialFields is called. + if (!FK) + return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset); + + CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + ASTContext &Ctx = asDerived().getContext(); + const ConstantArrayType *CAT = cast<ConstantArrayType>(AT); + unsigned NumElts = Ctx.getConstantArrayElementCount(CAT); + QualType EltTy = Ctx.getBaseElementType(CAT); + CharUnits EltSize = Ctx.getTypeSizeInChars(EltTy); + appendStr("_AB" + llvm::to_string(FieldOffset.getQuantity()) + "s" + + llvm::to_string(EltSize.getQuantity()) + "n" + + llvm::to_string(NumElts)); + EltTy = IsVolatile ? EltTy.withVolatile() : EltTy; + asDerived().visitWithKind(FK, EltTy, nullptr, FieldOffset); + appendStr("_AE"); + } + + void appendStr(StringRef Str) { Name += Str; } + + std::string getName(QualType QT, bool IsVolatile) { + QT = IsVolatile ? 
QT.withVolatile() : QT; + asDerived().visitStructFields(QT, CharUnits::Zero()); + return Name; + } + + Derived &asDerived() { return static_cast<Derived &>(*this); } + + std::string Name; +}; + +template <class Derived> +struct GenUnaryFuncName : StructVisitor<Derived>, GenFuncNameBase<Derived> { + GenUnaryFuncName(StringRef Prefix, CharUnits DstAlignment, ASTContext &Ctx) + : StructVisitor<Derived>(Ctx) { + this->appendStr(Prefix); + this->appendStr(llvm::to_string(DstAlignment.getQuantity())); + } +}; + +// Helper function to create a null constant. +static llvm::Constant *getNullForVariable(Address Addr) { + llvm::Type *Ty = Addr.getElementType(); + return llvm::ConstantPointerNull::get(cast<llvm::PointerType>(Ty)); +} + +template <bool IsMove> +struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>, + GenFuncNameBase<GenBinaryFuncName<IsMove>> { + + GenBinaryFuncName(StringRef Prefix, CharUnits DstAlignment, + CharUnits SrcAlignment, ASTContext &Ctx) + : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>(Ctx) { + this->appendStr(Prefix); + this->appendStr(llvm::to_string(DstAlignment.getQuantity())); + this->appendStr("_" + llvm::to_string(SrcAlignment.getQuantity())); + } + + void flushTrivialFields() { + if (this->Start == this->End) + return; + + this->appendStr("_t" + llvm::to_string(this->Start.getQuantity()) + "w" + + llvm::to_string((this->End - this->Start).getQuantity())); + + this->Start = this->End = CharUnits::Zero(); + } + + void visitVolatileTrivial(QualType FT, const FieldDecl *FD, + CharUnits CurStackOffset) { + // Because volatile fields can be bit-fields and are individually copied, + // their offset and width are in bits. + uint64_t OffsetInBits = + this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD); + this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" + + llvm::to_string(getFieldSize(FD, FT, this->Ctx))); + } +}; + +struct GenDefaultInitializeFuncName + : GenUnaryFuncName<GenDefaultInitializeFuncName>, + DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName> { + using Super = DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName>; + GenDefaultInitializeFuncName(CharUnits DstAlignment, ASTContext &Ctx) + : GenUnaryFuncName<GenDefaultInitializeFuncName>("__default_constructor_", + DstAlignment, Ctx) {} + void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT, + const FieldDecl *FD, CharUnits CurStructOffset) { + if (const auto *AT = getContext().getAsArrayType(FT)) { + visitArray(PDIK, AT, FT.isVolatileQualified(), FD, CurStructOffset); + return; + } + + Super::visitWithKind(PDIK, FT, FD, CurStructOffset); + } +}; + +struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>, + DestructedTypeVisitor<GenDestructorFuncName> { + using Super = DestructedTypeVisitor<GenDestructorFuncName>; + GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx) + : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment, + Ctx) {} + void visitWithKind(QualType::DestructionKind DK, QualType FT, + const FieldDecl *FD, CharUnits CurStructOffset) { + if (const auto *AT = getContext().getAsArrayType(FT)) { + visitArray(DK, AT, FT.isVolatileQualified(), FD, CurStructOffset); + return; + } + + Super::visitWithKind(DK, FT, FD, CurStructOffset); + } +}; + +// Helper function that creates CGFunctionInfo for an N-ary special function. 
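The mangling grammar above is easiest to read with a concrete name in hand. The standalone sketch below assembles the kind of symbol GenBinaryFuncName would produce for a hypothetical struct containing a __strong pointer followed by an int and a char on a 64-bit target; the struct, its assumed layout, and the resulting name are illustrative and are not output taken from this commit. Note that flushTrivialFields joins the offset and width of a trivial run with "w".

// Illustrative only, not Clang code: assembling a copy-constructor helper name
// for a hypothetical struct { __strong id x; int a; char b; } with the pointer
// at byte 0, the int at byte 8, the char at byte 12, and 8-byte alignment for
// both the destination and the source operand.
#include <iostream>
#include <string>

int main() {
  std::string Name = "__copy_constructor_";
  Name += "8_8";   // <dst-alignment> "_" <src-alignment>, in bytes
  Name += "_s0";   // __strong pointer field at byte offset 0
  Name += "_t8w5"; // trivial run: starts at byte 8, 5 bytes wide (int + char)
  std::cout << Name << "\n"; // prints "__copy_constructor_8_8_s0_t8w5"
}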
+template <size_t N> +static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, + FunctionArgList &Args) { + ASTContext &Ctx = CGM.getContext(); + llvm::SmallVector<ImplicitParamDecl *, N> Params; + QualType ParamTy = Ctx.getPointerType(Ctx.VoidPtrTy); + + for (unsigned I = 0; I < N; ++I) + Params.push_back(ImplicitParamDecl::Create( + Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(ValNameStr[I]), ParamTy, + ImplicitParamDecl::Other)); + + for (auto &P : Params) + Args.push_back(P); + + return CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); +} + +// Template classes that are used as bases for classes that emit special +// functions. +template <class Derived> struct GenFuncBase { + template <size_t N> + void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, N> Addrs) { + this->asDerived().callSpecialFunction( + FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs); + } + + template <class FieldKind, size_t N> + void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, + const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, N> Addrs) { + // Non-volatile trivial fields are copied when flushTrivialFields is called. + if (!FK) + return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset, + Addrs); + + CodeGenFunction &CGF = *this->CGF; + ASTContext &Ctx = CGF.getContext(); + + // Compute the end address. + QualType BaseEltQT; + std::array<Address, N> StartAddrs = Addrs; + for (unsigned I = 0; I < N; ++I) + StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD); + Address DstAddr = StartAddrs[DstIdx]; + llvm::Value *NumElts = CGF.emitArrayLength(AT, BaseEltQT, DstAddr); + unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity(); + llvm::Value *BaseEltSizeVal = + llvm::ConstantInt::get(NumElts->getType(), BaseEltSize); + llvm::Value *SizeInBytes = + CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts); + Address BC = CGF.Builder.CreateBitCast(DstAddr, CGF.CGM.Int8PtrTy); + llvm::Value *DstArrayEnd = + CGF.Builder.CreateInBoundsGEP(BC.getPointer(), SizeInBytes); + DstArrayEnd = CGF.Builder.CreateBitCast(DstArrayEnd, CGF.CGM.Int8PtrPtrTy, + "dstarray.end"); + llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock(); + + // Create the header block and insert the phi instructions. + llvm::BasicBlock *HeaderBB = CGF.createBasicBlock("loop.header"); + CGF.EmitBlock(HeaderBB); + llvm::PHINode *PHIs[N]; + + for (unsigned I = 0; I < N; ++I) { + PHIs[I] = CGF.Builder.CreatePHI(CGF.CGM.Int8PtrPtrTy, 2, "addr.cur"); + PHIs[I]->addIncoming(StartAddrs[I].getPointer(), PreheaderBB); + } + + // Create the exit and loop body blocks. + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("loop.exit"); + llvm::BasicBlock *LoopBB = CGF.createBasicBlock("loop.body"); + + // Emit the comparison and conditional branch instruction that jumps to + // either the exit or the loop body. + llvm::Value *Done = + CGF.Builder.CreateICmpEQ(PHIs[DstIdx], DstArrayEnd, "done"); + CGF.Builder.CreateCondBr(Done, ExitBB, LoopBB); + + // Visit the element of the array in the loop body. + CGF.EmitBlock(LoopBB); + QualType EltQT = AT->getElementType(); + CharUnits EltSize = Ctx.getTypeSizeInChars(EltQT); + std::array<Address, N> NewAddrs = Addrs; + + for (unsigned I = 0; I < N; ++I) + NewAddrs[I] = Address( + PHIs[I], StartAddrs[I].getAlignment().alignmentAtOffset(EltSize)); + + EltQT = IsVolatile ? 
EltQT.withVolatile() : EltQT; + this->asDerived().visitWithKind(FK, EltQT, nullptr, CharUnits::Zero(), + NewAddrs); + + LoopBB = CGF.Builder.GetInsertBlock(); + + for (unsigned I = 0; I < N; ++I) { + // Instrs to update the destination and source addresses. + // Update phi instructions. + NewAddrs[I] = getAddrWithOffset(NewAddrs[I], EltSize); + PHIs[I]->addIncoming(NewAddrs[I].getPointer(), LoopBB); + } + + // Insert an unconditional branch to the header block. + CGF.Builder.CreateBr(HeaderBB); + CGF.EmitBlock(ExitBB); + } + + /// Return an address with the specified offset from the passed address. + Address getAddrWithOffset(Address Addr, CharUnits Offset) { + assert(Addr.isValid() && "invalid address"); + if (Offset.getQuantity() == 0) + return Addr; + Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy); + Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(), + CharUnits::One()); + return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy); + } + + Address getAddrWithOffset(Address Addr, CharUnits StructFieldOffset, + const FieldDecl *FD) { + return getAddrWithOffset(Addr, StructFieldOffset + + asDerived().getFieldOffset(FD)); + } + + template <size_t N> + llvm::Function * + getFunction(StringRef FuncName, QualType QT, std::array<Address, N> Addrs, + std::array<CharUnits, N> Alignments, CodeGenModule &CGM) { + // If the special function already exists in the module, return it. + if (llvm::Function *F = CGM.getModule().getFunction(FuncName)) { + bool WrongType = false; + if (!F->getReturnType()->isVoidTy()) + WrongType = true; + else { + for (const llvm::Argument &Arg : F->args()) + if (Arg.getType() != CGM.Int8PtrPtrTy) + WrongType = true; + } + + if (WrongType) { + std::string FuncName = F->getName(); + SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation(); + CGM.Error(Loc, "special function " + FuncName + + " for non-trivial C struct has incorrect type"); + return nullptr; + } + return F; + } + + ASTContext &Ctx = CGM.getContext(); + FunctionArgList Args; + const CGFunctionInfo &FI = getFunctionInfo<N>(CGM, Args); + llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *F = + llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, + FuncName, &CGM.getModule()); + F->setVisibility(llvm::GlobalValue::HiddenVisibility); + CGM.SetLLVMFunctionAttributes(nullptr, FI, F); + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F); + IdentifierInfo *II = &Ctx.Idents.get(FuncName); + FunctionDecl *FD = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), + II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + CodeGenFunction NewCGF(CGM); + setCGF(&NewCGF); + CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); + + for (unsigned I = 0; I < N; ++I) { + llvm::Value *V = CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[I])); + Addrs[I] = Address(V, Alignments[I]); + } + + asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs); + CGF->FinishFunction(); + return F; + } + + template <size_t N> + void callFunc(StringRef FuncName, QualType QT, std::array<Address, N> Addrs, + CodeGenFunction &CallerCGF) { + std::array<CharUnits, N> Alignments; + llvm::Value *Ptrs[N]; + + for (unsigned I = 0; I < N; ++I) { + Alignments[I] = Addrs[I].getAlignment(); + Ptrs[I] = + CallerCGF.Builder.CreateBitCast(Addrs[I], CallerCGF.CGM.Int8PtrPtrTy) + .getPointer(); + } + + if (llvm::Function *F = + getFunction(FuncName, QT, Addrs, Alignments, CallerCGF.CGM)) + 
CallerCGF.EmitNounwindRuntimeCall(F, Ptrs); + } + + Derived &asDerived() { return static_cast<Derived &>(*this); } + + void setCGF(CodeGenFunction *F) { CGF = F; } + + CodeGenFunction *CGF = nullptr; +}; + +template <class Derived, bool IsMove> +struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>, + GenFuncBase<Derived> { + GenBinaryFunc(ASTContext &Ctx) : CopyStructVisitor<Derived, IsMove>(Ctx) {} + + void flushTrivialFields(std::array<Address, 2> Addrs) { + CharUnits Size = this->End - this->Start; + + if (Size.getQuantity() == 0) + return; + + Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], this->Start); + Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], this->Start); + + // Emit memcpy. + if (Size.getQuantity() >= 16 || !llvm::isPowerOf2_32(Size.getQuantity())) { + llvm::Value *SizeVal = + llvm::ConstantInt::get(this->CGF->SizeTy, Size.getQuantity()); + DstAddr = + this->CGF->Builder.CreateElementBitCast(DstAddr, this->CGF->Int8Ty); + SrcAddr = + this->CGF->Builder.CreateElementBitCast(SrcAddr, this->CGF->Int8Ty); + this->CGF->Builder.CreateMemCpy(DstAddr, SrcAddr, SizeVal, false); + } else { + llvm::Type *Ty = llvm::Type::getIntNTy( + this->CGF->getLLVMContext(), + Size.getQuantity() * this->CGF->getContext().getCharWidth()); + DstAddr = this->CGF->Builder.CreateElementBitCast(DstAddr, Ty); + SrcAddr = this->CGF->Builder.CreateElementBitCast(SrcAddr, Ty); + llvm::Value *SrcVal = this->CGF->Builder.CreateLoad(SrcAddr, false); + this->CGF->Builder.CreateStore(SrcVal, DstAddr, false); + } + + this->Start = this->End = CharUnits::Zero(); + } + + template <class... Ts> + void visitVolatileTrivial(QualType FT, const FieldDecl *FD, CharUnits Offset, + std::array<Address, 2> Addrs) { + LValue DstLV, SrcLV; + if (FD) { + QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0); + llvm::PointerType *PtrTy = this->CGF->ConvertType(RT)->getPointerTo(); + Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset); + LValue DstBase = this->CGF->MakeAddrLValue( + this->CGF->Builder.CreateBitCast(DstAddr, PtrTy), FT); + DstLV = this->CGF->EmitLValueForField(DstBase, FD); + Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], Offset); + LValue SrcBase = this->CGF->MakeAddrLValue( + this->CGF->Builder.CreateBitCast(SrcAddr, PtrTy), FT); + SrcLV = this->CGF->EmitLValueForField(SrcBase, FD); + } else { + llvm::PointerType *Ty = this->CGF->ConvertType(FT)->getPointerTo(); + Address DstAddr = this->CGF->Builder.CreateBitCast(Addrs[DstIdx], Ty); + Address SrcAddr = this->CGF->Builder.CreateBitCast(Addrs[SrcIdx], Ty); + DstLV = this->CGF->MakeAddrLValue(DstAddr, FT); + SrcLV = this->CGF->MakeAddrLValue(SrcAddr, FT); + } + RValue SrcVal = this->CGF->EmitLoadOfLValue(SrcLV, SourceLocation()); + this->CGF->EmitStoreThroughLValue(SrcVal, DstLV); + } +}; + +// These classes that emit the special functions for a non-trivial struct. 
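flushTrivialFields above copies each coalesced run of trivial bytes either with a memcpy or with a single integer-width load and store, depending on the run's size. The predicate below restates that choice as a standalone function; the 16-byte threshold and the power-of-two test come from the code above, while the function name and the plain-integer interface are invented for this sketch.

// Illustrative sketch of the copy-strategy choice used by flushTrivialFields:
// runs of 16 bytes or more, or runs whose size is not a power of two, become a
// memcpy; 1-, 2-, 4-, and 8-byte runs become one integer-sized load/store.
#include <cstdint>

static bool useMemcpyForTrivialRun(uint64_t SizeInBytes) {
  bool IsPow2 = SizeInBytes && (SizeInBytes & (SizeInBytes - 1)) == 0;
  return SizeInBytes >= 16 || !IsPow2;
}

int main() {
  // A 5-byte run (int + char) takes the memcpy path; an 8-byte run does not.
  return useMemcpyForTrivialRun(5) && !useMemcpyForTrivialRun(8) ? 0 : 1;
}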
+struct GenDestructor : StructVisitor<GenDestructor>, + GenFuncBase<GenDestructor>, + DestructedTypeVisitor<GenDestructor> { + using Super = DestructedTypeVisitor<GenDestructor>; + GenDestructor(ASTContext &Ctx) : StructVisitor<GenDestructor>(Ctx) {} + + void visitWithKind(QualType::DestructionKind DK, QualType FT, + const FieldDecl *FD, CharUnits CurStructOffset, + std::array<Address, 1> Addrs) { + if (const auto *AT = getContext().getAsArrayType(FT)) { + visitArray(DK, AT, FT.isVolatileQualified(), FD, CurStructOffset, Addrs); + return; + } + + Super::visitWithKind(DK, FT, FD, CurStructOffset, Addrs); + } + + void visitARCStrong(QualType QT, const FieldDecl *FD, + CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CGF->destroyARCStrongImprecise( + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 1> Addrs) { + CGF->destroyARCWeak( + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + } + + void callSpecialFunction(QualType FT, CharUnits Offset, + std::array<Address, 1> Addrs) { + CGF->callCStructDestructor( + CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT)); + } +}; + +struct GenDefaultInitialize + : StructVisitor<GenDefaultInitialize>, + GenFuncBase<GenDefaultInitialize>, + DefaultInitializedTypeVisitor<GenDefaultInitialize> { + using Super = DefaultInitializedTypeVisitor<GenDefaultInitialize>; + typedef GenFuncBase<GenDefaultInitialize> GenFuncBaseTy; + + GenDefaultInitialize(ASTContext &Ctx) + : StructVisitor<GenDefaultInitialize>(Ctx) {} + + void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT, + const FieldDecl *FD, CharUnits CurStructOffset, + std::array<Address, 1> Addrs) { + if (const auto *AT = getContext().getAsArrayType(FT)) { + visitArray(PDIK, AT, FT.isVolatileQualified(), FD, CurStructOffset, + Addrs); + return; + } + + Super::visitWithKind(PDIK, FT, FD, CurStructOffset, Addrs); + } + + void visitARCStrong(QualType QT, const FieldDecl *FD, + CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CGF->EmitNullInitialization( + getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 1> Addrs) { + CGF->EmitNullInitialization( + getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + } + + template <class FieldKind, size_t... 
Is> + void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, + const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 1> Addrs) { + if (!FK) + return visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs); + + ASTContext &Ctx = getContext(); + CharUnits Size = Ctx.getTypeSizeInChars(QualType(AT, 0)); + QualType EltTy = Ctx.getBaseElementType(QualType(AT, 0)); + + if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) { + GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStackOffset, Addrs); + return; + } + + llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity()); + Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty); + CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal, + IsVolatile); + } + + void callSpecialFunction(QualType FT, CharUnits Offset, + std::array<Address, 1> Addrs) { + CGF->callCStructDefaultConstructor( + CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT)); + } +}; + +struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> { + GenCopyConstructor(ASTContext &Ctx) + : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {} + + void visitARCStrong(QualType QT, const FieldDecl *FD, + CharUnits CurStackOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + llvm::Value *SrcVal = CGF->EmitLoadOfScalar( + Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); + llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal); + CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CGF->EmitARCCopyWeak(Addrs[DstIdx], Addrs[SrcIdx]); + } + + void callSpecialFunction(QualType FT, CharUnits Offset, + std::array<Address, 2> Addrs) { + CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), + CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); + } +}; + +struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { + GenMoveConstructor(ASTContext &Ctx) + : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {} + + void visitARCStrong(QualType QT, const FieldDecl *FD, + CharUnits CurStackOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); + llvm::Value *SrcVal = + CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); + CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV); + CGF->EmitStoreOfScalar(SrcVal, CGF->MakeAddrLValue(Addrs[DstIdx], QT), + /* isInitialization */ true); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CGF->EmitARCMoveWeak(Addrs[DstIdx], Addrs[SrcIdx]); + } + + void callSpecialFunction(QualType FT, CharUnits Offset, + std::array<Address, 2> Addrs) { + CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), + 
CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); + } +}; + +struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> { + GenCopyAssignment(ASTContext &Ctx) + : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {} + + void visitARCStrong(QualType QT, const FieldDecl *FD, + CharUnits CurStackOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + llvm::Value *SrcVal = CGF->EmitLoadOfScalar( + Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); + CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), SrcVal, + false); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CGF->emitARCCopyAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); + } + + void callSpecialFunction(QualType FT, CharUnits Offset, + std::array<Address, 2> Addrs) { + CGF->callCStructCopyAssignmentOperator( + CGF->MakeAddrLValue(Addrs[DstIdx], FT), + CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); + } +}; + +struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { + GenMoveAssignment(ASTContext &Ctx) + : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {} + + void visitARCStrong(QualType QT, const FieldDecl *FD, + CharUnits CurStackOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); + llvm::Value *SrcVal = + CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); + CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV); + LValue DstLV = CGF->MakeAddrLValue(Addrs[DstIdx], QT); + llvm::Value *DstVal = + CGF->EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); + CGF->EmitStoreOfScalar(SrcVal, DstLV); + CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CGF->emitARCMoveAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); + } + + void callSpecialFunction(QualType FT, CharUnits Offset, + std::array<Address, 2> Addrs) { + CGF->callCStructMoveAssignmentOperator( + CGF->MakeAddrLValue(Addrs[DstIdx], FT), + CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); + } +}; + +} // namespace + +void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF, + Address Addr, QualType Type) { + CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Type)); +} + +// Default-initialize a variable that is a non-trivial struct or an array of +// such structure. +void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) { + GenDefaultInitialize Gen(getContext()); + Address DstPtr = Builder.CreateBitCast(Dst.getAddress(), CGM.Int8PtrPtrTy); + Gen.setCGF(this); + QualType QT = Dst.getType(); + QT = Dst.isVolatile() ? 
QT.withVolatile() : QT; + Gen.visit(QT, nullptr, CharUnits::Zero(), std::array<Address, 1>({{DstPtr}})); +} + +template <class G, size_t N> +static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, + bool IsVolatile, CodeGenFunction &CGF, + std::array<Address, N> Addrs) { + for (unsigned I = 0; I < N; ++I) + Addrs[I] = CGF.Builder.CreateBitCast(Addrs[I], CGF.CGM.Int8PtrPtrTy); + QT = IsVolatile ? QT.withVolatile() : QT; + Gen.callFunc(FuncName, QT, Addrs, CGF); +} + +// Functions to emit calls to the special functions of a non-trivial C struct. +void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { + bool IsVolatile = Dst.isVolatile(); + Address DstPtr = Dst.getAddress(); + QualType QT = Dst.getType(); + GenDefaultInitializeFuncName GenName(DstPtr.getAlignment(), getContext()); + std::string FuncName = GenName.getName(QT, IsVolatile); + callSpecialFunction(GenDefaultInitialize(getContext()), FuncName, QT, + IsVolatile, *this, std::array<Address, 1>({{DstPtr}})); +} + +void CodeGenFunction::callCStructDestructor(LValue Dst) { + bool IsVolatile = Dst.isVolatile(); + Address DstPtr = Dst.getAddress(); + QualType QT = Dst.getType(); + GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext()); + std::string FuncName = GenName.getName(QT, IsVolatile); + callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile, + *this, std::array<Address, 1>({{DstPtr}})); +} + +void CodeGenFunction::callCStructCopyConstructor(LValue Dst, LValue Src) { + bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); + Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + QualType QT = Dst.getType(); + GenBinaryFuncName<false> GenName("__copy_constructor_", DstPtr.getAlignment(), + SrcPtr.getAlignment(), getContext()); + std::string FuncName = GenName.getName(QT, IsVolatile); + callSpecialFunction(GenCopyConstructor(getContext()), FuncName, QT, + IsVolatile, *this, + std::array<Address, 2>({{DstPtr, SrcPtr}})); +} + +void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst, LValue Src + +) { + bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); + Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + QualType QT = Dst.getType(); + GenBinaryFuncName<false> GenName("__copy_assignment_", DstPtr.getAlignment(), + SrcPtr.getAlignment(), getContext()); + std::string FuncName = GenName.getName(QT, IsVolatile); + callSpecialFunction(GenCopyAssignment(getContext()), FuncName, QT, IsVolatile, + *this, std::array<Address, 2>({{DstPtr, SrcPtr}})); +} + +void CodeGenFunction::callCStructMoveConstructor(LValue Dst, LValue Src) { + bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); + Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + QualType QT = Dst.getType(); + GenBinaryFuncName<true> GenName("__move_constructor_", DstPtr.getAlignment(), + SrcPtr.getAlignment(), getContext()); + std::string FuncName = GenName.getName(QT, IsVolatile); + callSpecialFunction(GenMoveConstructor(getContext()), FuncName, QT, + IsVolatile, *this, + std::array<Address, 2>({{DstPtr, SrcPtr}})); +} + +void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src + +) { + bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); + Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + QualType QT = Dst.getType(); + GenBinaryFuncName<true> GenName("__move_assignment_", DstPtr.getAlignment(), + SrcPtr.getAlignment(), getContext()); + std::string FuncName = GenName.getName(QT, IsVolatile); + 
callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile, + *this, std::array<Address, 2>({{DstPtr, SrcPtr}})); +} diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index f26263d9472d..81c1201c0e06 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -259,7 +259,7 @@ llvm::Value *CodeGenFunction::EmitObjCProtocolExpr(const ObjCProtocolExpr *E) { return CGM.getObjCRuntime().GenerateProtocolRef(*this, E->getProtocol()); } -/// \brief Adjust the type of an Objective-C object that doesn't match up due +/// Adjust the type of an Objective-C object that doesn't match up due /// to type erasure at various points, e.g., related result types or the use /// of parameterized classes. static RValue AdjustObjCObjectType(CodeGenFunction &CGF, QualType ExpT, @@ -803,7 +803,7 @@ PropertyImplStrategy::PropertyImplStrategy(CodeGenModule &CGM, Kind = Native; } -/// \brief Generate an Objective-C property getter function. +/// Generate an Objective-C property getter function. /// /// The given Decl must be an ObjCImplementationDecl. \@synthesize /// is illegal within a category. @@ -1008,12 +1008,14 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, /*init*/ true); return; } - case TEK_Aggregate: + case TEK_Aggregate: { // The return value slot is guaranteed to not be aliased, but // that's not necessarily the same as "on the stack", so // we still potentially need objc_memmove_collectable. - EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType); + EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType), + /* Src= */ LV, ivarType, overlapForReturnValue()); return; + } case TEK_Scalar: { llvm::Value *value; if (propType->isReferenceType()) { @@ -1334,7 +1336,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, EmitStmt(&assign); } -/// \brief Generate an Objective-C property setter function. +/// Generate an Objective-C property setter function. /// /// The given Decl must be an ObjCImplementationDecl. \@synthesize /// is illegal within a category. @@ -1438,7 +1440,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, EmitAggExpr(IvarInit->getInit(), AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); } // constructor returns 'self'. CodeGenTypes &Types = CGM.getTypes(); @@ -1814,22 +1817,6 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { } -static bool IsForwarding(StringRef Name) { - return llvm::StringSwitch<bool>(Name) - .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV - "objc_autorelease", // ARCInstKind::Autorelease - "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV - "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV - "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease - "objc_retainedObject", // ARCInstKind::NoopCast - "objc_retain", // ARCInstKind::Retain - "objc_unretainedObject", // ARCInstKind::NoopCast - "objc_unretainedPointer", // ARCInstKind::NoopCast - "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV - true) - .Default(false); -} - static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, llvm::FunctionType *FTy, StringRef Name) { @@ -1847,9 +1834,6 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, // performance. 
F->addFnAttr(llvm::Attribute::NonLazyBind); } - - if (IsForwarding(Name)) - F->arg_begin()->addAttr(llvm::Attribute::Returned); } return RTF; @@ -2052,7 +2036,7 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // Call the marker asm if we made one, which we do only at -O0. if (marker) - CGF.Builder.CreateCall(marker); + CGF.Builder.CreateCall(marker, None, CGF.getBundlesForFunclet(marker)); } /// Retain the given object which is the result of a function call. @@ -2070,7 +2054,7 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { /// Claim a possibly-autoreleased return value at +0. This is only /// valid to do in contexts which do not rely on the retain to keep -/// the object valid for for all of its uses; for example, when +/// the object valid for all of its uses; for example, when /// the value is ignored, or when it is being assigned to an /// __unsafe_unretained variable. /// @@ -2325,6 +2309,21 @@ void CodeGenFunction::EmitARCCopyWeak(Address dst, Address src) { "objc_copyWeak"); } +void CodeGenFunction::emitARCCopyAssignWeak(QualType Ty, Address DstAddr, + Address SrcAddr) { + llvm::Value *Object = EmitARCLoadWeakRetained(SrcAddr); + Object = EmitObjCConsumeObject(Ty, Object); + EmitARCStoreWeak(DstAddr, Object, false); +} + +void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr, + Address SrcAddr) { + llvm::Value *Object = EmitARCLoadWeakRetained(SrcAddr); + Object = EmitObjCConsumeObject(Ty, Object); + EmitARCStoreWeak(DstAddr, Object, false); + EmitARCDestroyWeak(SrcAddr); +} + /// Produce the code to do a objc_autoreleasepool_push. /// call i8* \@objc_autoreleasePoolPush(void) llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { @@ -3261,19 +3260,19 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( "__assign_helper_atomic_property_", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); StartFunction(FD, C.VoidTy, Fn, FI, args); DeclRefExpr DstExpr(&DstDecl, false, DestTy, VK_RValue, SourceLocation()); UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(), - VK_LValue, OK_Ordinary, SourceLocation()); + VK_LValue, OK_Ordinary, SourceLocation(), false); DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), - VK_LValue, OK_Ordinary, SourceLocation()); + VK_LValue, OK_Ordinary, SourceLocation(), false); Expr *Args[2] = { &DST, &SRC }; CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment()); @@ -3342,8 +3341,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_", &CGM.getModule()); - - CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); StartFunction(FD, C.VoidTy, Fn, FI, args); @@ -3351,7 +3350,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( VK_RValue, SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), - VK_LValue, OK_Ordinary, SourceLocation()); + VK_LValue, OK_Ordinary, SourceLocation(), false); CXXConstructExpr *CXXConstExpr = cast<CXXConstructExpr>(PID->getGetterCXXConstructor()); @@ -3384,7 +3383,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + 
AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); FinishFunction(); HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index c8b8be7f4552..6a0554b46b1c 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -34,11 +34,24 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ConvertUTF.h" +#include <cctype> using namespace clang; using namespace CodeGen; namespace { + +std::string SymbolNameForMethod( StringRef ClassName, + StringRef CategoryName, const Selector MethodName, + bool isClassMethod) { + std::string MethodNameColonStripped = MethodName.getAsString(); + std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(), + ':', '_'); + return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" + + CategoryName + "_" + MethodNameColonStripped).str(); +} + /// Class that lazily initialises the runtime function. Avoids inserting the /// types and the function declaration into a module if they're not used, and /// avoids constructing the type more than once if it's used more than once. @@ -80,8 +93,7 @@ public: if (!Function) { if (!FunctionName) return nullptr; - Function = - cast<llvm::Constant>(CGM->CreateRuntimeFunction(FTy, FunctionName)); + Function = CGM->CreateRuntimeFunction(FTy, FunctionName); } return Function; } @@ -114,6 +126,10 @@ protected: /// Pointer to i8 - LLVM type of char*, for all of the places where the /// runtime needs to deal with C strings. llvm::PointerType *PtrToInt8Ty; + /// struct objc_protocol type + llvm::StructType *ProtocolTy; + /// Protocol * type. + llvm::PointerType *ProtocolPtrTy; /// Instance Method Pointer type. This is a pointer to a function that takes, /// at a minimum, an object and a selector, and is the generic type for /// Objective-C methods. Due to differences between variadic / non-variadic @@ -156,11 +172,29 @@ protected: llvm::IntegerType *Int32Ty; /// 64-bit integer type, to save us needing to look it up every time it's used. llvm::IntegerType *Int64Ty; + /// The type of struct objc_property. + llvm::StructType *PropertyMetadataTy; /// Metadata kind used to tie method lookups to message sends. The GNUstep /// runtime provides some LLVM passes that can use this to do things like /// automatic IMP caching and speculative inlining. unsigned msgSendMDKind; + /// Helper to check if we are targeting a specific runtime version or later. + bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) { + const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; + return (R.getKind() == kind) && + (R.getVersion() >= VersionTuple(major, minor)); + } + + std::string SymbolForProtocol(StringRef Name) { + return (StringRef("._OBJC_PROTOCOL_") + Name).str(); + } + + std::string SymbolForProtocolRef(StringRef Name) { + return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str(); + } + + /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. @@ -174,39 +208,28 @@ protected: /// string value. This allows the linker to combine the strings between /// different modules. Used for EH typeinfo names, selector strings, and a /// few other things. 
- llvm::Constant *ExportUniqueString(const std::string &Str, StringRef Prefix) { - std::string Name = Prefix.str() + Str; - auto *ConstStr = TheModule.getGlobalVariable(Name); + llvm::Constant *ExportUniqueString(const std::string &Str, + const std::string &prefix, + bool Private=false) { + std::string name = prefix + Str; + auto *ConstStr = TheModule.getGlobalVariable(name); if (!ConstStr) { llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str); - ConstStr = new llvm::GlobalVariable(TheModule, value->getType(), true, - llvm::GlobalValue::LinkOnceODRLinkage, - value, Name); + auto *GV = new llvm::GlobalVariable(TheModule, value->getType(), true, + llvm::GlobalValue::LinkOnceODRLinkage, value, name); + if (Private) + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + ConstStr = GV; } return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(), ConstStr, Zeros); } - /// Generates a global structure, initialized by the elements in the vector. - /// The element types must match the types of the structure elements in the - /// first argument. - llvm::GlobalVariable *MakeGlobal(llvm::Constant *C, - CharUnits Align, - StringRef Name="", - llvm::GlobalValue::LinkageTypes linkage - =llvm::GlobalValue::InternalLinkage) { - auto GV = new llvm::GlobalVariable(TheModule, C->getType(), false, - linkage, C, Name); - GV->setAlignment(Align.getQuantity()); - return GV; - } - /// Returns a property name and encoding string. llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD, const Decl *Container) { - const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; - if ((R.getKind() == ObjCRuntime::GNUstep) && - (R.getVersion() >= VersionTuple(1, 6))) { + assert(!isRuntime(ObjCRuntime::GNUstep, 2)); + if (isRuntime(ObjCRuntime::GNUstep, 1, 6)) { std::string NameAndAttributes; std::string TypeStr = CGM.getContext().getObjCEncodingForPropertyDecl(PD, Container); @@ -222,7 +245,7 @@ protected: /// Push the property attributes into two structure fields. void PushPropertyAttributes(ConstantStructBuilder &Fields, - ObjCPropertyDecl *property, bool isSynthesized=true, bool + const ObjCPropertyDecl *property, bool isSynthesized=true, bool isDynamic=true) { int attrs = property->getPropertyAttributes(); // For read-only properties, clear the copy and retain flags @@ -249,6 +272,46 @@ protected: Fields.addInt(Int8Ty, 0); } + virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields, + int count) { + // int count; + Fields.addInt(IntTy, count); + // int size; (only in GNUstep v2 ABI. 
+ if (isRuntime(ObjCRuntime::GNUstep, 2)) { + llvm::DataLayout td(&TheModule); + Fields.addInt(IntTy, td.getTypeSizeInBits(PropertyMetadataTy) / + CGM.getContext().getCharWidth()); + } + // struct objc_property_list *next; + Fields.add(NULLPtr); + // struct objc_property properties[] + return Fields.beginArray(PropertyMetadataTy); + } + virtual void PushProperty(ConstantArrayBuilder &PropertiesArray, + const ObjCPropertyDecl *property, + const Decl *OCD, + bool isSynthesized=true, bool + isDynamic=true) { + auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy); + ASTContext &Context = CGM.getContext(); + Fields.add(MakePropertyEncodingString(property, OCD)); + PushPropertyAttributes(Fields, property, isSynthesized, isDynamic); + auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { + if (accessor) { + std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor); + llvm::Constant *TypeEncoding = MakeConstantString(TypeStr); + Fields.add(MakeConstantString(accessor->getSelector().getAsString())); + Fields.add(TypeEncoding); + } else { + Fields.add(NULLPtr); + Fields.add(NULLPtr); + } + }; + addPropertyMethod(property->getGetterMethodDecl()); + addPropertyMethod(property->getSetterMethodDecl()); + Fields.finishAndAddTo(PropertiesArray); + } + /// Ensures that the value has the required type, by inserting a bitcast if /// required. This function lets us avoid inserting bitcasts that are /// redundant. @@ -268,7 +331,8 @@ protected: /// LLVM context. llvm::LLVMContext &VMContext; -private: +protected: + /// Placeholder for the class. Lots of things refer to the class before we've /// actually emitted it. We use this alias as a placeholder, and then replace /// it with a pointer to the class structure before finally emitting the @@ -352,6 +416,7 @@ private: /// Function used for non-object declared property setters. LazyRuntimeFunction SetStructPropertyFn; +protected: /// The version of the runtime that this class targets. Must match the /// version in the runtime. int RuntimeVersion; @@ -362,14 +427,18 @@ private: /// Objective-C 1 property structures when targeting the GCC runtime or it /// will abort. const int ProtocolVersion; - + /// The version of the class ABI. This value is used in the class structure + /// and indicates how various fields should be interpreted. + const int ClassABIVersion; /// Generates an instance variable list structure. This is a structure /// containing a size and an array of structures containing instance variable /// metadata. This is used purely for introspection in the fragile ABI. In /// the non-fragile ABI, it's used for instance variable fixup. - llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, - ArrayRef<llvm::Constant *> IvarTypes, - ArrayRef<llvm::Constant *> IvarOffsets); + virtual llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, + ArrayRef<llvm::Constant *> IvarTypes, + ArrayRef<llvm::Constant *> IvarOffsets, + ArrayRef<llvm::Constant *> IvarAlign, + ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership); /// Generates a method list structure. This is a structure containing a size /// and an array of structures containing method metadata. @@ -378,20 +447,20 @@ private: /// pointer allowing them to be chained together in a linked list. llvm::Constant *GenerateMethodList(StringRef ClassName, StringRef CategoryName, - ArrayRef<Selector> MethodSels, - ArrayRef<llvm::Constant *> MethodTypes, + ArrayRef<const ObjCMethodDecl*> Methods, bool isClassMethodList); /// Emits an empty protocol. 
This is used for \@protocol() where no protocol /// is found. The runtime will (hopefully) fix up the pointer to refer to the /// real protocol. - llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName); + virtual llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName); /// Generates a list of property metadata structures. This follows the same /// pattern as method and instance variable metadata lists. - llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID, - SmallVectorImpl<Selector> &InstanceMethodSels, - SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes); + llvm::Constant *GeneratePropertyList(const Decl *Container, + const ObjCContainerDecl *OCD, + bool isClassProperty=false, + bool protocolOptionalProperties=false); /// Generates a list of referenced protocols. Classes, categories, and /// protocols all use this structure. @@ -422,22 +491,42 @@ private: /// Generates a method list. This is used by protocols to define the required /// and optional methods. - llvm::Constant *GenerateProtocolMethodList( - ArrayRef<llvm::Constant *> MethodNames, - ArrayRef<llvm::Constant *> MethodTypes); + virtual llvm::Constant *GenerateProtocolMethodList( + ArrayRef<const ObjCMethodDecl*> Methods); + /// Emits optional and required method lists. + template<class T> + void EmitProtocolMethodList(T &&Methods, llvm::Constant *&Required, + llvm::Constant *&Optional) { + SmallVector<const ObjCMethodDecl*, 16> RequiredMethods; + SmallVector<const ObjCMethodDecl*, 16> OptionalMethods; + for (const auto *I : Methods) + if (I->isOptional()) + OptionalMethods.push_back(I); + else + RequiredMethods.push_back(I); + Required = GenerateProtocolMethodList(RequiredMethods); + Optional = GenerateProtocolMethodList(OptionalMethods); + } /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). - llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding); + /// Returns the name of ivar offset variables. In the GNUstep v1 ABI, this + /// contains the class and ivar names, in the v2 ABI this contains the type + /// encoding as well. + virtual std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID, + const ObjCIvarDecl *Ivar) { + const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString() + + '.' + Ivar->getNameAsString(); + return Name; + } /// Returns the variable used to store the offset of an instance variable. llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar); /// Emits a reference to a class. This allows the linker to object if there /// is no class of the matching name. 
- -protected: void EmitClassRef(const std::string &className); /// Emits a pointer to the named class @@ -476,7 +565,7 @@ protected: public: CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, - unsigned protocolClassVersion); + unsigned protocolClassVersion, unsigned classABI=1); ConstantAddress GenerateConstantString(const StringLiteral *) override; @@ -499,6 +588,14 @@ public: Address GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) override; llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method) override; + virtual llvm::Constant *GetConstantSelector(Selector Sel, + const std::string &TypeEncoding) { + llvm_unreachable("Runtime unable to generate constant selector"); + } + llvm::Constant *GetConstantSelector(const ObjCMethodDecl *M) { + return GetConstantSelector(M->getSelector(), + CGM.getContext().getObjCEncodingForMethodDecl(M)); + } llvm::Constant *GetEHType(QualType T) override; llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, @@ -698,7 +795,10 @@ class CGObjCGNUstep : public CGObjCGNU { } public: - CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { + CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNUstep(Mod, 9, 3, 1) {} + CGObjCGNUstep(CodeGenModule &Mod, unsigned ABI, unsigned ProtocolABI, + unsigned ClassABI) : + CGObjCGNU(Mod, ABI, ProtocolABI, ClassABI) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; llvm::StructType *SlotStructTy = @@ -707,7 +807,7 @@ class CGObjCGNUstep : public CGObjCGNU { // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender); SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy, SelectorTy, IdTy); - // Slot_t objc_msg_lookup_super(struct objc_super*, SEL); + // Slot_t objc_slot_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, PtrToObjCSuperTy, SelectorTy); // If we're in ObjC++ mode, then we want to make @@ -784,6 +884,951 @@ class CGObjCGNUstep : public CGObjCGNU { } }; +/// GNUstep Objective-C ABI version 2 implementation. +/// This is the ABI that provides a clean break with the legacy GCC ABI and +/// cleans up a number of things that were added to work around 1980s linkers. +class CGObjCGNUstep2 : public CGObjCGNUstep { + /// The section for selectors. + static constexpr const char *const SelSection = "__objc_selectors"; + /// The section for classes. + static constexpr const char *const ClsSection = "__objc_classes"; + /// The section for references to classes. + static constexpr const char *const ClsRefSection = "__objc_class_refs"; + /// The section for categories. + static constexpr const char *const CatSection = "__objc_cats"; + /// The section for protocols. + static constexpr const char *const ProtocolSection = "__objc_protocols"; + /// The section for protocol references. + static constexpr const char *const ProtocolRefSection = "__objc_protocol_refs"; + /// The section for class aliases + static constexpr const char *const ClassAliasSection = "__objc_class_aliases"; + /// The section for constexpr constant strings + static constexpr const char *const ConstantStringSection = "__objc_constant_string"; + /// The GCC ABI superclass message lookup function. Takes a pointer to a + /// structure describing the receiver and the class, and a selector as + /// arguments. Returns the IMP for the corresponding method. + LazyRuntimeFunction MsgLookupSuperFn; + /// A flag indicating if we've emitted at least one protocol. 
+ /// If we haven't, then we need to emit an empty protocol, to ensure that the + /// __start__objc_protocols and __stop__objc_protocols sections exist. + bool EmittedProtocol = false; + /// A flag indicating if we've emitted at least one protocol reference. + /// If we haven't, then we need to emit an empty protocol, to ensure that the + /// __start__objc_protocol_refs and __stop__objc_protocol_refs sections + /// exist. + bool EmittedProtocolRef = false; + /// A flag indicating if we've emitted at least one class. + /// If we haven't, then we need to emit an empty protocol, to ensure that the + /// __start__objc_classes and __stop__objc_classes sections / exist. + bool EmittedClass = false; + /// Generate the name of a symbol for a reference to a class. Accesses to + /// classes should be indirected via this. + std::string SymbolForClassRef(StringRef Name, bool isWeak) { + if (isWeak) + return (StringRef("._OBJC_WEAK_REF_CLASS_") + Name).str(); + else + return (StringRef("._OBJC_REF_CLASS_") + Name).str(); + } + /// Generate the name of a class symbol. + std::string SymbolForClass(StringRef Name) { + return (StringRef("._OBJC_CLASS_") + Name).str(); + } + void CallRuntimeFunction(CGBuilderTy &B, StringRef FunctionName, + ArrayRef<llvm::Value*> Args) { + SmallVector<llvm::Type *,8> Types; + for (auto *Arg : Args) + Types.push_back(Arg->getType()); + llvm::FunctionType *FT = llvm::FunctionType::get(B.getVoidTy(), Types, + false); + llvm::Value *Fn = CGM.CreateRuntimeFunction(FT, FunctionName); + B.CreateCall(Fn, Args); + } + + ConstantAddress GenerateConstantString(const StringLiteral *SL) override { + + auto Str = SL->getString(); + CharUnits Align = CGM.getPointerAlign(); + + // Look for an existing one + llvm::StringMap<llvm::Constant*>::iterator old = ObjCStrings.find(Str); + if (old != ObjCStrings.end()) + return ConstantAddress(old->getValue(), Align); + + bool isNonASCII = SL->containsNonAscii(); + + auto LiteralLength = SL->getLength(); + + if ((CGM.getTarget().getPointerWidth(0) == 64) && + (LiteralLength < 9) && !isNonASCII) { + // Tiny strings are only used on 64-bit platforms. They store 8 7-bit + // ASCII characters in the high 56 bits, followed by a 4-bit length and a + // 3-bit tag (which is always 4). + uint64_t str = 0; + // Fill in the characters + for (unsigned i=0 ; i<LiteralLength ; i++) + str |= ((uint64_t)SL->getCodeUnit(i)) << ((64 - 4 - 3) - (i*7)); + // Fill in the length + str |= LiteralLength << 3; + // Set the tag + str |= 4; + auto *ObjCStr = llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int64Ty, str), IdTy); + ObjCStrings[Str] = ObjCStr; + return ConstantAddress(ObjCStr, Align); + } + + StringRef StringClass = CGM.getLangOpts().ObjCConstantStringClass; + + if (StringClass.empty()) StringClass = "NSConstantString"; + + std::string Sym = SymbolForClass(StringClass); + + llvm::Constant *isa = TheModule.getNamedGlobal(Sym); + + if (!isa) + isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, + llvm::GlobalValue::ExternalLinkage, nullptr, Sym); + else if (isa->getType() != PtrToIdTy) + isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); + + // struct + // { + // Class isa; + // uint32_t flags; + // uint32_t length; // Number of codepoints + // uint32_t size; // Number of bytes + // uint32_t hash; + // const char *data; + // }; + + ConstantInitBuilder Builder(CGM); + auto Fields = Builder.beginStruct(); + Fields.add(isa); + // For now, all non-ASCII strings are represented as UTF-16. 
As such, the + // number of bytes is simply double the number of UTF-16 codepoints. In + // ASCII strings, the number of bytes is equal to the number of non-ASCII + // codepoints. + if (isNonASCII) { + unsigned NumU8CodeUnits = Str.size(); + // A UTF-16 representation of a unicode string contains at most the same + // number of code units as a UTF-8 representation. Allocate that much + // space, plus one for the final null character. + SmallVector<llvm::UTF16, 128> ToBuf(NumU8CodeUnits + 1); + const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)Str.data(); + llvm::UTF16 *ToPtr = &ToBuf[0]; + (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumU8CodeUnits, + &ToPtr, ToPtr + NumU8CodeUnits, llvm::strictConversion); + uint32_t StringLength = ToPtr - &ToBuf[0]; + // Add null terminator + *ToPtr = 0; + // Flags: 2 indicates UTF-16 encoding + Fields.addInt(Int32Ty, 2); + // Number of UTF-16 codepoints + Fields.addInt(Int32Ty, StringLength); + // Number of bytes + Fields.addInt(Int32Ty, StringLength * 2); + // Hash. Not currently initialised by the compiler. + Fields.addInt(Int32Ty, 0); + // pointer to the data string. + auto Arr = llvm::makeArrayRef(&ToBuf[0], ToPtr+1); + auto *C = llvm::ConstantDataArray::get(VMContext, Arr); + auto *Buffer = new llvm::GlobalVariable(TheModule, C->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); + Buffer->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + Fields.add(Buffer); + } else { + // Flags: 0 indicates ASCII encoding + Fields.addInt(Int32Ty, 0); + // Number of UTF-16 codepoints, each ASCII byte is a UTF-16 codepoint + Fields.addInt(Int32Ty, Str.size()); + // Number of bytes + Fields.addInt(Int32Ty, Str.size()); + // Hash. Not currently initialised by the compiler. + Fields.addInt(Int32Ty, 0); + // Data pointer + Fields.add(MakeConstantString(Str)); + } + std::string StringName; + bool isNamed = !isNonASCII; + if (isNamed) { + StringName = ".objc_str_"; + for (int i=0,e=Str.size() ; i<e ; ++i) { + unsigned char c = Str[i]; + if (isalnum(c)) + StringName += c; + else if (c == ' ') + StringName += '_'; + else { + isNamed = false; + break; + } + } + } + auto *ObjCStrGV = + Fields.finishAndCreateGlobal( + isNamed ? StringRef(StringName) : ".objc_string", + Align, false, isNamed ? 
llvm::GlobalValue::LinkOnceODRLinkage + : llvm::GlobalValue::PrivateLinkage); + ObjCStrGV->setSection(ConstantStringSection); + if (isNamed) { + ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName)); + ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility); + } + llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy); + ObjCStrings[Str] = ObjCStr; + ConstantStrings.push_back(ObjCStr); + return ConstantAddress(ObjCStr, Align); + } + + void PushProperty(ConstantArrayBuilder &PropertiesArray, + const ObjCPropertyDecl *property, + const Decl *OCD, + bool isSynthesized=true, bool + isDynamic=true) override { + // struct objc_property + // { + // const char *name; + // const char *attributes; + // const char *type; + // SEL getter; + // SEL setter; + // }; + auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy); + ASTContext &Context = CGM.getContext(); + Fields.add(MakeConstantString(property->getNameAsString())); + std::string TypeStr = + CGM.getContext().getObjCEncodingForPropertyDecl(property, OCD); + Fields.add(MakeConstantString(TypeStr)); + std::string typeStr; + Context.getObjCEncodingForType(property->getType(), typeStr); + Fields.add(MakeConstantString(typeStr)); + auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { + if (accessor) { + std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor); + Fields.add(GetConstantSelector(accessor->getSelector(), TypeStr)); + } else { + Fields.add(NULLPtr); + } + }; + addPropertyMethod(property->getGetterMethodDecl()); + addPropertyMethod(property->getSetterMethodDecl()); + Fields.finishAndAddTo(PropertiesArray); + } + + llvm::Constant * + GenerateProtocolMethodList(ArrayRef<const ObjCMethodDecl*> Methods) override { + // struct objc_protocol_method_description + // { + // SEL selector; + // const char *types; + // }; + llvm::StructType *ObjCMethodDescTy = + llvm::StructType::get(CGM.getLLVMContext(), + { PtrToInt8Ty, PtrToInt8Ty }); + ASTContext &Context = CGM.getContext(); + ConstantInitBuilder Builder(CGM); + // struct objc_protocol_method_description_list + // { + // int count; + // int size; + // struct objc_protocol_method_description methods[]; + // }; + auto MethodList = Builder.beginStruct(); + // int count; + MethodList.addInt(IntTy, Methods.size()); + // int size; // sizeof(struct objc_method_description) + llvm::DataLayout td(&TheModule); + MethodList.addInt(IntTy, td.getTypeSizeInBits(ObjCMethodDescTy) / + CGM.getContext().getCharWidth()); + // struct objc_method_description[] + auto MethodArray = MethodList.beginArray(ObjCMethodDescTy); + for (auto *M : Methods) { + auto Method = MethodArray.beginStruct(ObjCMethodDescTy); + Method.add(CGObjCGNU::GetConstantSelector(M)); + Method.add(GetTypeString(Context.getObjCEncodingForMethodDecl(M, true))); + Method.finishAndAddTo(MethodArray); + } + MethodArray.finishAndAddTo(MethodList); + return MethodList.finishAndCreateGlobal(".objc_protocol_method_list", + CGM.getPointerAlign()); + } + + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, + llvm::Value *cmd, MessageSendInfo &MSI) override { + // Don't access the slot unless we're trying to cache the result. 
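For reference, a minimal standalone sketch of the tagged-pointer encoding that GenerateConstantString above uses for short ASCII literals on 64-bit targets: up to eight 7-bit characters in the high 56 bits, then a 4-bit length, then a 3-bit tag whose value is always 4. The helper names and the std::string interface are illustrative only.

#include <cassert>
#include <cstdint>
#include <string>

// Pack up to eight 7-bit ASCII characters, a 4-bit length, and the tag 4
// into a single 64-bit value, mirroring the loop in GenerateConstantString.
uint64_t packTinyString(const std::string &S) {
  assert(S.size() < 9 && "tiny strings hold at most 8 characters");
  uint64_t Str = 0;
  for (unsigned i = 0; i < S.size(); ++i) {
    assert((unsigned char)S[i] < 128 && "tiny strings are ASCII only");
    Str |= ((uint64_t)S[i]) << ((64 - 4 - 3) - (i * 7));
  }
  Str |= (uint64_t)S.size() << 3; // 4-bit length
  Str |= 4;                       // 3-bit tag
  return Str;
}

// Inverse transform, for illustration only (the runtime does the real decoding).
std::string unpackTinyString(uint64_t Str) {
  assert((Str & 7) == 4 && "not a tiny string");
  unsigned Length = (Str >> 3) & 0xf;
  std::string S;
  for (unsigned i = 0; i < Length; ++i)
    S.push_back((char)((Str >> ((64 - 4 - 3) - (i * 7))) & 0x7f));
  return S;
}

int main() {
  assert(unpackTinyString(packTinyString("objc")) == "objc");
}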
+ CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = {CGObjCGNU::EnforceType(Builder, ObjCSuper, + PtrToObjCSuperTy).getPointer(), cmd}; + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } + + llvm::GlobalVariable *GetClassVar(StringRef Name, bool isWeak=false) { + std::string SymbolName = SymbolForClassRef(Name, isWeak); + auto *ClassSymbol = TheModule.getNamedGlobal(SymbolName); + if (ClassSymbol) + return ClassSymbol; + ClassSymbol = new llvm::GlobalVariable(TheModule, + IdTy, false, llvm::GlobalValue::ExternalLinkage, + nullptr, SymbolName); + // If this is a weak symbol, then we are creating a valid definition for + // the symbol, pointing to a weak definition of the real class pointer. If + // this is not a weak reference, then we are expecting another compilation + // unit to provide the real indirection symbol. + if (isWeak) + ClassSymbol->setInitializer(new llvm::GlobalVariable(TheModule, + Int8Ty, false, llvm::GlobalValue::ExternalWeakLinkage, + nullptr, SymbolForClass(Name))); + assert(ClassSymbol->getName() == SymbolName); + return ClassSymbol; + } + llvm::Value *GetClassNamed(CodeGenFunction &CGF, + const std::string &Name, + bool isWeak) override { + return CGF.Builder.CreateLoad(Address(GetClassVar(Name, isWeak), + CGM.getPointerAlign())); + } + int32_t FlagsForOwnership(Qualifiers::ObjCLifetime Ownership) { + // typedef enum { + // ownership_invalid = 0, + // ownership_strong = 1, + // ownership_weak = 2, + // ownership_unsafe = 3 + // } ivar_ownership; + int Flag; + switch (Ownership) { + case Qualifiers::OCL_Strong: + Flag = 1; + break; + case Qualifiers::OCL_Weak: + Flag = 2; + break; + case Qualifiers::OCL_ExplicitNone: + Flag = 3; + break; + case Qualifiers::OCL_None: + case Qualifiers::OCL_Autoreleasing: + assert(Ownership != Qualifiers::OCL_Autoreleasing); + Flag = 0; + } + return Flag; + } + llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, + ArrayRef<llvm::Constant *> IvarTypes, + ArrayRef<llvm::Constant *> IvarOffsets, + ArrayRef<llvm::Constant *> IvarAlign, + ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership) override { + llvm_unreachable("Method should not be called!"); + } + + llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName) override { + std::string Name = SymbolForProtocol(ProtocolName); + auto *GV = TheModule.getGlobalVariable(Name); + if (!GV) { + // Emit a placeholder symbol. + GV = new llvm::GlobalVariable(TheModule, ProtocolTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name); + GV->setAlignment(CGM.getPointerAlign().getQuantity()); + } + return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy); + } + + /// Existing protocol references. + llvm::StringMap<llvm::Constant*> ExistingProtocolRefs; + + llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, + const ObjCProtocolDecl *PD) override { + auto Name = PD->getNameAsString(); + auto *&Ref = ExistingProtocolRefs[Name]; + if (!Ref) { + auto *&Protocol = ExistingProtocols[Name]; + if (!Protocol) + Protocol = GenerateProtocolRef(PD); + std::string RefName = SymbolForProtocolRef(Name); + assert(!TheModule.getGlobalVariable(RefName)); + // Emit a reference symbol. 
+ auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy, + false, llvm::GlobalValue::ExternalLinkage, + llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName); + GV->setSection(ProtocolRefSection); + GV->setAlignment(CGM.getPointerAlign().getQuantity()); + Ref = GV; + } + EmittedProtocolRef = true; + return CGF.Builder.CreateAlignedLoad(Ref, CGM.getPointerAlign()); + } + + llvm::Constant *GenerateProtocolList(ArrayRef<llvm::Constant*> Protocols) { + llvm::ArrayType *ProtocolArrayTy = llvm::ArrayType::get(ProtocolPtrTy, + Protocols.size()); + llvm::Constant * ProtocolArray = llvm::ConstantArray::get(ProtocolArrayTy, + Protocols); + ConstantInitBuilder builder(CGM); + auto ProtocolBuilder = builder.beginStruct(); + ProtocolBuilder.addNullPointer(PtrTy); + ProtocolBuilder.addInt(SizeTy, Protocols.size()); + ProtocolBuilder.add(ProtocolArray); + return ProtocolBuilder.finishAndCreateGlobal(".objc_protocol_list", + CGM.getPointerAlign(), false, llvm::GlobalValue::InternalLinkage); + } + + void GenerateProtocol(const ObjCProtocolDecl *PD) override { + // Do nothing - we only emit referenced protocols. + } + llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) { + std::string ProtocolName = PD->getNameAsString(); + auto *&Protocol = ExistingProtocols[ProtocolName]; + if (Protocol) + return Protocol; + + EmittedProtocol = true; + + // Use the protocol definition, if there is one. + if (const ObjCProtocolDecl *Def = PD->getDefinition()) + PD = Def; + + SmallVector<llvm::Constant*, 16> Protocols; + for (const auto *PI : PD->protocols()) + Protocols.push_back( + llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), + ProtocolPtrTy)); + llvm::Constant *ProtocolList = GenerateProtocolList(Protocols); + + // Collect information about methods + llvm::Constant *InstanceMethodList, *OptionalInstanceMethodList; + llvm::Constant *ClassMethodList, *OptionalClassMethodList; + EmitProtocolMethodList(PD->instance_methods(), InstanceMethodList, + OptionalInstanceMethodList); + EmitProtocolMethodList(PD->class_methods(), ClassMethodList, + OptionalClassMethodList); + + auto SymName = SymbolForProtocol(ProtocolName); + auto *OldGV = TheModule.getGlobalVariable(SymName); + // The isa pointer must be set to a magic number so the runtime knows it's + // the correct layout. 
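As a rough C-level picture of the class indirection set up by SymbolForClassRef and GetClassVar above: the real symbols begin with "._OBJC_", which is not a valid C identifier, so plain stand-in names are used here, and the weak-import case that points the reference at an extern_weak definition is omitted.

// Emitted with the class implementation (classStruct in GenerateClass below);
// stands in for ._OBJC_CLASS_Foo.
struct objc_class { int opaque; };
objc_class OBJC_CLASS_Foo;

// Emitted (or referenced) wherever the class is used; stands in for
// ._OBJC_REF_CLASS_Foo. All accesses to the class go through this
// one level of indirection.
objc_class *OBJC_REF_CLASS_Foo = &OBJC_CLASS_Foo;

// GetClassNamed simply loads through the indirection variable.
objc_class *getFooClass() { return OBJC_REF_CLASS_Foo; }

int main() { return getFooClass() == &OBJC_CLASS_Foo ? 0 : 1; }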
+ ConstantInitBuilder builder(CGM); + auto ProtocolBuilder = builder.beginStruct(); + ProtocolBuilder.add(llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); + ProtocolBuilder.add(MakeConstantString(ProtocolName)); + ProtocolBuilder.add(ProtocolList); + ProtocolBuilder.add(InstanceMethodList); + ProtocolBuilder.add(ClassMethodList); + ProtocolBuilder.add(OptionalInstanceMethodList); + ProtocolBuilder.add(OptionalClassMethodList); + // Required instance properties + ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, false, false)); + // Optional instance properties + ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, false, true)); + // Required class properties + ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, true, false)); + // Optional class properties + ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, true, true)); + + auto *GV = ProtocolBuilder.finishAndCreateGlobal(SymName, + CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); + GV->setSection(ProtocolSection); + GV->setComdat(TheModule.getOrInsertComdat(SymName)); + if (OldGV) { + OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV, + OldGV->getType())); + OldGV->removeFromParent(); + GV->setName(SymName); + } + Protocol = GV; + return GV; + } + llvm::Constant *EnforceType(llvm::Constant *Val, llvm::Type *Ty) { + if (Val->getType() == Ty) + return Val; + return llvm::ConstantExpr::getBitCast(Val, Ty); + } + llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, + const std::string &TypeEncoding) override { + return GetConstantSelector(Sel, TypeEncoding); + } + llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) { + if (TypeEncoding.empty()) + return NULLPtr; + std::string MangledTypes = TypeEncoding; + std::replace(MangledTypes.begin(), MangledTypes.end(), + '@', '\1'); + std::string TypesVarName = ".objc_sel_types_" + MangledTypes; + auto *TypesGlobal = TheModule.getGlobalVariable(TypesVarName); + if (!TypesGlobal) { + llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext, + TypeEncoding); + auto *GV = new llvm::GlobalVariable(TheModule, Init->getType(), + true, llvm::GlobalValue::LinkOnceODRLinkage, Init, TypesVarName); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + TypesGlobal = GV; + } + return llvm::ConstantExpr::getGetElementPtr(TypesGlobal->getValueType(), + TypesGlobal, Zeros); + } + llvm::Constant *GetConstantSelector(Selector Sel, + const std::string &TypeEncoding) override { + // @ is used as a special character in symbol names (used for symbol + // versioning), so mangle the name to not include it. Replace it with a + // character that is not a valid type encoding character (and, being + // non-printable, never will be!) 
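A small sketch of the mangling that GetTypeString above applies before a type encoding is embedded in a symbol name, together with the selector symbol naming used by GetConstantSelector in the code that follows. The helper names and the sample encoding are illustrative.

#include <algorithm>
#include <cassert>
#include <string>

// '@' has a special meaning in ELF symbol names (symbol versioning), so every
// '@' in an Objective-C type encoding is replaced by '\1' before the encoding
// becomes part of a symbol name.
std::string mangleTypeEncoding(std::string Encoding) {
  std::replace(Encoding.begin(), Encoding.end(), '@', '\1');
  return Encoding;
}

std::string selectorSymbolName(const std::string &Selector,
                               const std::string &TypeEncoding) {
  return ".objc_selector_" + Selector + "_" + mangleTypeEncoding(TypeEncoding);
}

int main() {
  // "@16@0:8" is a typical encoding for "- (id)foo" on a 64-bit target.
  std::string Sym = selectorSymbolName("foo", "@16@0:8");
  assert(Sym.find('@') == std::string::npos);
}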
+ std::string MangledTypes = TypeEncoding; + std::replace(MangledTypes.begin(), MangledTypes.end(), + '@', '\1'); + auto SelVarName = (StringRef(".objc_selector_") + Sel.getAsString() + "_" + + MangledTypes).str(); + if (auto *GV = TheModule.getNamedGlobal(SelVarName)) + return EnforceType(GV, SelectorTy); + ConstantInitBuilder builder(CGM); + auto SelBuilder = builder.beginStruct(); + SelBuilder.add(ExportUniqueString(Sel.getAsString(), ".objc_sel_name_", + true)); + SelBuilder.add(GetTypeString(TypeEncoding)); + auto *GV = SelBuilder.finishAndCreateGlobal(SelVarName, + CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); + GV->setComdat(TheModule.getOrInsertComdat(SelVarName)); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + GV->setSection(SelSection); + auto *SelVal = EnforceType(GV, SelectorTy); + return SelVal; + } + std::pair<llvm::Constant*,llvm::Constant*> + GetSectionBounds(StringRef Section) { + auto *Start = new llvm::GlobalVariable(TheModule, PtrTy, + /*isConstant*/false, + llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__start_") + + Section); + Start->setVisibility(llvm::GlobalValue::HiddenVisibility); + auto *Stop = new llvm::GlobalVariable(TheModule, PtrTy, + /*isConstant*/false, + llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__stop_") + + Section); + Stop->setVisibility(llvm::GlobalValue::HiddenVisibility); + return { Start, Stop }; + } + llvm::Function *ModuleInitFunction() override { + llvm::Function *LoadFunction = llvm::Function::Create( + llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false), + llvm::GlobalValue::LinkOnceODRLinkage, ".objcv2_load_function", + &TheModule); + LoadFunction->setVisibility(llvm::GlobalValue::HiddenVisibility); + LoadFunction->setComdat(TheModule.getOrInsertComdat(".objcv2_load_function")); + + llvm::BasicBlock *EntryBB = + llvm::BasicBlock::Create(VMContext, "entry", LoadFunction); + CGBuilderTy B(CGM, VMContext); + B.SetInsertPoint(EntryBB); + ConstantInitBuilder builder(CGM); + auto InitStructBuilder = builder.beginStruct(); + InitStructBuilder.addInt(Int64Ty, 0); + auto addSection = [&](const char *section) { + auto bounds = GetSectionBounds(section); + InitStructBuilder.add(bounds.first); + InitStructBuilder.add(bounds.second); + }; + addSection(SelSection); + addSection(ClsSection); + addSection(ClsRefSection); + addSection(CatSection); + addSection(ProtocolSection); + addSection(ProtocolRefSection); + addSection(ClassAliasSection); + addSection(ConstantStringSection); + auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init", + CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); + InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility); + InitStruct->setComdat(TheModule.getOrInsertComdat(".objc_init")); + + CallRuntimeFunction(B, "__objc_load", {InitStruct});; + B.CreateRetVoid(); + // Make sure that the optimisers don't delete this function. + CGM.addCompilerUsedGlobal(LoadFunction); + // FIXME: Currently ELF only! + // We have to do this by hand, rather than with @llvm.ctors, so that the + // linker can remove the duplicate invocations. + auto *InitVar = new llvm::GlobalVariable(TheModule, LoadFunction->getType(), + /*isConstant*/true, llvm::GlobalValue::LinkOnceAnyLinkage, + LoadFunction, ".objc_ctor"); + // Check that this hasn't been renamed. This shouldn't happen, because + // this function should be called precisely once. 
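GetSectionBounds above leans on an ELF linker feature: for a section whose name is a valid C identifier, the linker defines __start_<section> and __stop_<section> symbols bracketing it, which is also why the init path below emits placeholder entries so those symbols always exist. An ELF-only illustration with a made-up section name (the real SelSection, ClsSection, and related names are defined elsewhere in this file):

#include <cstdio>

struct Entry { const char *name; };

// Place a couple of records in a custom section. The 'used' attribute keeps
// them alive even though nothing references them directly.
__attribute__((used, section("demo_records")))
static Entry First = {"first"};
__attribute__((used, section("demo_records")))
static Entry Second = {"second"};

// Because "demo_records" is a valid C identifier, an ELF linker defines these
// two symbols at the start and end of the section.
extern Entry __start_demo_records[];
extern Entry __stop_demo_records[];

int main() {
  for (Entry *E = __start_demo_records; E != __stop_demo_records; ++E)
    std::printf("%s\n", E->name);
}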
+ assert(InitVar->getName() == ".objc_ctor"); + InitVar->setSection(".ctors"); + InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility); + InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor")); + CGM.addCompilerUsedGlobal(InitVar); + for (auto *C : Categories) { + auto *Cat = cast<llvm::GlobalVariable>(C->stripPointerCasts()); + Cat->setSection(CatSection); + CGM.addUsedGlobal(Cat); + } + // Add a null value fore each special section so that we can always + // guarantee that the _start and _stop symbols will exist and be + // meaningful. + auto createNullGlobal = [&](StringRef Name, ArrayRef<llvm::Constant*> Init, + StringRef Section) { + auto nullBuilder = builder.beginStruct(); + for (auto *F : Init) + nullBuilder.add(F); + auto GV = nullBuilder.finishAndCreateGlobal(Name, CGM.getPointerAlign(), + false, llvm::GlobalValue::LinkOnceODRLinkage); + GV->setSection(Section); + GV->setComdat(TheModule.getOrInsertComdat(Name)); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + CGM.addUsedGlobal(GV); + return GV; + }; + createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, SelSection); + if (Categories.empty()) + createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr, + NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, CatSection); + if (!EmittedClass) { + createNullGlobal(".objc_null_cls_init_ref", NULLPtr, ClsSection); + createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr }, + ClsRefSection); + } + if (!EmittedProtocol) + createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr, + NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, + NULLPtr}, ProtocolSection); + if (!EmittedProtocolRef) + createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, ProtocolRefSection); + if (!ClassAliases.empty()) + for (auto clsAlias : ClassAliases) + createNullGlobal(std::string(".objc_class_alias") + + clsAlias.second, { MakeConstantString(clsAlias.second), + GetClassVar(clsAlias.first) }, ClassAliasSection); + else + createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr }, + ClassAliasSection); + if (ConstantStrings.empty()) { + auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0); + createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero, + i32Zero, i32Zero, i32Zero, NULLPtr }, ConstantStringSection); + } + ConstantStrings.clear(); + Categories.clear(); + Classes.clear(); + return nullptr;//CGObjCGNU::ModuleInitFunction(); + } + /// In the v2 ABI, ivar offset variables use the type encoding in their name + /// to trigger linker failures if the types don't match. + std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID, + const ObjCIvarDecl *Ivar) override { + std::string TypeEncoding; + CGM.getContext().getObjCEncodingForType(Ivar->getType(), TypeEncoding); + // Prevent the @ from being interpreted as a symbol version. + std::replace(TypeEncoding.begin(), TypeEncoding.end(), + '@', '\1'); + const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString() + + '.' + Ivar->getNameAsString() + '.' 
+ TypeEncoding; + return Name; + } + llvm::Value *EmitIvarOffset(CodeGenFunction &CGF, + const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) override { + const std::string Name = GetIVarOffsetVariableName(Ivar->getContainingInterface(), Ivar); + llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name); + if (!IvarOffsetPointer) + IvarOffsetPointer = new llvm::GlobalVariable(TheModule, IntTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name); + CharUnits Align = CGM.getIntAlign(); + llvm::Value *Offset = CGF.Builder.CreateAlignedLoad(IvarOffsetPointer, Align); + if (Offset->getType() != PtrDiffTy) + Offset = CGF.Builder.CreateZExtOrBitCast(Offset, PtrDiffTy); + return Offset; + } + void GenerateClass(const ObjCImplementationDecl *OID) override { + ASTContext &Context = CGM.getContext(); + + // Get the class name + ObjCInterfaceDecl *classDecl = + const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); + std::string className = classDecl->getNameAsString(); + auto *classNameConstant = MakeConstantString(className); + + ConstantInitBuilder builder(CGM); + auto metaclassFields = builder.beginStruct(); + // struct objc_class *isa; + metaclassFields.addNullPointer(PtrTy); + // struct objc_class *super_class; + metaclassFields.addNullPointer(PtrTy); + // const char *name; + metaclassFields.add(classNameConstant); + // long version; + metaclassFields.addInt(LongTy, 0); + // unsigned long info; + // objc_class_flag_meta + metaclassFields.addInt(LongTy, 1); + // long instance_size; + // Setting this to zero is consistent with the older ABI, but it might be + // more sensible to set this to sizeof(struct objc_class) + metaclassFields.addInt(LongTy, 0); + // struct objc_ivar_list *ivars; + metaclassFields.addNullPointer(PtrTy); + // struct objc_method_list *methods + // FIXME: Almost identical code is copied and pasted below for the + // class, but refactoring it cleanly requires C++14 generic lambdas. + if (OID->classmeth_begin() == OID->classmeth_end()) + metaclassFields.addNullPointer(PtrTy); + else { + SmallVector<ObjCMethodDecl*, 16> ClassMethods; + ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(), + OID->classmeth_end()); + metaclassFields.addBitCast( + GenerateMethodList(className, "", ClassMethods, true), + PtrTy); + } + // void *dtable; + metaclassFields.addNullPointer(PtrTy); + // IMP cxx_construct; + metaclassFields.addNullPointer(PtrTy); + // IMP cxx_destruct; + metaclassFields.addNullPointer(PtrTy); + // struct objc_class *subclass_list + metaclassFields.addNullPointer(PtrTy); + // struct objc_class *sibling_class + metaclassFields.addNullPointer(PtrTy); + // struct objc_protocol_list *protocols; + metaclassFields.addNullPointer(PtrTy); + // struct reference_list *extra_data; + metaclassFields.addNullPointer(PtrTy); + // long abi_version; + metaclassFields.addInt(LongTy, 0); + // struct objc_property_list *properties + metaclassFields.add(GeneratePropertyList(OID, classDecl, /*isClassProperty*/true)); + + auto *metaclass = metaclassFields.finishAndCreateGlobal("._OBJC_METACLASS_" + + className, CGM.getPointerAlign()); + + auto classFields = builder.beginStruct(); + // struct objc_class *isa; + classFields.add(metaclass); + // struct objc_class *super_class; + // Get the superclass name. 
+ const ObjCInterfaceDecl * SuperClassDecl = + OID->getClassInterface()->getSuperClass(); + if (SuperClassDecl) { + auto SuperClassName = SymbolForClass(SuperClassDecl->getNameAsString()); + llvm::Constant *SuperClass = TheModule.getNamedGlobal(SuperClassName); + if (!SuperClass) + { + SuperClass = new llvm::GlobalVariable(TheModule, PtrTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, SuperClassName); + } + classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); + } else + classFields.addNullPointer(PtrTy); + // const char *name; + classFields.add(classNameConstant); + // long version; + classFields.addInt(LongTy, 0); + // unsigned long info; + // !objc_class_flag_meta + classFields.addInt(LongTy, 0); + // long instance_size; + int superInstanceSize = !SuperClassDecl ? 0 : + Context.getASTObjCInterfaceLayout(SuperClassDecl).getSize().getQuantity(); + // Instance size is negative for classes that have not yet had their ivar + // layout calculated. + classFields.addInt(LongTy, + 0 - (Context.getASTObjCImplementationLayout(OID).getSize().getQuantity() - + superInstanceSize)); + + if (classDecl->all_declared_ivar_begin() == nullptr) + classFields.addNullPointer(PtrTy); + else { + int ivar_count = 0; + for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD; + IVD = IVD->getNextIvar()) ivar_count++; + llvm::DataLayout td(&TheModule); + // struct objc_ivar_list *ivars; + ConstantInitBuilder b(CGM); + auto ivarListBuilder = b.beginStruct(); + // int count; + ivarListBuilder.addInt(IntTy, ivar_count); + // size_t size; + llvm::StructType *ObjCIvarTy = llvm::StructType::get( + PtrToInt8Ty, + PtrToInt8Ty, + PtrToInt8Ty, + Int32Ty, + Int32Ty); + ivarListBuilder.addInt(SizeTy, td.getTypeSizeInBits(ObjCIvarTy) / + CGM.getContext().getCharWidth()); + // struct objc_ivar ivars[] + auto ivarArrayBuilder = ivarListBuilder.beginArray(); + CodeGenTypes &Types = CGM.getTypes(); + for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD; + IVD = IVD->getNextIvar()) { + auto ivarTy = IVD->getType(); + auto ivarBuilder = ivarArrayBuilder.beginStruct(); + // const char *name; + ivarBuilder.add(MakeConstantString(IVD->getNameAsString())); + // const char *type; + std::string TypeStr; + //Context.getObjCEncodingForType(ivarTy, TypeStr, IVD, true); + Context.getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, ivarTy, TypeStr, true); + ivarBuilder.add(MakeConstantString(TypeStr)); + // int *offset; + uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); + uint64_t Offset = BaseOffset - superInstanceSize; + llvm::Constant *OffsetValue = llvm::ConstantInt::get(IntTy, Offset); + std::string OffsetName = GetIVarOffsetVariableName(classDecl, IVD); + llvm::GlobalVariable *OffsetVar = TheModule.getGlobalVariable(OffsetName); + if (OffsetVar) + OffsetVar->setInitializer(OffsetValue); + else + OffsetVar = new llvm::GlobalVariable(TheModule, IntTy, + false, llvm::GlobalValue::ExternalLinkage, + OffsetValue, OffsetName); + auto ivarVisibility = + (IVD->getAccessControl() == ObjCIvarDecl::Private || + IVD->getAccessControl() == ObjCIvarDecl::Package || + classDecl->getVisibility() == HiddenVisibility) ? + llvm::GlobalValue::HiddenVisibility : + llvm::GlobalValue::DefaultVisibility; + OffsetVar->setVisibility(ivarVisibility); + ivarBuilder.add(OffsetVar); + // Ivar size + ivarBuilder.addInt(Int32Ty, + td.getTypeSizeInBits(Types.ConvertType(ivarTy)) / + CGM.getContext().getCharWidth()); + // Alignment will be stored as a base-2 log of the alignment. 
+ int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); + // Objects that require more than 2^64-byte alignment should be impossible! + assert(align < 64); + // uint32_t flags; + // Bits 0-1 are ownership. + // Bit 2 indicates an extended type encoding + // Bits 3-8 contain log2(aligment) + ivarBuilder.addInt(Int32Ty, + (align << 3) | (1<<2) | + FlagsForOwnership(ivarTy.getQualifiers().getObjCLifetime())); + ivarBuilder.finishAndAddTo(ivarArrayBuilder); + } + ivarArrayBuilder.finishAndAddTo(ivarListBuilder); + auto ivarList = ivarListBuilder.finishAndCreateGlobal(".objc_ivar_list", + CGM.getPointerAlign(), /*constant*/ false, + llvm::GlobalValue::PrivateLinkage); + classFields.add(ivarList); + } + // struct objc_method_list *methods + SmallVector<const ObjCMethodDecl*, 16> InstanceMethods; + InstanceMethods.insert(InstanceMethods.begin(), OID->instmeth_begin(), + OID->instmeth_end()); + for (auto *propImpl : OID->property_impls()) + if (propImpl->getPropertyImplementation() == + ObjCPropertyImplDecl::Synthesize) { + ObjCPropertyDecl *prop = propImpl->getPropertyDecl(); + auto addIfExists = [&](const ObjCMethodDecl* OMD) { + if (OMD) + InstanceMethods.push_back(OMD); + }; + addIfExists(prop->getGetterMethodDecl()); + addIfExists(prop->getSetterMethodDecl()); + } + + if (InstanceMethods.size() == 0) + classFields.addNullPointer(PtrTy); + else + classFields.addBitCast( + GenerateMethodList(className, "", InstanceMethods, false), + PtrTy); + // void *dtable; + classFields.addNullPointer(PtrTy); + // IMP cxx_construct; + classFields.addNullPointer(PtrTy); + // IMP cxx_destruct; + classFields.addNullPointer(PtrTy); + // struct objc_class *subclass_list + classFields.addNullPointer(PtrTy); + // struct objc_class *sibling_class + classFields.addNullPointer(PtrTy); + // struct objc_protocol_list *protocols; + SmallVector<llvm::Constant*, 16> Protocols; + for (const auto *I : classDecl->protocols()) + Protocols.push_back( + llvm::ConstantExpr::getBitCast(GenerateProtocolRef(I), + ProtocolPtrTy)); + if (Protocols.empty()) + classFields.addNullPointer(PtrTy); + else + classFields.add(GenerateProtocolList(Protocols)); + // struct reference_list *extra_data; + classFields.addNullPointer(PtrTy); + // long abi_version; + classFields.addInt(LongTy, 0); + // struct objc_property_list *properties + classFields.add(GeneratePropertyList(OID, classDecl)); + + auto *classStruct = + classFields.finishAndCreateGlobal(SymbolForClass(className), + CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); + + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(classStruct)->setDLLStorageClass(Storage); + } + + auto *classRefSymbol = GetClassVar(className); + classRefSymbol->setSection(ClsRefSection); + classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy)); + + + // Resolve the class aliases, if they exist. + // FIXME: Class pointer aliases shouldn't exist! 
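The ivar flags word assembled above packs three pieces of information into 32 bits: ownership in bits 0-1, an extended-type-encoding marker in bit 2, and log2 of the alignment in bits 3-8. A standalone sketch with illustrative helper names:

#include <cassert>
#include <cstdint>

// Ownership values as used by FlagsForOwnership above.
enum IvarOwnership { OwnershipInvalid = 0, OwnershipStrong = 1,
                     OwnershipWeak = 2, OwnershipUnsafe = 3 };

// Pack the flags word the same way the ivar metadata above does.
uint32_t packIvarFlags(unsigned log2Align, bool extendedEncoding,
                       IvarOwnership ownership) {
  assert(log2Align < 64 && "alignment of 2^64 bytes or more is impossible");
  return (log2Align << 3) | (unsigned(extendedEncoding) << 2) |
         unsigned(ownership);
}

int main() {
  // An 8-byte-aligned strong ivar with an extended encoding: log2(8) == 3.
  uint32_t flags = packIvarFlags(3, true, OwnershipStrong);
  assert((flags & 0x3) == OwnershipStrong); // ownership in bits 0-1
  assert(flags & 0x4);                      // extended encoding bit
  assert((flags >> 3) == 3);                // log2(alignment) in bits 3-8
}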
+ if (ClassPtrAlias) { + ClassPtrAlias->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(classStruct, IdTy)); + ClassPtrAlias->eraseFromParent(); + ClassPtrAlias = nullptr; + } + if (auto Placeholder = + TheModule.getNamedGlobal(SymbolForClass(className))) + if (Placeholder != classStruct) { + Placeholder->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(classStruct, Placeholder->getType())); + Placeholder->eraseFromParent(); + classStruct->setName(SymbolForClass(className)); + } + if (MetaClassPtrAlias) { + MetaClassPtrAlias->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(metaclass, IdTy)); + MetaClassPtrAlias->eraseFromParent(); + MetaClassPtrAlias = nullptr; + } + assert(classStruct->getName() == SymbolForClass(className)); + + auto classInitRef = new llvm::GlobalVariable(TheModule, + classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage, + classStruct, "._OBJC_INIT_CLASS_" + className); + classInitRef->setSection(ClsSection); + CGM.addUsedGlobal(classInitRef); + + EmittedClass = true; + } + public: + CGObjCGNUstep2(CodeGenModule &Mod) : CGObjCGNUstep(Mod, 10, 4, 2) { + MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, + PtrToObjCSuperTy, SelectorTy); + // struct objc_property + // { + // const char *name; + // const char *attributes; + // const char *type; + // SEL getter; + // SEL setter; + // } + PropertyMetadataTy = + llvm::StructType::get(CGM.getLLVMContext(), + { PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty }); + } + +}; + /// Support for the ObjFW runtime. class CGObjCObjFW: public CGObjCGNU { protected: @@ -878,22 +1923,12 @@ void CGObjCGNU::EmitClassRef(const std::string &className) { llvm::GlobalValue::WeakAnyLinkage, ClassSymbol, symbolRef); } -static std::string SymbolNameForMethod( StringRef ClassName, - StringRef CategoryName, const Selector MethodName, - bool isClassMethod) { - std::string MethodNameColonStripped = MethodName.getAsString(); - std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(), - ':', '_'); - return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" + - CategoryName + "_" + MethodNameColonStripped).str(); -} - CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, - unsigned protocolClassVersion) + unsigned protocolClassVersion, unsigned classABI) : CGObjCRuntime(cgm), TheModule(CGM.getModule()), VMContext(cgm.getLLVMContext()), ClassPtrAlias(nullptr), MetaClassPtrAlias(nullptr), RuntimeVersion(runtimeABIVersion), - ProtocolVersion(protocolClassVersion) { + ProtocolVersion(protocolClassVersion), ClassABIVersion(classABI) { msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend"); @@ -911,6 +1946,8 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, Int8Ty = llvm::Type::getInt8Ty(VMContext); // C string type. Used in lots of places. 
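The CGObjCGNUstep2 constructor above builds PropertyMetadataTy out of five pointers, matching the commented struct objc_property layout. A C mirror of that record follows; SEL is shown as an opaque pointer purely for illustration, and the older objc_property_gsv1 layout appears in the hunk just below.

// Mirror of the v2 property metadata record whose LLVM type
// (PropertyMetadataTy) is built in the CGObjCGNUstep2 constructor above.
typedef void *SEL;

struct objc_property {
  const char *name;
  const char *attributes;
  const char *type;
  SEL getter;
  SEL setter;
};

// GeneratePropertyList above emits arrays of records with this layout.
static_assert(sizeof(objc_property) == 5 * sizeof(void *),
              "five pointer-sized fields");

int main() {}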
PtrToInt8Ty = llvm::PointerType::getUnqual(Int8Ty); + ProtocolPtrTy = llvm::PointerType::getUnqual( + Types.ConvertType(CGM.getContext().getObjCProtoType())); Zeros[0] = llvm::ConstantInt::get(LongTy, 0); Zeros[1] = Zeros[0]; @@ -942,6 +1979,31 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, IdTy = PtrToInt8Ty; } PtrToIdTy = llvm::PointerType::getUnqual(IdTy); + ProtocolTy = llvm::StructType::get(IdTy, + PtrToInt8Ty, // name + PtrToInt8Ty, // protocols + PtrToInt8Ty, // instance methods + PtrToInt8Ty, // class methods + PtrToInt8Ty, // optional instance methods + PtrToInt8Ty, // optional class methods + PtrToInt8Ty, // properties + PtrToInt8Ty);// optional properties + + // struct objc_property_gsv1 + // { + // const char *name; + // char attributes; + // char attributes2; + // char unused1; + // char unused2; + // const char *getter_name; + // const char *getter_types; + // const char *setter_name; + // const char *setter_types; + // } + PropertyMetadataTy = llvm::StructType::get(CGM.getLLVMContext(), { + PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, PtrToInt8Ty, + PtrToInt8Ty, PtrToInt8Ty }); ObjCSuperTy = llvm::StructType::get(IdTy, IdTy); PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy); @@ -1035,16 +2097,8 @@ llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID) { auto *Value = GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); - if (CGM.getTriple().isOSBinFormatCOFF()) { - if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { - auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; - if (OID->hasAttr<DLLExportAttr>()) - DLLStorage = llvm::GlobalValue::DLLExportStorageClass; - else if (OID->hasAttr<DLLImportAttr>()) - DLLStorage = llvm::GlobalValue::DLLImportStorageClass; - ClassSymbol->setDLLStorageClass(DLLStorage); - } - } + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) + CGM.setGVProperties(ClassSymbol, OID); return Value; } @@ -1061,13 +2115,7 @@ llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { if ((VD = dyn_cast<VarDecl>(Result))) break; - auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; - if (!VD || VD->hasAttr<DLLImportAttr>()) - DLLStorage = llvm::GlobalValue::DLLImportStorageClass; - else if (VD->hasAttr<DLLExportAttr>()) - DLLStorage = llvm::GlobalValue::DLLExportStorageClass; - - ClassSymbol->setDLLStorageClass(DLLStorage); + CGM.setGVProperties(ClassSymbol, VD); } } return Value; @@ -1217,7 +2265,7 @@ ConstantAddress CGObjCGNU::GenerateConstantString(const StringLiteral *SL) { StringRef StringClass = CGM.getLangOpts().ObjCConstantStringClass; - if (StringClass.empty()) StringClass = "NXConstantString"; + if (StringClass.empty()) StringClass = "NSConstantString"; std::string Sym = "_OBJC_CLASS_"; Sym += StringClass; @@ -1278,54 +2326,67 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs); llvm::Value *ReceiverClass = nullptr; - if (isCategoryImpl) { - llvm::Constant *classLookupFunction = nullptr; + bool isV2ABI = isRuntime(ObjCRuntime::GNUstep, 2); + if (isV2ABI) { + ReceiverClass = GetClassNamed(CGF, + Class->getSuperClass()->getNameAsString(), /*isWeak*/false); if (IsClassMessage) { - classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( - IdTy, PtrTy, true), "objc_get_meta_class"); - } else { - classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( - IdTy, PtrTy, true), "objc_get_class"); + 
// Load the isa pointer of the superclass is this is a class method. + ReceiverClass = Builder.CreateBitCast(ReceiverClass, + llvm::PointerType::getUnqual(IdTy)); + ReceiverClass = + Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign()); } - ReceiverClass = Builder.CreateCall(classLookupFunction, - MakeConstantString(Class->getNameAsString())); + ReceiverClass = EnforceType(Builder, ReceiverClass, IdTy); } else { - // Set up global aliases for the metaclass or class pointer if they do not - // already exist. These will are forward-references which will be set to - // pointers to the class and metaclass structure created for the runtime - // load function. To send a message to super, we look up the value of the - // super_class pointer from either the class or metaclass structure. - if (IsClassMessage) { - if (!MetaClassPtrAlias) { - MetaClassPtrAlias = llvm::GlobalAlias::create( - IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage, - ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule); + if (isCategoryImpl) { + llvm::Constant *classLookupFunction = nullptr; + if (IsClassMessage) { + classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( + IdTy, PtrTy, true), "objc_get_meta_class"); + } else { + classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( + IdTy, PtrTy, true), "objc_get_class"); } - ReceiverClass = MetaClassPtrAlias; + ReceiverClass = Builder.CreateCall(classLookupFunction, + MakeConstantString(Class->getNameAsString())); } else { - if (!ClassPtrAlias) { - ClassPtrAlias = llvm::GlobalAlias::create( - IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage, - ".objc_class_ref" + Class->getNameAsString(), &TheModule); + // Set up global aliases for the metaclass or class pointer if they do not + // already exist. These will are forward-references which will be set to + // pointers to the class and metaclass structure created for the runtime + // load function. To send a message to super, we look up the value of the + // super_class pointer from either the class or metaclass structure. 
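For orientation, a simplified model of what both super-send paths in this hunk ultimately produce: the receiver and the class (or metaclass, for class methods) to start the search at are packed into an objc_super record and handed to the lookup function bound to MsgLookupSuperFn. The types and the lookup stub here are local stand-ins so the sketch is self-contained; the real lookup lives in the GNU runtime.

typedef struct objc_object *id;
typedef struct objc_selector *SEL;
typedef id (*IMP)(id, SEL);

struct objc_super {
  id receiver;      // the original "self"
  id super_class;   // class (or metaclass) at which to start the search
};

static id dummyMethod(id self, SEL) { return self; }

// Stand-in for the runtime lookup; the real one walks the dispatch table of
// sup->super_class.
static IMP lookupSuper(objc_super *sup, SEL) {
  (void)sup;
  return dummyMethod;
}

static id sendToSuper(id self, id superClass, SEL sel) {
  objc_super sup = {self, superClass}; // corresponds to the ObjCSuper alloca
  IMP imp = lookupSuper(&sup, sel);    // corresponds to LookupIMPSuper
  return imp(self, sel);
}

int main() {
  id result = sendToSuper(nullptr, nullptr, nullptr);
  (void)result;
}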
+ if (IsClassMessage) { + if (!MetaClassPtrAlias) { + MetaClassPtrAlias = llvm::GlobalAlias::create( + IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage, + ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule); + } + ReceiverClass = MetaClassPtrAlias; + } else { + if (!ClassPtrAlias) { + ClassPtrAlias = llvm::GlobalAlias::create( + IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage, + ".objc_class_ref" + Class->getNameAsString(), &TheModule); + } + ReceiverClass = ClassPtrAlias; } - ReceiverClass = ClassPtrAlias; } + // Cast the pointer to a simplified version of the class structure + llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy); + ReceiverClass = Builder.CreateBitCast(ReceiverClass, + llvm::PointerType::getUnqual(CastTy)); + // Get the superclass pointer + ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1); + // Load the superclass pointer + ReceiverClass = + Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign()); } - // Cast the pointer to a simplified version of the class structure - llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy); - ReceiverClass = Builder.CreateBitCast(ReceiverClass, - llvm::PointerType::getUnqual(CastTy)); - // Get the superclass pointer - ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1); - // Load the superclass pointer - ReceiverClass = - Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign()); // Construct the structure used to look up the IMP llvm::StructType *ObjCSuperTy = llvm::StructType::get(Receiver->getType(), IdTy); - // FIXME: Is this really supposed to be a dynamic alloca? - Address ObjCSuper = Address(Builder.CreateAlloca(ObjCSuperTy), + Address ObjCSuper = CGF.CreateTempAlloca(ObjCSuperTy, CGF.getPointerAlign()); Builder.CreateStore(Receiver, @@ -1456,7 +2517,7 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, } // Reset the receiver in case the lookup modified it - ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy, false); + ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy); imp = EnforceType(Builder, imp, MSI.MessengerType); @@ -1506,17 +2567,16 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, llvm::Constant *CGObjCGNU:: GenerateMethodList(StringRef ClassName, StringRef CategoryName, - ArrayRef<Selector> MethodSels, - ArrayRef<llvm::Constant *> MethodTypes, + ArrayRef<const ObjCMethodDecl*> Methods, bool isClassMethodList) { - if (MethodSels.empty()) + if (Methods.empty()) return NULLPtr; ConstantInitBuilder Builder(CGM); auto MethodList = Builder.beginStruct(); MethodList.addNullPointer(CGM.Int8PtrTy); - MethodList.addInt(Int32Ty, MethodTypes.size()); + MethodList.addInt(Int32Ty, Methods.size()); // Get the method structure type. llvm::StructType *ObjCMethodTy = @@ -1525,20 +2585,48 @@ GenerateMethodList(StringRef ClassName, PtrToInt8Ty, // Method types IMPTy // Method pointer }); - auto Methods = MethodList.beginArray(); - for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) { + bool isV2ABI = isRuntime(ObjCRuntime::GNUstep, 2); + if (isV2ABI) { + // size_t size; + llvm::DataLayout td(&TheModule); + MethodList.addInt(SizeTy, td.getTypeSizeInBits(ObjCMethodTy) / + CGM.getContext().getCharWidth()); + ObjCMethodTy = + llvm::StructType::get(CGM.getLLVMContext(), { + IMPTy, // Method pointer + PtrToInt8Ty, // Selector + PtrToInt8Ty // Extended type encoding + }); + } else { + ObjCMethodTy = + llvm::StructType::get(CGM.getLLVMContext(), { + PtrToInt8Ty, // Really a selector, but the runtime creates it us. 
+ PtrToInt8Ty, // Method types + IMPTy // Method pointer + }); + } + auto MethodArray = MethodList.beginArray(); + ASTContext &Context = CGM.getContext(); + for (const auto *OMD : Methods) { llvm::Constant *FnPtr = TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName, - MethodSels[i], + OMD->getSelector(), isClassMethodList)); assert(FnPtr && "Can't generate metadata for method that doesn't exist"); - auto Method = Methods.beginStruct(ObjCMethodTy); - Method.add(MakeConstantString(MethodSels[i].getAsString())); - Method.add(MethodTypes[i]); - Method.addBitCast(FnPtr, IMPTy); - Method.finishAndAddTo(Methods); + auto Method = MethodArray.beginStruct(ObjCMethodTy); + if (isV2ABI) { + Method.addBitCast(FnPtr, IMPTy); + Method.add(GetConstantSelector(OMD->getSelector(), + Context.getObjCEncodingForMethodDecl(OMD))); + Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD, true))); + } else { + Method.add(MakeConstantString(OMD->getSelector().getAsString())); + Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD))); + Method.addBitCast(FnPtr, IMPTy); + } + Method.finishAndAddTo(MethodArray); } - Methods.finishAndAddTo(MethodList); + MethodArray.finishAndAddTo(MethodList); // Create an instance of the structure return MethodList.finishAndCreateGlobal(".objc_method_list", @@ -1549,7 +2637,9 @@ GenerateMethodList(StringRef ClassName, llvm::Constant *CGObjCGNU:: GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, ArrayRef<llvm::Constant *> IvarTypes, - ArrayRef<llvm::Constant *> IvarOffsets) { + ArrayRef<llvm::Constant *> IvarOffsets, + ArrayRef<llvm::Constant *> IvarAlign, + ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership) { if (IvarNames.empty()) return NULLPtr; @@ -1664,7 +2754,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( // gc_object_type Elements.add(NULLPtr); // abi_version - Elements.addInt(LongTy, 1); + Elements.addInt(LongTy, ClassABIVersion); // ivar_offsets Elements.add(IvarOffsets); // properties @@ -1693,22 +2783,22 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( } llvm::Constant *CGObjCGNU:: -GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames, - ArrayRef<llvm::Constant *> MethodTypes) { +GenerateProtocolMethodList(ArrayRef<const ObjCMethodDecl*> Methods) { // Get the method structure type. 
llvm::StructType *ObjCMethodDescTy = llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty }); + ASTContext &Context = CGM.getContext(); ConstantInitBuilder Builder(CGM); auto MethodList = Builder.beginStruct(); - MethodList.addInt(IntTy, MethodNames.size()); - auto Methods = MethodList.beginArray(ObjCMethodDescTy); - for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) { - auto Method = Methods.beginStruct(ObjCMethodDescTy); - Method.add(MethodNames[i]); - Method.add(MethodTypes[i]); - Method.finishAndAddTo(Methods); - } - Methods.finishAndAddTo(MethodList); + MethodList.addInt(IntTy, Methods.size()); + auto MethodArray = MethodList.beginArray(ObjCMethodDescTy); + for (auto *M : Methods) { + auto Method = MethodArray.beginStruct(ObjCMethodDescTy); + Method.add(MakeConstantString(M->getSelector().getAsString())); + Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(M))); + Method.finishAndAddTo(MethodArray); + } + MethodArray.finishAndAddTo(MethodList); return MethodList.finishAndCreateGlobal(".objc_method_list", CGM.getPointerAlign()); } @@ -1742,16 +2832,19 @@ CGObjCGNU::GenerateProtocolList(ArrayRef<std::string> Protocols) { llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { - llvm::Value *protocol = ExistingProtocols[PD->getNameAsString()]; + llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()]; + if (!protocol) + GenerateProtocol(PD); llvm::Type *T = CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); } llvm::Constant * -CGObjCGNU::GenerateEmptyProtocol(const std::string &ProtocolName) { +CGObjCGNU::GenerateEmptyProtocol(StringRef ProtocolName) { llvm::Constant *ProtocolList = GenerateProtocolList({}); - llvm::Constant *MethodList = GenerateProtocolMethodList({}, {}); + llvm::Constant *MethodList = GenerateProtocolMethodList({}); + MethodList = llvm::ConstantExpr::getBitCast(MethodList, PtrToInt8Ty); // Protocols are objects containing lists of the methods implemented and // protocols adopted. ConstantInitBuilder Builder(CGM); @@ -1763,17 +2856,18 @@ CGObjCGNU::GenerateEmptyProtocol(const std::string &ProtocolName) { llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); Elements.add(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.add(ProtocolList); - Elements.add(MethodList); - Elements.add(MethodList); - Elements.add(MethodList); - Elements.add(MethodList); - return Elements.finishAndCreateGlobal(".objc_protocol", + Elements.add(ProtocolList); /* .protocol_list */ + Elements.add(MethodList); /* .instance_methods */ + Elements.add(MethodList); /* .class_methods */ + Elements.add(MethodList); /* .optional_instance_methods */ + Elements.add(MethodList); /* .optional_class_methods */ + Elements.add(NULLPtr); /* .properties */ + Elements.add(NULLPtr); /* .optional_properties */ + return Elements.finishAndCreateGlobal(SymbolForProtocol(ProtocolName), CGM.getPointerAlign()); } void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { - ASTContext &Context = CGM.getContext(); std::string ProtocolName = PD->getNameAsString(); // Use the protocol definition, if there is one. 
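A C mirror of the protocol method description list emitted by GenerateProtocolMethodList. The layout shown is the v2 form described earlier in this file; the v1 form above omits the per-entry size field, and SEL is shown as const char* purely for illustration.

#include <cstdio>

typedef const char *SEL;

struct objc_protocol_method_description {
  SEL selector;        // selector (emitted as a name string in the v1 metadata)
  const char *types;   // Objective-C type encoding
};

struct objc_protocol_method_description_list {
  int count;                                           // number of entries
  int size;                                            // per-entry size (v2 only)
  struct objc_protocol_method_description methods[1];  // 'count' entries follow
};

int main() {
  std::printf("per-entry size recorded in the v2 header: %zu bytes\n",
              sizeof(objc_protocol_method_description));
}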
@@ -1783,51 +2877,31 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { SmallVector<std::string, 16> Protocols; for (const auto *PI : PD->protocols()) Protocols.push_back(PI->getNameAsString()); - SmallVector<llvm::Constant*, 16> InstanceMethodNames; - SmallVector<llvm::Constant*, 16> InstanceMethodTypes; - SmallVector<llvm::Constant*, 16> OptionalInstanceMethodNames; - SmallVector<llvm::Constant*, 16> OptionalInstanceMethodTypes; - for (const auto *I : PD->instance_methods()) { - std::string TypeStr = Context.getObjCEncodingForMethodDecl(I); - if (I->getImplementationControl() == ObjCMethodDecl::Optional) { - OptionalInstanceMethodNames.push_back( - MakeConstantString(I->getSelector().getAsString())); - OptionalInstanceMethodTypes.push_back(MakeConstantString(TypeStr)); - } else { - InstanceMethodNames.push_back( - MakeConstantString(I->getSelector().getAsString())); - InstanceMethodTypes.push_back(MakeConstantString(TypeStr)); - } - } + SmallVector<const ObjCMethodDecl*, 16> InstanceMethods; + SmallVector<const ObjCMethodDecl*, 16> OptionalInstanceMethods; + for (const auto *I : PD->instance_methods()) + if (I->isOptional()) + OptionalInstanceMethods.push_back(I); + else + InstanceMethods.push_back(I); // Collect information about class methods: - SmallVector<llvm::Constant*, 16> ClassMethodNames; - SmallVector<llvm::Constant*, 16> ClassMethodTypes; - SmallVector<llvm::Constant*, 16> OptionalClassMethodNames; - SmallVector<llvm::Constant*, 16> OptionalClassMethodTypes; - for (const auto *I : PD->class_methods()) { - std::string TypeStr = Context.getObjCEncodingForMethodDecl(I); - if (I->getImplementationControl() == ObjCMethodDecl::Optional) { - OptionalClassMethodNames.push_back( - MakeConstantString(I->getSelector().getAsString())); - OptionalClassMethodTypes.push_back(MakeConstantString(TypeStr)); - } else { - ClassMethodNames.push_back( - MakeConstantString(I->getSelector().getAsString())); - ClassMethodTypes.push_back(MakeConstantString(TypeStr)); - } - } + SmallVector<const ObjCMethodDecl*, 16> ClassMethods; + SmallVector<const ObjCMethodDecl*, 16> OptionalClassMethods; + for (const auto *I : PD->class_methods()) + if (I->isOptional()) + OptionalClassMethods.push_back(I); + else + ClassMethods.push_back(I); llvm::Constant *ProtocolList = GenerateProtocolList(Protocols); llvm::Constant *InstanceMethodList = - GenerateProtocolMethodList(InstanceMethodNames, InstanceMethodTypes); + GenerateProtocolMethodList(InstanceMethods); llvm::Constant *ClassMethodList = - GenerateProtocolMethodList(ClassMethodNames, ClassMethodTypes); + GenerateProtocolMethodList(ClassMethods); llvm::Constant *OptionalInstanceMethodList = - GenerateProtocolMethodList(OptionalInstanceMethodNames, - OptionalInstanceMethodTypes); + GenerateProtocolMethodList(OptionalInstanceMethods); llvm::Constant *OptionalClassMethodList = - GenerateProtocolMethodList(OptionalClassMethodNames, - OptionalClassMethodTypes); + GenerateProtocolMethodList(OptionalClassMethods); // Property metadata: name, attributes, isSynthesized, setter name, setter // types, getter name, getter types. @@ -1835,78 +2909,10 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { // simplify the runtime library by allowing it to use the same data // structures for protocol metadata everywhere. 
- llvm::Constant *PropertyList; - llvm::Constant *OptionalPropertyList; - { - llvm::StructType *propertyMetadataTy = - llvm::StructType::get(CGM.getLLVMContext(), - { PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, - PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty }); - - unsigned numReqProperties = 0, numOptProperties = 0; - for (auto property : PD->instance_properties()) { - if (property->isOptional()) - numOptProperties++; - else - numReqProperties++; - } - - ConstantInitBuilder reqPropertyListBuilder(CGM); - auto reqPropertiesList = reqPropertyListBuilder.beginStruct(); - reqPropertiesList.addInt(IntTy, numReqProperties); - reqPropertiesList.add(NULLPtr); - auto reqPropertiesArray = reqPropertiesList.beginArray(propertyMetadataTy); - - ConstantInitBuilder optPropertyListBuilder(CGM); - auto optPropertiesList = optPropertyListBuilder.beginStruct(); - optPropertiesList.addInt(IntTy, numOptProperties); - optPropertiesList.add(NULLPtr); - auto optPropertiesArray = optPropertiesList.beginArray(propertyMetadataTy); - - // Add all of the property methods need adding to the method list and to the - // property metadata list. - for (auto *property : PD->instance_properties()) { - auto &propertiesArray = - (property->isOptional() ? optPropertiesArray : reqPropertiesArray); - auto fields = propertiesArray.beginStruct(propertyMetadataTy); - - fields.add(MakePropertyEncodingString(property, nullptr)); - PushPropertyAttributes(fields, property); - - if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) { - std::string typeStr = Context.getObjCEncodingForMethodDecl(getter); - llvm::Constant *typeEncoding = MakeConstantString(typeStr); - InstanceMethodTypes.push_back(typeEncoding); - fields.add(MakeConstantString(getter->getSelector().getAsString())); - fields.add(typeEncoding); - } else { - fields.add(NULLPtr); - fields.add(NULLPtr); - } - if (ObjCMethodDecl *setter = property->getSetterMethodDecl()) { - std::string typeStr = Context.getObjCEncodingForMethodDecl(setter); - llvm::Constant *typeEncoding = MakeConstantString(typeStr); - InstanceMethodTypes.push_back(typeEncoding); - fields.add(MakeConstantString(setter->getSelector().getAsString())); - fields.add(typeEncoding); - } else { - fields.add(NULLPtr); - fields.add(NULLPtr); - } - - fields.finishAndAddTo(propertiesArray); - } - - reqPropertiesArray.finishAndAddTo(reqPropertiesList); - PropertyList = - reqPropertiesList.finishAndCreateGlobal(".objc_property_list", - CGM.getPointerAlign()); - - optPropertiesArray.finishAndAddTo(optPropertiesList); - OptionalPropertyList = - optPropertiesList.finishAndCreateGlobal(".objc_property_list", - CGM.getPointerAlign()); - } + llvm::Constant *PropertyList = + GeneratePropertyList(nullptr, PD, false, false); + llvm::Constant *OptionalPropertyList = + GeneratePropertyList(nullptr, PD, false, true); // Protocols are objects containing lists of the methods implemented and // protocols adopted. 
@@ -1917,8 +2923,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { Elements.add( llvm::ConstantExpr::getIntToPtr( llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.add( - MakeConstantString(ProtocolName, ".objc_protocol_name")); + Elements.add(MakeConstantString(ProtocolName)); Elements.add(ProtocolList); Elements.add(InstanceMethodList); Elements.add(ClassMethodList); @@ -1933,8 +2938,6 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { } void CGObjCGNU::GenerateProtocolHolderCategory() { // Collect information about instance methods - SmallVector<Selector, 1> MethodSels; - SmallVector<llvm::Constant*, 1> MethodTypes; ConstantInitBuilder Builder(CGM); auto Elements = Builder.beginStruct(); @@ -1945,10 +2948,10 @@ void CGObjCGNU::GenerateProtocolHolderCategory() { Elements.add(MakeConstantString(ClassName)); // Instance method list Elements.addBitCast(GenerateMethodList( - ClassName, CategoryName, MethodSels, MethodTypes, false), PtrTy); + ClassName, CategoryName, {}, false), PtrTy); // Class method list Elements.addBitCast(GenerateMethodList( - ClassName, CategoryName, MethodSels, MethodTypes, true), PtrTy); + ClassName, CategoryName, {}, true), PtrTy); // Protocol list ConstantInitBuilder ProtocolListBuilder(CGM); @@ -2016,25 +3019,9 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) { } void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { - std::string ClassName = OCD->getClassInterface()->getNameAsString(); + const ObjCInterfaceDecl *Class = OCD->getClassInterface(); + std::string ClassName = Class->getNameAsString(); std::string CategoryName = OCD->getNameAsString(); - // Collect information about instance methods - SmallVector<Selector, 16> InstanceMethodSels; - SmallVector<llvm::Constant*, 16> InstanceMethodTypes; - for (const auto *I : OCD->instance_methods()) { - InstanceMethodSels.push_back(I->getSelector()); - std::string TypeStr = CGM.getContext().getObjCEncodingForMethodDecl(I); - InstanceMethodTypes.push_back(MakeConstantString(TypeStr)); - } - - // Collect information about class methods - SmallVector<Selector, 16> ClassMethodSels; - SmallVector<llvm::Constant*, 16> ClassMethodTypes; - for (const auto *I : OCD->class_methods()) { - ClassMethodSels.push_back(I->getSelector()); - std::string TypeStr = CGM.getContext().getObjCEncodingForMethodDecl(I); - ClassMethodTypes.push_back(MakeConstantString(TypeStr)); - } // Collect the names of referenced protocols SmallVector<std::string, 16> Protocols; @@ -2049,84 +3036,125 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { Elements.add(MakeConstantString(CategoryName)); Elements.add(MakeConstantString(ClassName)); // Instance method list + SmallVector<ObjCMethodDecl*, 16> InstanceMethods; + InstanceMethods.insert(InstanceMethods.begin(), OCD->instmeth_begin(), + OCD->instmeth_end()); Elements.addBitCast( - GenerateMethodList(ClassName, CategoryName, InstanceMethodSels, - InstanceMethodTypes, false), + GenerateMethodList(ClassName, CategoryName, InstanceMethods, false), PtrTy); // Class method list + + SmallVector<ObjCMethodDecl*, 16> ClassMethods; + ClassMethods.insert(ClassMethods.begin(), OCD->classmeth_begin(), + OCD->classmeth_end()); Elements.addBitCast( - GenerateMethodList(ClassName, CategoryName, ClassMethodSels, - ClassMethodTypes, true), + GenerateMethodList(ClassName, CategoryName, ClassMethods, true), PtrTy); // Protocol list Elements.addBitCast(GenerateProtocolList(Protocols), PtrTy); + if (isRuntime(ObjCRuntime::GNUstep, 
2)) { + const ObjCCategoryDecl *Category = + Class->FindCategoryDeclaration(OCD->getIdentifier()); + if (Category) { + // Instance properties + Elements.addBitCast(GeneratePropertyList(OCD, Category, false), PtrTy); + // Class properties + Elements.addBitCast(GeneratePropertyList(OCD, Category, true), PtrTy); + } else { + Elements.addNullPointer(PtrTy); + Elements.addNullPointer(PtrTy); + } + } + Categories.push_back(llvm::ConstantExpr::getBitCast( - Elements.finishAndCreateGlobal("", CGM.getPointerAlign()), + Elements.finishAndCreateGlobal( + std::string(".objc_category_")+ClassName+CategoryName, + CGM.getPointerAlign()), PtrTy)); } -llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OID, - SmallVectorImpl<Selector> &InstanceMethodSels, - SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes) { +llvm::Constant *CGObjCGNU::GeneratePropertyList(const Decl *Container, + const ObjCContainerDecl *OCD, + bool isClassProperty, + bool protocolOptionalProperties) { + + SmallVector<const ObjCPropertyDecl *, 16> Properties; + llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet; + bool isProtocol = isa<ObjCProtocolDecl>(OCD); ASTContext &Context = CGM.getContext(); - // Property metadata: name, attributes, attributes2, padding1, padding2, - // setter name, setter types, getter name, getter types. - llvm::StructType *propertyMetadataTy = - llvm::StructType::get(CGM.getLLVMContext(), - { PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, - PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty }); - unsigned numProperties = 0; - for (auto *propertyImpl : OID->property_impls()) { - (void) propertyImpl; - numProperties++; + std::function<void(const ObjCProtocolDecl *Proto)> collectProtocolProperties + = [&](const ObjCProtocolDecl *Proto) { + for (const auto *P : Proto->protocols()) + collectProtocolProperties(P); + for (const auto *PD : Proto->properties()) { + if (isClassProperty != PD->isClassProperty()) + continue; + // Skip any properties that are declared in protocols that this class + // conforms to but are not actually implemented by this class. + if (!isProtocol && !Context.getObjCPropertyImplDeclForPropertyDecl(PD, Container)) + continue; + if (!PropertySet.insert(PD->getIdentifier()).second) + continue; + Properties.push_back(PD); + } + }; + + if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) + for (const ObjCCategoryDecl *ClassExt : OID->known_extensions()) + for (auto *PD : ClassExt->properties()) { + if (isClassProperty != PD->isClassProperty()) + continue; + PropertySet.insert(PD->getIdentifier()); + Properties.push_back(PD); + } + + for (const auto *PD : OCD->properties()) { + if (isClassProperty != PD->isClassProperty()) + continue; + // If we're generating a list for a protocol, skip optional / required ones + // when generating the other list. + if (isProtocol && (protocolOptionalProperties != PD->isOptional())) + continue; + // Don't emit duplicate metadata for properties that were already in a + // class extension. 
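The property collection that this hunk implements, and that continues in the lines just below, gathers class-extension properties first, then the container's own properties, then properties from adopted protocols, walking parent protocols recursively and skipping anything already recorded. A simplified model follows, with stand-in types, keyed on plain strings rather than IdentifierInfo, and without the class-property and protocol-optionality filters:

#include <functional>
#include <string>
#include <unordered_set>
#include <vector>

struct Property { std::string name; };
struct Protocol {
  std::vector<const Protocol *> parents;
  std::vector<Property> properties;
};

// Collection order: class extensions, then the container itself, then
// protocols (parents before the protocol's own properties), de-duplicated
// by property name.
std::vector<Property>
collectProperties(const std::vector<Property> &classExtensionProps,
                  const std::vector<Property> &ownProps,
                  const std::vector<const Protocol *> &protocols) {
  std::vector<Property> result;
  std::unordered_set<std::string> seen;
  auto addIfNew = [&](const Property &P) {
    if (seen.insert(P.name).second)
      result.push_back(P);
  };
  for (const Property &P : classExtensionProps)
    addIfNew(P);
  for (const Property &P : ownProps)
    addIfNew(P);
  std::function<void(const Protocol *)> walk = [&](const Protocol *Proto) {
    for (const Protocol *Parent : Proto->parents)
      walk(Parent);
    for (const Property &P : Proto->properties)
      addIfNew(P);
  };
  for (const Protocol *Proto : protocols)
    walk(Proto);
  return result;
}

int main() {
  Protocol Base{{}, {{"shared"}}};
  Protocol Derived{{&Base}, {{"extra"}}};
  auto Props = collectProperties({{"ext"}}, {{"shared"}, {"own"}}, {&Derived});
  return Props.size() == 4 ? 0 : 1; // ext, shared, own, extra (no duplicate)
}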
+ if (!PropertySet.insert(PD->getIdentifier()).second) + continue; + + Properties.push_back(PD); } + if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) + for (const auto *P : OID->all_referenced_protocols()) + collectProtocolProperties(P); + else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) + for (const auto *P : CD->protocols()) + collectProtocolProperties(P); + + auto numProperties = Properties.size(); + + if (numProperties == 0) + return NULLPtr; + ConstantInitBuilder builder(CGM); auto propertyList = builder.beginStruct(); - propertyList.addInt(IntTy, numProperties); - propertyList.add(NULLPtr); - auto properties = propertyList.beginArray(propertyMetadataTy); + auto properties = PushPropertyListHeader(propertyList, numProperties); // Add all of the property methods need adding to the method list and to the // property metadata list. - for (auto *propertyImpl : OID->property_impls()) { - auto fields = properties.beginStruct(propertyMetadataTy); - ObjCPropertyDecl *property = propertyImpl->getPropertyDecl(); - bool isSynthesized = (propertyImpl->getPropertyImplementation() == - ObjCPropertyImplDecl::Synthesize); - bool isDynamic = (propertyImpl->getPropertyImplementation() == - ObjCPropertyImplDecl::Dynamic); - - fields.add(MakePropertyEncodingString(property, OID)); - PushPropertyAttributes(fields, property, isSynthesized, isDynamic); - if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) { - std::string TypeStr = Context.getObjCEncodingForMethodDecl(getter); - llvm::Constant *TypeEncoding = MakeConstantString(TypeStr); - if (isSynthesized) { - InstanceMethodTypes.push_back(TypeEncoding); - InstanceMethodSels.push_back(getter->getSelector()); + for (auto *property : Properties) { + bool isSynthesized = false; + bool isDynamic = false; + if (!isProtocol) { + auto *propertyImpl = Context.getObjCPropertyImplDeclForPropertyDecl(property, Container); + if (propertyImpl) { + isSynthesized = (propertyImpl->getPropertyImplementation() == + ObjCPropertyImplDecl::Synthesize); + isDynamic = (propertyImpl->getPropertyImplementation() == + ObjCPropertyImplDecl::Dynamic); } - fields.add(MakeConstantString(getter->getSelector().getAsString())); - fields.add(TypeEncoding); - } else { - fields.add(NULLPtr); - fields.add(NULLPtr); } - if (ObjCMethodDecl *setter = property->getSetterMethodDecl()) { - std::string TypeStr = Context.getObjCEncodingForMethodDecl(setter); - llvm::Constant *TypeEncoding = MakeConstantString(TypeStr); - if (isSynthesized) { - InstanceMethodTypes.push_back(TypeEncoding); - InstanceMethodSels.push_back(setter->getSelector()); - } - fields.add(MakeConstantString(setter->getSelector().getAsString())); - fields.add(TypeEncoding); - } else { - fields.add(NULLPtr); - fields.add(NULLPtr); - } - fields.finishAndAddTo(properties); + PushProperty(properties, property, Container, isSynthesized, isDynamic); } properties.finishAndAddTo(propertyList); @@ -2179,6 +3207,8 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { SmallVector<llvm::Constant*, 16> IvarNames; SmallVector<llvm::Constant*, 16> IvarTypes; SmallVector<llvm::Constant*, 16> IvarOffsets; + SmallVector<llvm::Constant*, 16> IvarAligns; + SmallVector<Qualifiers::ObjCLifetime, 16> IvarOwnership; ConstantInitBuilder IvarOffsetBuilder(CGM); auto IvarOffsetValues = IvarOffsetBuilder.beginArray(PtrToIntTy); @@ -2201,6 +3231,8 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { std::string TypeStr; Context.getObjCEncodingForType(IVD->getType(), TypeStr, 
IVD); IvarTypes.push_back(MakeConstantString(TypeStr)); + IvarAligns.push_back(llvm::ConstantInt::get(IntTy, + Context.getTypeSize(IVD->getType()))); // Get the offset uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); uint64_t Offset = BaseOffset; @@ -2211,6 +3243,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Create the direct offset value std::string OffsetName = "__objc_ivar_offset_value_" + ClassName +"." + IVD->getNameAsString(); + llvm::GlobalVariable *OffsetVar = TheModule.getGlobalVariable(OffsetName); if (OffsetVar) { OffsetVar->setInitializer(OffsetValue); @@ -2219,14 +3252,13 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // copy. OffsetVar->setLinkage(llvm::GlobalValue::ExternalLinkage); } else - OffsetVar = new llvm::GlobalVariable(TheModule, IntTy, + OffsetVar = new llvm::GlobalVariable(TheModule, Int32Ty, false, llvm::GlobalValue::ExternalLinkage, - OffsetValue, - "__objc_ivar_offset_value_" + ClassName +"." + - IVD->getNameAsString()); + OffsetValue, OffsetName); IvarOffsets.push_back(OffsetValue); IvarOffsetValues.add(OffsetVar); Qualifiers::ObjCLifetime lt = IVD->getType().getQualifiers().getObjCLifetime(); + IvarOwnership.push_back(lt); switch (lt) { case Qualifiers::OCL_Strong: StrongIvars.push_back(true); @@ -2248,25 +3280,30 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { CGM.getPointerAlign()); // Collect information about instance methods - SmallVector<Selector, 16> InstanceMethodSels; - SmallVector<llvm::Constant*, 16> InstanceMethodTypes; - for (const auto *I : OID->instance_methods()) { - InstanceMethodSels.push_back(I->getSelector()); - std::string TypeStr = Context.getObjCEncodingForMethodDecl(I); - InstanceMethodTypes.push_back(MakeConstantString(TypeStr)); - } + SmallVector<const ObjCMethodDecl*, 16> InstanceMethods; + InstanceMethods.insert(InstanceMethods.begin(), OID->instmeth_begin(), + OID->instmeth_end()); + + SmallVector<const ObjCMethodDecl*, 16> ClassMethods; + ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(), + OID->classmeth_end()); + + // Collect the same information about synthesized properties, which don't + // show up in the instance method lists. 
+ for (auto *propertyImpl : OID->property_impls()) + if (propertyImpl->getPropertyImplementation() == + ObjCPropertyImplDecl::Synthesize) { + ObjCPropertyDecl *property = propertyImpl->getPropertyDecl(); + auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { + if (accessor) + InstanceMethods.push_back(accessor); + }; + addPropertyMethod(property->getGetterMethodDecl()); + addPropertyMethod(property->getSetterMethodDecl()); + } - llvm::Constant *Properties = GeneratePropertyList(OID, InstanceMethodSels, - InstanceMethodTypes); + llvm::Constant *Properties = GeneratePropertyList(OID, ClassDecl); - // Collect information about class methods - SmallVector<Selector, 16> ClassMethodSels; - SmallVector<llvm::Constant*, 16> ClassMethodTypes; - for (const auto *I : OID->class_methods()) { - ClassMethodSels.push_back(I->getSelector()); - std::string TypeStr = Context.getObjCEncodingForMethodDecl(I); - ClassMethodTypes.push_back(MakeConstantString(TypeStr)); - } // Collect the names of referenced protocols SmallVector<std::string, 16> Protocols; for (const auto *I : ClassDecl->protocols()) @@ -2283,11 +3320,11 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { SmallVector<llvm::Constant*, 1> empty; // Generate the method and instance variable lists llvm::Constant *MethodList = GenerateMethodList(ClassName, "", - InstanceMethodSels, InstanceMethodTypes, false); + InstanceMethods, false); llvm::Constant *ClassMethodList = GenerateMethodList(ClassName, "", - ClassMethodSels, ClassMethodTypes, true); + ClassMethods, true); llvm::Constant *IvarList = GenerateIvarList(IvarNames, IvarTypes, - IvarOffsets); + IvarOffsets, IvarAligns, IvarOwnership); // Irrespective of whether we are compiling for a fragile or non-fragile ABI, // we emit a symbol containing the offset for each ivar in the class. This // allows code compiled for the non-Fragile ABI to inherit from code compiled @@ -2300,14 +3337,13 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // the offset (third field in ivar structure) llvm::Type *IndexTy = Int32Ty; llvm::Constant *offsetPointerIndexes[] = {Zeros[0], - llvm::ConstantInt::get(IndexTy, 1), nullptr, - llvm::ConstantInt::get(IndexTy, 2) }; + llvm::ConstantInt::get(IndexTy, ClassABIVersion > 1 ? 2 : 1), nullptr, + llvm::ConstantInt::get(IndexTy, ClassABIVersion > 1 ? 3 : 2) }; unsigned ivarIndex = 0; for (const ObjCIvarDecl *IVD = ClassDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { - const std::string Name = "__objc_ivar_offset_" + ClassName + '.' - + IVD->getNameAsString(); + const std::string Name = GetIVarOffsetVariableName(ClassDecl, IVD); offsetPointerIndexes[2] = llvm::ConstantInt::get(IndexTy, ivarIndex); // Get the correct ivar field llvm::Constant *offsetValue = llvm::ConstantExpr::getGetElementPtr( @@ -2321,12 +3357,10 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // different modules will use this one, rather than their private // copy. offset->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else { + } else // Add a new alias if there isn't one already. - offset = new llvm::GlobalVariable(TheModule, offsetValue->getType(), + new llvm::GlobalVariable(TheModule, offsetValue->getType(), false, llvm::GlobalValue::ExternalLinkage, offsetValue, Name); - (void) offset; // Silence dead store warning. 
- } ++ivarIndex; } llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0); @@ -2334,16 +3368,10 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { //Generate metaclass for class methods llvm::Constant *MetaClassStruct = GenerateClassStructure( NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], - GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr, - NULLPtr, ZeroPtr, ZeroPtr, true); - if (CGM.getTriple().isOSBinFormatCOFF()) { - auto Storage = llvm::GlobalValue::DefaultStorageClass; - if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) - Storage = llvm::GlobalValue::DLLImportStorageClass; - else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) - Storage = llvm::GlobalValue::DLLExportStorageClass; - cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage); - } + NULLPtr, ClassMethodList, NULLPtr, NULLPtr, + GeneratePropertyList(OID, ClassDecl, true), ZeroPtr, ZeroPtr, true); + CGM.setGVProperties(cast<llvm::GlobalValue>(MetaClassStruct), + OID->getClassInterface()); // Generate the class structure llvm::Constant *ClassStruct = GenerateClassStructure( @@ -2351,14 +3379,8 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList, GenerateProtocolList(Protocols), IvarOffsetArray, Properties, StrongIvarBitmap, WeakIvarBitmap); - if (CGM.getTriple().isOSBinFormatCOFF()) { - auto Storage = llvm::GlobalValue::DefaultStorageClass; - if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) - Storage = llvm::GlobalValue::DLLImportStorageClass; - else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) - Storage = llvm::GlobalValue::DLLExportStorageClass; - cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage); - } + CGM.setGVProperties(cast<llvm::GlobalValue>(ClassStruct), + OID->getClassInterface()); // Resolve the class aliases, if they exist. if (ClassPtrAlias) { @@ -2785,8 +3807,7 @@ void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { - const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString() - + '.' + Ivar->getNameAsString(); + const std::string Name = GetIVarOffsetVariableName(ID, Ivar); // Emit the variable and initialize it with what we think the correct value // is. This allows code compiled with non-fragile ivars to work correctly // when linked against code which isn't (most of the time). @@ -2895,8 +3916,11 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, CGObjCRuntime * clang::CodeGen::CreateGNUObjCRuntime(CodeGenModule &CGM) { - switch (CGM.getLangOpts().ObjCRuntime.getKind()) { + auto Runtime = CGM.getLangOpts().ObjCRuntime; + switch (Runtime.getKind()) { case ObjCRuntime::GNUstep: + if (Runtime.getVersion() >= VersionTuple(2, 0)) + return new CGObjCGNUstep2(CGM); return new CGObjCGNUstep(CGM); case ObjCRuntime::GCC: diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index ef4e6cd4f01b..0c766575dc21 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -888,7 +888,7 @@ protected: /// int * but is actually an Obj-C class pointer. llvm::WeakTrackingVH ConstantStringClassRef; - /// \brief The LLVM type corresponding to NSConstantString. + /// The LLVM type corresponding to NSConstantString. 
llvm::StructType *NSConstantStringType = nullptr; llvm::StringMap<llvm::GlobalVariable *> NSConstantStringMap; @@ -1708,7 +1708,7 @@ struct NullReturnState { e = Method->param_end(); i != e; ++i, ++I) { const ParmVarDecl *ParamDecl = (*i); if (ParamDecl->hasAttr<NSConsumedAttr>()) { - RValue RV = I->RV; + RValue RV = I->getRValue(CGF); assert(RV.isScalar() && "NullReturnState::complete - arg not on object"); CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime); @@ -3401,7 +3401,9 @@ static bool hasMRCWeakIvars(CodeGenModule &CGM, See EmitClassExtension(); */ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { - DefinedSymbols.insert(ID->getIdentifier()); + IdentifierInfo *RuntimeName = + &CGM.getContext().Idents.get(ID->getObjCRuntimeNameAsString()); + DefinedSymbols.insert(RuntimeName); std::string ClassName = ID->getNameAsString(); // FIXME: Gross @@ -4179,10 +4181,6 @@ void FragileHazards::emitHazardsInNewBlocks() { } } -static void addIfPresent(llvm::DenseSet<llvm::Value*> &S, llvm::Value *V) { - if (V) S.insert(V); -} - static void addIfPresent(llvm::DenseSet<llvm::Value*> &S, Address V) { if (V.isValid()) S.insert(V.getPointer()); } @@ -4984,7 +4982,9 @@ llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF, if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); - return EmitClassRefFromId(CGF, ID->getIdentifier()); + IdentifierInfo *RuntimeName = + &CGM.getContext().Idents.get(ID->getObjCRuntimeNameAsString()); + return EmitClassRefFromId(CGF, RuntimeName); } llvm::Value *CGObjCMac::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { @@ -6309,9 +6309,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { llvm::GlobalVariable *MetaTClass = BuildClassObject(CI, /*metaclass*/ true, IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden); - if (CGM.getTriple().isOSBinFormatCOFF()) - if (CI->hasAttr<DLLExportAttr>()) - MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGM.setGVProperties(MetaTClass, CI); DefinedMetaClasses.push_back(MetaTClass); // Metadata for the class @@ -6351,9 +6349,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { llvm::GlobalVariable *ClassMD = BuildClassObject(CI, /*metaclass*/ false, MetaTClass, SuperClassGV, CLASS_RO_GV, classIsHidden); - if (CGM.getTriple().isOSBinFormatCOFF()) - if (CI->hasAttr<DLLExportAttr>()) - ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGM.setGVProperties(ClassMD, CI); DefinedClasses.push_back(ClassMD); ImplementedClasses.push_back(CI); @@ -6403,7 +6399,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, PTGV->setAlignment(Align.getQuantity()); if (!CGM.getTriple().isOSBinFormatMachO()) PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName)); - CGM.addCompilerUsedGlobal(PTGV); + CGM.addUsedGlobal(PTGV); return CGF.Builder.CreateAlignedLoad(PTGV, Align); } @@ -6847,7 +6843,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( Protocols[PD->getIdentifier()] = Entry; } Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.addCompilerUsedGlobal(Entry); + CGM.addUsedGlobal(Entry); // Use this protocol meta-data to build protocol list table in section // __DATA, __objc_protolist @@ -6866,7 +6862,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( PTGV->setSection(GetSectionName("__objc_protolist", "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - 
CGM.addCompilerUsedGlobal(PTGV); + CGM.addUsedGlobal(PTGV); return Entry; } @@ -6952,7 +6948,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitIvarOffset( // This could be 32bit int or 64bit integer depending on the architecture. // Cast it to 64bit integer value, if it is a 32bit integer ivar offset value - // as this is what caller always expectes. + // as this is what caller always expects. if (ObjCTypes.IvarOffsetVarTy == ObjCTypes.IntTy) IvarOffsetValue = CGF.Builder.CreateIntCast( IvarOffsetValue, ObjCTypes.LongTy, true, "ivar.conv"); @@ -7079,7 +7075,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, CGF.getPointerAlign()); // Update the message ref argument. - args[1].RV = RValue::get(mref.getPointer()); + args[1].setRValue(RValue::get(mref.getPointer())); // Load the function to call from the message ref table. Address calleeAddr = @@ -7528,12 +7524,7 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, EHTypeName); - if (CGM.getTriple().isOSBinFormatCOFF()) { - if (ID->hasAttr<DLLExportAttr>()) - Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - else if (ID->hasAttr<DLLImportAttr>()) - Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - } + CGM.setGVProperties(Entry, ID); return Entry; } } @@ -7572,10 +7563,8 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, CGM.getPointerAlign(), /*constant*/ false, L); - if (CGM.getTriple().isOSBinFormatCOFF()) - if (hasObjCExceptionAttribute(CGM.getContext(), ID)) - if (ID->hasAttr<DLLExportAttr>()) - Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) + CGM.setGVProperties(Entry, ID); } assert(Entry->getLinkage() == L); diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index d140e7f09e9a..1da19a90c387 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -66,13 +66,19 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { } llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) { - if (!PipeTy){ - uint32_t PipeAddrSpc = CGM.getContext().getTargetAddressSpace( - CGM.getContext().getOpenCLTypeAddrSpace(T)); - PipeTy = llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc); - } + if (T->isReadOnly()) + return getPipeType(T, "opencl.pipe_ro_t", PipeROTy); + else + return getPipeType(T, "opencl.pipe_wo_t", PipeWOTy); +} +llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T, StringRef Name, + llvm::Type *&PipeTy) { + if (!PipeTy) + PipeTy = llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), Name), + CGM.getContext().getTargetAddressSpace( + CGM.getContext().getOpenCLTypeAddrSpace(T))); return PipeTy; } @@ -112,37 +118,64 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } -CGOpenCLRuntime::EnqueuedBlockInfo -CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { - // The block literal may be assigned to a const variable. Chasing down - // to get the block literal. +// Get the block literal from an expression derived from the block expression. +// OpenCL v2.0 s6.12.5: +// Block variable declarations are implicitly qualified with const. 
Therefore +// all block variables must be initialized at declaration time and may not be +// reassigned. +static const BlockExpr *getBlockExpr(const Expr *E) { + if (auto Cast = dyn_cast<CastExpr>(E)) { + E = Cast->getSubExpr(); + } if (auto DR = dyn_cast<DeclRefExpr>(E)) { E = cast<VarDecl>(DR->getDecl())->getInit(); } + E = E->IgnoreImplicit(); if (auto Cast = dyn_cast<CastExpr>(E)) { E = Cast->getSubExpr(); } - auto *Block = cast<BlockExpr>(E); + return cast<BlockExpr>(E); +} + +/// Record emitted llvm invoke function and llvm block literal for the +/// corresponding block expression. +void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, + llvm::Function *InvokeF, + llvm::Value *Block) { + assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() && + "Block expression emitted twice"); + assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function"); + assert(Block->getType()->isPointerTy() && "Invalid block literal type"); + EnqueuedBlockMap[E].InvokeFunc = InvokeF; + EnqueuedBlockMap[E].BlockArg = Block; + EnqueuedBlockMap[E].Kernel = nullptr; +} + +llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { + return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; +} + +CGOpenCLRuntime::EnqueuedBlockInfo +CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { + CGF.EmitScalarExpr(E); + + const BlockExpr *Block = getBlockExpr(E); + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && + "Block expression not emitted"); - // The same block literal may be enqueued multiple times. Cache it if - // possible. - auto Loc = EnqueuedBlockMap.find(Block); - if (Loc != EnqueuedBlockMap.end()) { - return Loc->second; + // Do not emit the block wrapper again if it has been emitted. + if (EnqueuedBlockMap[Block].Kernel) { + return EnqueuedBlockMap[Block]; } - // Emit block literal as a common block expression and get the block invoke - // function. - llvm::Function *Invoke; - auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke); auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel( - CGF, Invoke, V->stripPointerCasts()); + CGF, EnqueuedBlockMap[Block].InvokeFunc, + EnqueuedBlockMap[Block].BlockArg->stripPointerCasts()); // The common part of the post-processing of the kernel goes here. F->addFnAttr(llvm::Attribute::NoUnwind); F->setCallingConv( CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel)); - EnqueuedBlockInfo Info{F, V}; - EnqueuedBlockMap[Block] = Info; - return Info; + EnqueuedBlockMap[Block].Kernel = F; + return EnqueuedBlockMap[Block]; } diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index ead303d1d0d5..a513340827a8 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -23,6 +23,7 @@ namespace clang { +class BlockExpr; class Expr; class VarDecl; @@ -34,20 +35,25 @@ class CodeGenModule; class CGOpenCLRuntime { protected: CodeGenModule &CGM; - llvm::Type *PipeTy; + llvm::Type *PipeROTy; + llvm::Type *PipeWOTy; llvm::PointerType *SamplerTy; /// Structure for enqueued block information. struct EnqueuedBlockInfo { - llvm::Function *Kernel; /// Enqueued block kernel. - llvm::Value *BlockArg; /// The first argument to enqueued block kernel. + llvm::Function *InvokeFunc; /// Block invoke function. + llvm::Function *Kernel; /// Enqueued block kernel. + llvm::Value *BlockArg; /// The first argument to enqueued block kernel. }; /// Maps block expression to block information. 
llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap; + virtual llvm::Type *getPipeType(const PipeType *T, StringRef Name, + llvm::Type *&PipeTy); + public: - CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr), - SamplerTy(nullptr) {} + CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), + PipeROTy(nullptr), PipeWOTy(nullptr), SamplerTy(nullptr) {} virtual ~CGOpenCLRuntime(); /// Emit the IR required for a work-group-local variable declaration, and add @@ -62,11 +68,11 @@ public: llvm::PointerType *getSamplerType(const Type *T); - // \brief Returnes a value which indicates the size in bytes of the pipe + // Returns a value which indicates the size in bytes of the pipe // element. virtual llvm::Value *getPipeElemSize(const Expr *PipeArg); - // \brief Returnes a value which indicates the alignment in bytes of the pipe + // Returns a value which indicates the alignment in bytes of the pipe // element. virtual llvm::Value *getPipeElemAlign(const Expr *PipeArg); @@ -76,6 +82,19 @@ public: /// \return enqueued block information for enqueued block. EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E); + + /// Record invoke function and block literal emitted during normal + /// codegen for a block expression. The information is used by + /// emitOpenCLEnqueuedBlock to emit wrapper kernel. + /// + /// \param InvokeF invoke function emitted for the block expression. + /// \param Block block literal emitted for the block expression. + void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, + llvm::Value *Block); + + /// \return LLVM block invoke function emitted for an expression derived from + /// the block expression. + llvm::Function *getInvokeFunction(const Expr *E); }; } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index fa38ee80bf41..3730b9af12fa 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -14,12 +14,13 @@ #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGOpenMPRuntime.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/BitmaskEnum.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitmaskEnum.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -33,20 +34,20 @@ using namespace clang; using namespace CodeGen; namespace { -/// \brief Base class for handling code generation inside OpenMP regions. +/// Base class for handling code generation inside OpenMP regions. class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: - /// \brief Kinds of OpenMP regions used in codegen. + /// Kinds of OpenMP regions used in codegen. enum CGOpenMPRegionKind { - /// \brief Region with outlined function for standalone 'parallel' + /// Region with outlined function for standalone 'parallel' /// directive. ParallelOutlinedRegion, - /// \brief Region with outlined function for standalone 'task' directive. + /// Region with outlined function for standalone 'task' directive. TaskOutlinedRegion, - /// \brief Region for constructs that do not require function outlining, + /// Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. InlinedRegion, - /// \brief Region with outlined function for standalone 'target' directive. + /// Region with outlined function for standalone 'target' directive. 
TargetRegion, }; @@ -63,14 +64,14 @@ public: : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. virtual const VarDecl *getThreadIDVariable() const = 0; - /// \brief Emit the captured statement body. + /// Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; - /// \brief Get an LValue for the current ThreadID variable. + /// Get an LValue for the current ThreadID variable. /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); @@ -95,7 +96,7 @@ protected: bool HasCancel; }; -/// \brief API for captured statement code generation in OpenMP constructs. +/// API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, @@ -108,11 +109,11 @@ public: assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -122,13 +123,13 @@ public: } private: - /// \brief A variable or parameter storing global thread id for OpenMP + /// A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; StringRef HelperName; }; -/// \brief API for captured statement code generation in OpenMP constructs. +/// API for captured statement code generation in OpenMP constructs. class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: class UntiedTaskActionTy final : public PrePostActionTy { @@ -144,11 +145,12 @@ public: void Enter(CodeGenFunction &CGF) override { if (Untied) { // Emit task switching point. - auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(PartIDVar), PartIDVar->getType()->castAs<PointerType>()); - auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); - auto *DoneBB = CGF.createBasicBlock(".untied.done."); + llvm::Value *Res = + CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); CGF.EmitBlock(DoneBB); CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); @@ -160,7 +162,7 @@ public: } void emitUntiedSwitch(CodeGenFunction &CGF) const { if (Untied) { - auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(PartIDVar), PartIDVar->getType()->castAs<PointerType>()); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), @@ -188,14 +190,14 @@ public: assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. 
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } - /// \brief Get an LValue for the current ThreadID variable. + /// Get an LValue for the current ThreadID variable. LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } void emitUntiedSwitch(CodeGenFunction &CGF) override { @@ -209,14 +211,14 @@ public: } private: - /// \brief A variable or parameter storing global thread id for OpenMP + /// A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; /// Action for emitting code for untied tasks. const UntiedTaskActionTy &Action; }; -/// \brief API for inlined captured statement code generation in OpenMP +/// API for inlined captured statement code generation in OpenMP /// constructs. class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { public: @@ -227,7 +229,7 @@ public: OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} - // \brief Retrieve the value of the context parameter. + // Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); @@ -242,7 +244,7 @@ public: llvm_unreachable("No context value for inlined OpenMP region"); } - /// \brief Lookup the captured field decl for a variable. + /// Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) return OuterRegionInfo->lookup(VD); @@ -257,7 +259,7 @@ public: return nullptr; } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { if (OuterRegionInfo) @@ -265,14 +267,14 @@ public: return nullptr; } - /// \brief Get an LValue for the current ThreadID variable. + /// Get an LValue for the current ThreadID variable. LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { if (OuterRegionInfo) return OuterRegionInfo->getThreadIDVariableLValue(CGF); llvm_unreachable("No LValue for inlined OpenMP construct"); } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { if (auto *OuterRegionInfo = getOldCSI()) return OuterRegionInfo->getHelperName(); @@ -294,12 +296,12 @@ public: ~CGOpenMPInlinedRegionInfo() override = default; private: - /// \brief CodeGen info about outer OpenMP region. + /// CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; CGOpenMPRegionInfo *OuterRegionInfo; }; -/// \brief API for captured statement code generation in OpenMP target +/// API for captured statement code generation in OpenMP target /// constructs. For this captures, implicit parameters are used instead of the /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has @@ -312,11 +314,11 @@ public: /*HasCancel=*/false), HelperName(HelperName) {} - /// \brief This is unused for target regions because each starts executing + /// This is unused for target regions because each starts executing /// with a single thread. const VarDecl *getThreadIDVariable() const override { return nullptr; } - /// \brief Get the name of the capture helper. 
+ /// Get the name of the capture helper. StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -331,7 +333,7 @@ private: static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { llvm_unreachable("No codegen for expressions"); } -/// \brief API for generation of expressions captured in a innermost OpenMP +/// API for generation of expressions captured in a innermost OpenMP /// region. class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { public: @@ -343,7 +345,7 @@ public: // Make sure the globals captured in the provided statement are local by // using the privatization logic. We assume the same variable is not // captured more than once. - for (auto &C : CS.captures()) { + for (const auto &C : CS.captures()) { if (!C.capturesVariable() && !C.capturesVariableByCopy()) continue; @@ -354,33 +356,32 @@ public: DeclRefExpr DRE(const_cast<VarDecl *>(VD), /*RefersToEnclosingVariableOrCapture=*/false, VD->getType().getNonReferenceType(), VK_LValue, - SourceLocation()); - PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); - }); + C.getLocation()); + PrivScope.addPrivate( + VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); } (void)PrivScope.Privatize(); } - /// \brief Lookup the captured field decl for a variable. + /// Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { - if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) return FD; return nullptr; } - /// \brief Emit the captured statement body. + /// Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { llvm_unreachable("No body for expressions"); } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { llvm_unreachable("No thread id for expressions"); } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { llvm_unreachable("No helper name for expressions"); } @@ -392,14 +393,15 @@ private: CodeGenFunction::OMPPrivateScope PrivScope; }; -/// \brief RAII for emitting code of OpenMP constructs. +/// RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; + const CodeGen::CGBlockInfo *BlockInfo = nullptr; public: - /// \brief Constructs region for combined constructs. + /// Constructs region for combined constructs. /// \param CodeGen Code generation sequence for combined directives. Includes /// a list of functions used for code generation of implicitly inlined /// regions. @@ -412,6 +414,8 @@ public: std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); LambdaThisCaptureField = CGF.LambdaThisCaptureField; CGF.LambdaThisCaptureField = nullptr; + BlockInfo = CGF.BlockInfo; + CGF.BlockInfo = nullptr; } ~InlinedOpenMPRegionRAII() { @@ -422,28 +426,29 @@ public: CGF.CapturedStmtInfo = OldCSI; std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); CGF.LambdaThisCaptureField = LambdaThisCaptureField; + CGF.BlockInfo = BlockInfo; } }; -/// \brief Values for bit flags used in the ident_t to describe the fields. 
+/// Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h enum OpenMPLocationFlags : unsigned { - /// \brief Use trampoline for internal microtask. + /// Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, - /// \brief Use c-style ident structure. + /// Use c-style ident structure. OMP_IDENT_KMPC = 0x02, - /// \brief Atomic reduction option for kmpc_reduce. + /// Atomic reduction option for kmpc_reduce. OMP_ATOMIC_REDUCE = 0x10, - /// \brief Explicit 'barrier' directive. + /// Explicit 'barrier' directive. OMP_IDENT_BARRIER_EXPL = 0x20, - /// \brief Implicit barrier in code. + /// Implicit barrier in code. OMP_IDENT_BARRIER_IMPL = 0x40, - /// \brief Implicit barrier in 'for' directive. + /// Implicit barrier in 'for' directive. OMP_IDENT_BARRIER_IMPL_FOR = 0x40, - /// \brief Implicit barrier in 'sections' directive. + /// Implicit barrier in 'sections' directive. OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, - /// \brief Implicit barrier in 'single' directive. + /// Implicit barrier in 'single' directive. OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, /// Call of __kmp_for_static_init for static loop. OMP_IDENT_WORK_LOOP = 0x200, @@ -454,7 +459,7 @@ enum OpenMPLocationFlags : unsigned { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; -/// \brief Describes ident structure that describes a source location. +/// Describes ident structure that describes a source location. /// All descriptions are taken from /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h /// Original structure: @@ -481,24 +486,24 @@ enum OpenMPLocationFlags : unsigned { /// */ /// } ident_t; enum IdentFieldIndex { - /// \brief might be used in Fortran + /// might be used in Fortran IdentField_Reserved_1, - /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. IdentField_Flags, - /// \brief Not really used in Fortran any more + /// Not really used in Fortran any more IdentField_Reserved_2, - /// \brief Source[4] in Fortran, do not use for C++ + /// Source[4] in Fortran, do not use for C++ IdentField_Reserved_3, - /// \brief String describing the source location. The string is composed of + /// String describing the source location. The string is composed of /// semi-colon separated fields which describe the source file, the function /// and a pair of line numbers that delimit the construct. IdentField_PSource }; -/// \brief Schedule types for 'omp for' loops (these enumerators are taken from +/// Schedule types for 'omp for' loops (these enumerators are taken from /// the enum sched_type in kmp.h). enum OpenMPSchedType { - /// \brief Lower bound for default (unordered) versions. + /// Lower bound for default (unordered) versions. OMP_sch_lower = 32, OMP_sch_static_chunked = 33, OMP_sch_static = 34, @@ -508,7 +513,7 @@ enum OpenMPSchedType { OMP_sch_auto = 38, /// static with chunk adjustment (e.g., simd) OMP_sch_static_balanced_chunked = 45, - /// \brief Lower bound for 'ordered' versions. + /// Lower bound for 'ordered' versions. 
OMP_ord_lower = 64, OMP_ord_static_chunked = 65, OMP_ord_static = 66, @@ -517,7 +522,7 @@ enum OpenMPSchedType { OMP_ord_runtime = 69, OMP_ord_auto = 70, OMP_sch_default = OMP_sch_static, - /// \brief dist_schedule types + /// dist_schedule types OMP_dist_sch_static_chunked = 91, OMP_dist_sch_static = 92, /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. @@ -528,13 +533,13 @@ enum OpenMPSchedType { }; enum OpenMPRTLFunction { - /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, /// kmpc_micro microtask, ...); OMPRTL__kmpc_fork_call, - /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, /// kmp_int32 global_tid, void *data, size_t size, void ***cache); OMPRTL__kmpc_threadprivate_cached, - /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// Call to void __kmpc_threadprivate_register( ident_t *, /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); OMPRTL__kmpc_threadprivate_register, // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); @@ -742,11 +747,11 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { /// UDR decl used for reduction. static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = + if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (const auto *DRE = dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) return DRD; return nullptr; } @@ -759,48 +764,51 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, if (DRD->getInitializer()) { std::pair<llvm::Function *, llvm::Function *> Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast<CallExpr>(InitOp); - auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const auto *CE = cast<CallExpr>(InitOp); + const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); - auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + const auto *LHSDRE = + cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + const auto *RHSDRE = + cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); + [=]() { return Private; }); PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); + [=]() { return Original; }); (void)PrivateScope.Privatize(); RValue Func = RValue::get(Reduction.second); CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); CGF.EmitIgnoredExpr(InitOp); } else { llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); auto *GV = new llvm::GlobalVariable( CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); + 
llvm::GlobalValue::PrivateLinkage, Init, Name); LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); RValue InitRVal; switch (CGF.getEvaluationKind(Ty)) { case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); break; case TEK_Complex: InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; case TEK_Aggregate: InitRVal = RValue::getAggregate(LV.getAddress()); break; } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), /*IsInitializer=*/false); } } -/// \brief Emit initialization of arrays of complex types. +/// Emit initialization of arrays of complex types. /// \param DestAddr Address of the array. /// \param Type Type of array. /// \param Init Initial expression of array. @@ -814,8 +822,8 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType ElementTy; // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); + llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); if (DRD) @@ -825,18 +833,18 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, llvm::Value *SrcBegin = nullptr; if (DRD) SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); + llvm::Value *DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + llvm::Value *IsEmpty = CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(BodyBB); CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); @@ -871,16 +879,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, if (DRD) { // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); } // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); // Check whether we've reached the end. 
- auto Done = + llvm::Value *Done = CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); @@ -889,6 +897,25 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } +static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> +isDeclareTargetDeclaration(const ValueDecl *VD) { + for (const Decl *D : VD->redecls()) { + if (!D->hasAttrs()) + continue; + if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) + return Attr->getMapType(); + } + if (const auto *V = dyn_cast<VarDecl>(VD)) { + if (const VarDecl *TD = V->getTemplateInstantiationPattern()) + return isDeclareTargetDeclaration(TD); + } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { + if (const auto *TD = FD->getTemplateInstantiationPattern()) + return isDeclareTargetDeclaration(TD); + } + + return llvm::None; +} + LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { return CGF.EmitOMPSharedLValue(E); } @@ -906,7 +933,7 @@ void ReductionCodeGen::emitAggregateInitialization( // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); bool EmitDeclareReductionInit = DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); @@ -926,7 +953,7 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, BaseDecls.reserve(Shareds.size()); auto IPriv = Privates.begin(); auto IRed = ReductionOps.begin(); - for (const auto *Ref : Shareds) { + for (const Expr *Ref : Shareds) { ClausesData.emplace_back(Ref, *IPriv, *IRed); std::advance(IPriv, 1); std::advance(IRed, 1); @@ -942,7 +969,7 @@ void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); @@ -955,7 +982,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - llvm::Type *ElemType = + auto *ElemType = cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); @@ -981,7 +1008,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); if (!PrivateType->isVariablyModifiedType()) { @@ -1002,9 +1029,10 @@ void ReductionCodeGen::emitInitialization( CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { assert(SharedAddresses.size() > N && "No variable was generated"); - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + const OMPDeclareReductionDecl *DRD = + getReductionInit(ClausesData[N].ReductionOp); 
QualType PrivateType = PrivateVD->getType(); PrivateAddr = CGF.Builder.CreateElementBitCast( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); @@ -1029,7 +1057,7 @@ void ReductionCodeGen::emitInitialization( } bool ReductionCodeGen::needCleanups(unsigned N) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); @@ -1038,7 +1066,7 @@ bool ReductionCodeGen::needCleanups(unsigned N) { void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); @@ -1054,9 +1082,9 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, BaseTy = BaseTy.getNonReferenceType(); while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs<PointerType>()) + if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { + } else { LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } @@ -1097,28 +1125,32 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, return Address(Addr, BaseLVAlignment); } -Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr) { - const DeclRefExpr *DE; +static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { const VarDecl *OrigVD = nullptr; - if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { + const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); DE = cast<DeclRefExpr>(Base); OrigVD = cast<VarDecl>(DE->getDecl()); - } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { + const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); DE = cast<DeclRefExpr>(Base); OrigVD = cast<VarDecl>(DE->getDecl()); } - if (OrigVD) { + return OrigVD; +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { BaseDecls.emplace_back(OrigVD); - auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue OriginalBaseLValue = CGF.EmitLValue(DE); LValue BaseLValue = loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); @@ -1140,7 +1172,8 @@ Address 
ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, } bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { - auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + const OMPDeclareReductionDecl *DRD = + getReductionInit(ClausesData[N].ReductionOp); return DRD && DRD->getInitializer(); } @@ -1170,12 +1203,38 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( AlignmentSource::Decl); } -CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), OffloadEntriesInfoManager(CGM) { - IdentTy = llvm::StructType::create( - "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, - CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, - CGM.Int8PtrTy /* psource */); +static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, + QualType FieldTy) { + auto *Field = FieldDecl::Create( + C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, + C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + DC->addDecl(Field); + return Field; +} + +CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, + StringRef Separator) + : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), + OffloadEntriesInfoManager(CGM) { + ASTContext &C = CGM.getContext(); + RecordDecl *RD = C.buildImplicitRecord("ident_t"); + QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + RD->startDefinition(); + // reserved_1 + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // flags + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // reserved_2 + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // reserved_3 + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // psource + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + RD->completeDefinition(); + IdentQTy = C.getRecordType(RD); + IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -1185,12 +1244,23 @@ void CGOpenMPRuntime::clear() { InternalVars.clear(); } +std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + StringRef Sep = FirstSeparator; + for (StringRef Part : Parts) { + OS << Sep << Part; + Sep = Separator; + } + return OS.str(); +} + static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner) { // void .omp_combiner.(Ty *in, Ty *out); - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); QualType PtrTy = C.getPointerType(Ty).withRestrict(); FunctionArgList Args; ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), @@ -1199,28 +1269,30 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); Args.push_back(&OmpOutParm); Args.push_back(&OmpInParm); - auto &FnInfo = + const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); - auto *Fn = llvm::Function::Create( - FnTy, llvm::GlobalValue::InternalLinkage, - IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName( + {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); Fn->removeFnAttr(llvm::Attribute::NoInline); Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), + Out->getLocation()); CodeGenFunction::OMPPrivateScope Scope(CGF); Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); - Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { + Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) .getAddress(); }); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); - Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { + Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) .getAddress(); }); @@ -1242,7 +1314,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { if (UDRMap.count(D) > 0) return; - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); if (!In || !Out) { In = &C.Idents.get("omp_in"); Out = &C.Idents.get("omp_out"); @@ -1252,7 +1324,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( cast<VarDecl>(D->lookup(Out).front()), /*IsCombiner=*/true); llvm::Function *Initializer = nullptr; - if (auto *Init = D->getInitializer()) { + if (const Expr *Init = D->getInitializer()) { if (!Priv || !Orig) { Priv = &C.Idents.get("omp_priv"); Orig = &C.Idents.get("omp_orig"); @@ -1265,7 +1337,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( cast<VarDecl>(D->lookup(Priv).front()), /*IsCombiner=*/false); } - UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); + UDRMap.try_emplace(D, Combiner, Initializer); if (CGF) { auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); Decls.second.push_back(D); @@ -1281,25 +1353,6 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } -// Layout information for ident_t. -static CharUnits getIdentAlign(CodeGenModule &CGM) { - return CGM.getPointerAlign(); -} -static CharUnits getIdentSize(CodeGenModule &CGM) { - assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); - return CharUnits::fromQuantity(16) + CGM.getPointerSize(); -} -static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { - // All the fields except the last are i32, so this works beautifully. 
- return unsigned(Field) * CharUnits::fromQuantity(4); -} -static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, - IdentFieldIndex Field, - const llvm::Twine &Name = "") { - auto Offset = getOffsetOfIdentField(Field); - return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); -} - static llvm::Value *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1308,19 +1361,20 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction( "thread id variable must be of type kmp_int32 *"); CodeGenFunction CGF(CGM, true); bool HasCancel = false; - if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) + if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) HasCancel = OPD->hasCancel(); - else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) + else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) HasCancel = OPSD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) + else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) + else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) + else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) + else if (const auto *OPFD = + dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = + else if (const auto *OPFD = dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, @@ -1352,8 +1406,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( bool Tied, unsigned &NumberOfParts) { auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, PrePostActionTy &) { - auto *ThreadID = getThreadID(CGF, D.getLocStart()); - auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart()); + llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), @@ -1366,21 +1420,69 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); - auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); - auto *TD = dyn_cast<OMPTaskDirective>(&D); + const OpenMPDirectiveKind Region = + isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop + : OMPD_task; + const CapturedStmt *CS = D.getCapturedStmt(Region); + const auto *TD = dyn_cast<OMPTaskDirective>(&D); CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, TD ? 
TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) NumberOfParts = Action.getNumberOfParts(); return Res; } +static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, + const RecordDecl *RD, const CGRecordLayout &RL, + ArrayRef<llvm::Constant *> Data) { + llvm::StructType *StructTy = RL.getLLVMType(); + unsigned PrevIdx = 0; + ConstantInitBuilder CIBuilder(CGM); + auto DI = Data.begin(); + for (const FieldDecl *FD : RD->fields()) { + unsigned Idx = RL.getLLVMFieldNo(FD); + // Fill the alignment. + for (unsigned I = PrevIdx; I < Idx; ++I) + Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); + PrevIdx = Idx + 1; + Fields.add(*DI); + ++DI; + } +} + +template <class... As> +static llvm::GlobalVariable * +createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty, + ArrayRef<llvm::Constant *> Data, const Twine &Name, + As &&... Args) { + const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); + const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); + ConstantInitBuilder CIBuilder(CGM); + ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); + buildStructValue(Fields, CGM, RD, RL, Data); + return Fields.finishAndCreateGlobal( + Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), + /*isConstant=*/true, std::forward<As>(Args)...); +} + +template <typename T> +static void +createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, + ArrayRef<llvm::Constant *> Data, + T &Parent) { + const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); + const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); + ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); + buildStructValue(Fields, CGM, RD, RL, Data); + Fields.finishAndAddTo(Parent); +} + Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { - CharUnits Align = getIdentAlign(CGM); + CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { if (!DefaultOpenMPPSource) { @@ -1394,17 +1496,15 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } - ConstantInitBuilder builder(CGM); - auto fields = builder.beginStruct(IdentTy); - fields.addInt(CGM.Int32Ty, 0); - fields.addInt(CGM.Int32Ty, Flags); - fields.addInt(CGM.Int32Ty, 0); - fields.addInt(CGM.Int32Ty, 0); - fields.add(DefaultOpenMPPSource); - auto DefaultOpenMPLocation = - fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, - llvm::GlobalValue::PrivateLinkage); - DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::getNullValue(CGM.Int32Ty), + llvm::ConstantInt::getNullValue(CGM.Int32Ty), + DefaultOpenMPPSource}; + llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct( + CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage); + DefaultOpenMPLocation->setUnnamedAddr( + llvm::GlobalValue::UnnamedAddr::Global); OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; } @@ -1422,17 +1522,17 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, assert(CGF.CurFn && "No function in current CodeGenFunction."); + CharUnits Align = 
CGM.getContext().getTypeAlignInChars(IdentQTy); Address LocValue = Address::invalid(); auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) - LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); + LocValue = Address(I->second.DebugLoc, Align); // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if // GetOpenMPThreadID was called before this routine. if (!LocValue.isValid()) { // Generate "ident_t .kmpc_loc.addr;" - Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), - ".kmpc_loc.addr"); + Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; @@ -1440,29 +1540,30 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), - CGM.getSize(getIdentSize(CGF.CGM))); + CGF.getTypeSize(IdentQTy)); } // char **psource = &.kmpc_loc_<flags>.addr.psource; - Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); + LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); + auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); + LValue PSource = + CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); - auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); + llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); if (OMPDebugLoc == nullptr) { SmallString<128> Buffer2; llvm::raw_svector_ostream OS2(Buffer2); // Build debug location PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); OS2 << ";" << PLoc.getFilename() << ";"; - if (const FunctionDecl *FD = - dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) OS2 << FD->getQualifiedNameAsString(); - } OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; } // *psource = ";<File>;<Function>;<Line>;<Column>;;"; - CGF.Builder.CreateStore(OMPDebugLoc, PSource); + CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); // Our callers always pass this to a runtime function, so for // convenience, go ahead and return a naked pointer. @@ -1490,8 +1591,8 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { if (OMPRegionInfo->getThreadIDVariable()) { // Check if this an outlined function with thread id passed as argument. - auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); - ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in // function. if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { @@ -1509,7 +1610,7 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, // function. 
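The string stored through psource in emitUpdateLocation above is just the presumed location flattened into a single ';'-separated token. A purely illustrative snippet of the same layout, using a hypothetical file, function, line and column:

    #include <cstdio>
    int main() {
      char psource[128];
      // Matches the ";<File>;<Function>;<Line>;<Column>;;" format built above.
      std::snprintf(psource, sizeof(psource), ";%s;%s;%u;%u;;", "test.c", "foo", 12u, 3u);
      // psource now holds ";test.c;foo;12;3;;"
      return 0;
    }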
CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - auto *Call = CGF.Builder.CreateCall( + llvm::CallInst *Call = CGF.Builder.CreateCall( createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); Call->setCallingConv(CGF.getRuntimeCC()); @@ -1523,17 +1624,14 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); if (FunctionUDRMap.count(CGF.CurFn) > 0) { - for(auto *D : FunctionUDRMap[CGF.CurFn]) { + for(auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); - } FunctionUDRMap.erase(CGF.CurFn); } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { - if (!IdentTy) { - } - return llvm::PointerType::getUnqual(IdentTy); + return IdentTy->getPointerTo(); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { @@ -1555,7 +1653,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; @@ -1563,7 +1661,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_global_thread_num: { // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; @@ -1574,7 +1672,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.SizeTy, CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); break; @@ -1585,7 +1683,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); break; @@ -1596,7 +1694,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy), CGM.IntPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); break; @@ -1605,21 +1703,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_threadprivate_register(ident_t *, void *data, // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); // typedef void *(*kmpc_ctor)(void *); - auto KmpcCtorTy = + auto *KmpcCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, /*isVarArg*/ false)->getPointerTo(); // typedef void *(*kmpc_cctor)(void *, void *); llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto KmpcCopyCtorTy = + auto *KmpcCopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, - /*isVarArg*/ false)->getPointerTo(); + /*isVarArg*/ false) + ->getPointerTo(); 
// typedef void (*kmpc_dtor)(void *); - auto KmpcDtorTy = + auto *KmpcDtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) ->getPointerTo(); llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, KmpcCopyCtorTy, KmpcDtorTy}; - auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); break; @@ -1630,7 +1729,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); break; @@ -1639,7 +1738,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); break; @@ -1647,7 +1746,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_barrier: { // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; @@ -1655,7 +1754,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_for_static_fini: { // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); break; @@ -1665,7 +1764,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); break; @@ -1674,7 +1773,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); break; @@ -1683,7 +1782,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; @@ -1691,7 +1790,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_flush: { // Build void __kmpc_flush(ident_t *loc); 
llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); break; @@ -1699,7 +1798,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_master: { // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); break; @@ -1707,7 +1806,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_master: { // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); break; @@ -1716,7 +1815,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, // int end_part); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); break; @@ -1724,7 +1823,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_single: { // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); break; @@ -1732,7 +1831,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_single: { // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); break; @@ -1746,7 +1845,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; // Return void * and then cast to particular kmp_task_t type. 
- llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); break; @@ -1756,7 +1855,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); break; @@ -1771,7 +1870,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, CpyFnTy->getPointerTo(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); break; @@ -1787,7 +1886,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); break; @@ -1804,7 +1903,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); break; @@ -1815,7 +1914,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); break; @@ -1826,7 +1925,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); @@ -1837,7 +1936,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); @@ -1848,7 +1947,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_complete_if0"); @@ -1857,7 +1956,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_ordered: { // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); break; @@ -1865,7 +1964,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_ordered: { // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); break; @@ -1873,7 +1972,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_omp_taskwait: { // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); break; @@ -1881,7 +1980,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_taskgroup: { // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); break; @@ -1889,7 +1988,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_taskgroup: { // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); break; @@ -1898,7 +1997,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, // int proc_bind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); break; @@ -1910,7 +2009,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); @@ -1923,7 +2022,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); break; @@ -1932,7 +2031,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 // global_tid, kmp_int32 cncl_kind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, 
TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); break; @@ -1941,7 +2040,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; @@ -1951,7 +2050,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // kmp_int32 num_teams, kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); break; @@ -1961,7 +2060,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); break; @@ -1981,7 +2080,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.IntTy, CGM.Int64Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); break; @@ -1993,7 +2092,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); break; @@ -2001,7 +2100,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_doacross_fini: { // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); break; @@ -2011,7 +2110,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *vec); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); break; @@ -2021,7 +2120,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *vec); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; @@ -2030,7 +2129,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void // *data); llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_task_reduction_init"); @@ -2040,7 +2139,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); @@ -2057,7 +2156,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; @@ -2073,7 +2172,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); break; @@ -2091,7 +2190,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); break; @@ -2109,7 +2208,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); break; @@ -2119,7 +2218,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { QualType ParamTy = CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); break; @@ -2129,7 +2228,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { QualType ParamTy = CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; @@ -2143,7 +2242,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); break; @@ -2172,7 +2271,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); break; @@ -2201,7 +2300,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); break; @@ -2230,12 +2329,12 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" - : "__kmpc_for_static_init_4u") - : (IVSigned ? "__kmpc_for_static_init_8" - : "__kmpc_for_static_init_8u"); - auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto PtrTy = llvm::PointerType::getUnqual(ITy); + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" + : "__kmpc_for_static_init_4u") + : (IVSigned ? "__kmpc_for_static_init_8" + : "__kmpc_for_static_init_8u"); + llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; + auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid @@ -2247,7 +2346,7 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, ITy, // incr ITy // chunk }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } @@ -2256,11 +2355,11 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); - auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; + llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid CGM.Int32Ty, // schedtype @@ -2269,7 +2368,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, ITy, // stride ITy // chunk }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } @@ -2278,7 +2377,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); @@ -2286,7 +2385,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FnTy, Name); } @@ -2295,12 +2394,12 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); - auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; + auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid @@ -2309,18 +2408,48 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, PtrTy, // p_upper PtrTy // p_stride }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } +Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { + if (CGM.getLangOpts().OpenMPSimd) + return Address::invalid(); + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD); + if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { + SmallString<64> PtrName; + { + llvm::raw_svector_ostream OS(PtrName); + OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; + } + llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); + if (!Ptr) { + QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); + Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), + PtrName); + if (!CGM.getLangOpts().OpenMPIsDevice) { + auto *GV = cast<llvm::GlobalVariable>(Ptr); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + GV->setInitializer(CGM.GetAddrOfGlobal(VD)); + } + CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); + registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); + } + return Address(Ptr, CGM.getContext().getDeclAlign(VD)); + } + return Address::invalid(); +} + llvm::Constant * CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { assert(!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()); // Lookup the entry, lazily creating it if necessary. - return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, - Twine(CGM.getMangledName(VD)) + ".cache."); + std::string Suffix = getName({"cache", ""}); + return getOrCreateInternalVariable( + CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); } Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, @@ -2331,7 +2460,7 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, CGM.getContext().getTargetInfo().isTLSSupported()) return VDAddr; - auto VarTy = VDAddr.getElementType(); + llvm::Type *VarTy = VDAddr.getElementType(); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), @@ -2347,15 +2476,14 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime // library. - auto OMPLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), OMPLoc); // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) // to register constructor/destructor for variable. 
- llvm::Value *Args[] = {OMPLoc, - CGF.Builder.CreatePointerCast(VDAddr.getPointer(), - CGM.VoidPtrTy), - Ctor, CopyCtor, Dtor}; + llvm::Value *Args[] = { + OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), + Ctor, CopyCtor, Dtor}; CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); } @@ -2373,29 +2501,31 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; - auto Init = VD->getAnyInitializer(); + const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { // Generate function that re-emits the declaration's initializer into the // threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, + /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( CGM.getContext().VoidPtrTy, Args); - auto FTy = CGM.getTypes().GetFunctionType(FI); - auto Fn = CGM.CreateGlobalInitOrDestructFunction( - FTy, ".__kmpc_global_ctor_.", FI, Loc); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string Name = getName({"__kmpc_global_ctor_", ""}); + llvm::Function *Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, - Args, SourceLocation()); - auto ArgVal = CtorCGF.EmitLoadOfScalar( + Args, Loc, Loc); + llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); Address Arg = Address(ArgVal, VDAddr.getAlignment()); - Arg = CtorCGF.Builder.CreateElementBitCast(Arg, - CtorCGF.ConvertTypeForMem(ASTTy)); + Arg = CtorCGF.Builder.CreateElementBitCast( + Arg, CtorCGF.ConvertTypeForMem(ASTTy)); CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), /*IsInitializer=*/true); ArgVal = CtorCGF.EmitLoadOfScalar( @@ -2410,21 +2540,23 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // of the variable VD CodeGenFunction DtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, + /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( CGM.getContext().VoidTy, Args); - auto FTy = CGM.getTypes().GetFunctionType(FI); - auto Fn = CGM.CreateGlobalInitOrDestructFunction( - FTy, ".__kmpc_global_dtor_.", FI, Loc); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string Name = getName({"__kmpc_global_dtor_", ""}); + llvm::Function *Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, - SourceLocation()); + Loc, Loc); // Create a scope with an artificial location for the body of this function. 
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); - auto ArgVal = DtorCGF.EmitLoadOfScalar( + llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, @@ -2438,34 +2570,36 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto CopyCtorTy = - llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, - /*isVarArg=*/false)->getPointerTo(); + auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, + /*isVarArg=*/false) + ->getPointerTo(); // Copying constructor for the threadprivate variable. // Must be NULL - reserved by runtime, but currently it requires that this // parameter is always NULL. Otherwise it fires assertion. CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); if (Ctor == nullptr) { - auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, - /*isVarArg=*/false)->getPointerTo(); + auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg=*/false) + ->getPointerTo(); Ctor = llvm::Constant::getNullValue(CtorTy); } if (Dtor == nullptr) { - auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, - /*isVarArg=*/false)->getPointerTo(); + auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, + /*isVarArg=*/false) + ->getPointerTo(); Dtor = llvm::Constant::getNullValue(DtorTy); } if (!CGF) { - auto InitFunctionTy = + auto *InitFunctionTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); - auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( - InitFunctionTy, ".__omp_threadprivate_init_.", - CGM.getTypes().arrangeNullaryFunction()); + std::string Name = getName({"__omp_threadprivate_init_", ""}); + llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( + InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); CodeGenFunction InitCGF(CGM); FunctionArgList ArgList; InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, CGM.getTypes().arrangeNullaryFunction(), ArgList, - Loc); + Loc, Loc); emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); InitCGF.FinishFunction(); return InitFunction; @@ -2475,19 +2609,156 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } +/// Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. 
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum) { + SourceManager &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); +} + +bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, + llvm::GlobalVariable *Addr, + bool PerformInit) { + Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD); + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) + return false; + VD = VD->getDefinition(CGM.getContext()); + if (VD && !DeclareTargetWithDefinition.insert(VD).second) + return CGM.getLangOpts().OpenMPIsDevice; + + QualType ASTTy = VD->getType(); + + SourceLocation Loc = VD->getCanonicalDecl()->getLocStart(); + // Produce the unique prefix to identify the new target regions. We use + // the source location of the variable declaration which we know to not + // conflict with any target region. + unsigned DeviceID; + unsigned FileID; + unsigned Line; + getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); + SmallString<128> Buffer, Out; + { + llvm::raw_svector_ostream OS(Buffer); + OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; + } + + const Expr *Init = VD->getAnyInitializer(); + if (CGM.getLangOpts().CPlusPlus && PerformInit) { + llvm::Constant *Ctor; + llvm::Constant *ID; + if (CGM.getLangOpts().OpenMPIsDevice) { + // Generate function that re-emits the declaration's initializer into + // the threadprivate copy of the variable VD + CodeGenFunction CtorCGF(CGM); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, Twine(Buffer, "_ctor"), FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); + CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, + FunctionArgList(), Loc, Loc); + auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); + CtorCGF.EmitAnyExprToMem(Init, + Address(Addr, CGM.getContext().getDeclAlign(VD)), + Init->getType().getQualifiers(), + /*IsInitializer=*/true); + CtorCGF.FinishFunction(); + Ctor = Fn; + ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); + } else { + Ctor = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); + ID = Ctor; + } + + // Register the information for the entry associated with the constructor. 
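The Buffer composed above gives every declare-target variable a unique offload entry name of the form __omp_offloading__<device-id>_<file-id>_<name>_l<line> (IDs printed in hex, taken from getTargetEntryUniqueInfo), and the constructor/destructor entries registered next simply append "_ctor"/"_dtor". A small sketch with hypothetical IDs:

    #include <ios>
    #include <sstream>
    #include <string>
    // Hypothetical values; the real ones come from getTargetEntryUniqueInfo.
    std::string offloadEntryName(unsigned DeviceID, unsigned FileID,
                                 const std::string &VarName, unsigned Line) {
      std::ostringstream OS;
      OS << "__omp_offloading_" << '_' << std::hex << DeviceID << '_' << FileID
         << '_' << std::dec << VarName << "_l" << Line;
      return OS.str();
    }
    // offloadEntryName(0x801, 0x4f, "gbl", 5) == "__omp_offloading__801_4f_gbl_l5"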
+ Out.clear(); + OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, + ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); + } + if (VD->getType().isDestructedType() != QualType::DK_none) { + llvm::Constant *Dtor; + llvm::Constant *ID; + if (CGM.getLangOpts().OpenMPIsDevice) { + // Generate function that emits destructor call for the threadprivate + // copy of the variable VD + CodeGenFunction DtorCGF(CGM); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, Twine(Buffer, "_dtor"), FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); + DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, + FunctionArgList(), Loc, Loc); + // Create a scope with an artificial location for the body of this + // function. + auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); + DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), + ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), + DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); + DtorCGF.FinishFunction(); + Dtor = Fn; + ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); + } else { + Dtor = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); + ID = Dtor; + } + // Register the information for the entry associated with the destructor. + Out.clear(); + OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, + ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); + } + return CGM.getLangOpts().OpenMPIsDevice; +} + Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { - llvm::Twine VarName(Name, ".artificial."); + std::string Suffix = getName({"artificial", ""}); + std::string CacheSuffix = getName({"cache", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); - llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value *GAddr = + getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); llvm::Value *Args[] = { emitUpdateLocation(CGF, SourceLocation()), getThreadID(CGF, SourceLocation()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, /*IsSigned=*/false), - getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + getOrCreateInternalVariable( + CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; return Address( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitRuntimeCall( @@ -2496,13 +2767,6 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGM.getPointerAlign()); } -/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen -/// function. Here is the logic: -/// if (Cond) { -/// ThenGen(); -/// } else { -/// ElseGen(); -/// } void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen) { @@ -2521,9 +2785,9 @@ void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Otherwise, the condition did not fold, or we couldn't elide it. 
Just // emit the conditional branch. - auto ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto ElseBlock = CGF.createBasicBlock("omp_if.else"); - auto ContBlock = CGF.createBasicBlock("omp_if.end"); + llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); + llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); // Emit the 'then' code. @@ -2548,11 +2812,11 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) return; - auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); - auto &RT = CGF.CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars @@ -2561,13 +2825,13 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); + llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); - auto ThreadID = RT.getThreadID(CGF, Loc); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); + llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; @@ -2575,13 +2839,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(>id, &zero, CapturedStruct); - auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); - Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + // ThreadId for serialized parallels is 0. 
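When the if-clause evaluates to false, the ElseGen lambda above emits the serialized fallback: the runtime is told the region is serialized, the outlined function is invoked directly with zero for both the global and bound thread ids, and the serialization is ended. A compilable sketch of that call sequence, assuming the __kmpc_* entry points declared elsewhere in this file:

    #include <cstdint>
    typedef std::int32_t kmp_int32;
    struct ident_t;  // runtime source-location record, see the sketch earlier
    extern "C" kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    extern "C" void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 gtid);
    extern "C" void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 gtid);

    static void outlined(kmp_int32 *gtid, kmp_int32 *btid) { /* parallel region body */ }

    static void runSerialized(ident_t *loc) {
      kmp_int32 gtid = __kmpc_global_thread_num(loc);
      __kmpc_serialized_parallel(loc, gtid);
      kmp_int32 zero = 0;  // thread id seen inside a serialized region is 0
      outlined(&zero, &zero);
      __kmpc_end_serialized_parallel(loc, gtid);
    }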
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2592,9 +2855,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - else { + } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } @@ -2613,10 +2876,10 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (OMPRegionInfo->getThreadIDVariable()) return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); - auto ThreadID = getThreadID(CGF, Loc); - auto Int32Ty = + llvm::Value *ThreadID = getThreadID(CGF, Loc); + QualType Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); - auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); + Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); @@ -2629,8 +2892,8 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Name; - auto RuntimeName = Out.str(); - auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; + StringRef RuntimeName = Out.str(); + auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; if (Elem.second) { assert(Elem.second->getType()->getPointerElementType() == Ty && "OMP internal variable has different type than requested"); @@ -2644,8 +2907,9 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, } llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { - llvm::Twine Name(".gomp_critical_user_", CriticalName); - return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); + std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); + std::string Name = getName({Prefix, "var"}); + return getOrCreateInternalVariable(KmpCriticalNameTy, Name); } namespace { @@ -2779,21 +3043,28 @@ static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, static llvm::Value *emitCopyprivateCopyFunction( CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, - ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { - auto &C = CGM.getContext(); + ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); - ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - ".omp.copyprivate.copy_func", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + const auto &CGFI = + 
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); // Dest = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( @@ -2807,13 +3078,15 @@ static llvm::Value *emitCopyprivateCopyFunction( // ... // *(Typen*)Dst[n] = *(Typen*)Src[n]; for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { - auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); + const auto *DestVar = + cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); - auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); + const auto *SrcVar = + cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); - auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); + const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); QualType Type = VD->getType(); CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); } @@ -2833,7 +3106,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, assert(CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()); - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // int32 did_it = 0; // if(__kmpc_single(ident_t *, gtid)) { // SingleOpGen(); @@ -2846,7 +3119,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address DidIt = Address::invalid(); if (!CopyprivateVars.empty()) { // int32 did_it = 0; - auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpInt32Ty = + C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); } @@ -2866,7 +3140,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // <copy_func>, did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); - auto CopyprivateArrayTy = + QualType CopyprivateArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. @@ -2882,14 +3156,14 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Build function that copies private values from single region to all other // threads in the corresponding parallel region. 
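As a reading aid (not part of the patch): the helper built here by emitCopyprivateCopyFunction is emitted directly as IR, but the comments in the hunk describe its shape. A minimal C++ sketch, with an invented name and illustrative element types for two copyprivate variables:

```cpp
// Illustrative sketch only. LHSArg/RHSArg point to arrays of void* holding the
// destination and source addresses collected for the copyprivate clause.
static void omp_copyprivate_copy_func(void *LHSArg, void *RHSArg) {
  void **Dst = static_cast<void **>(LHSArg);
  void **Src = static_cast<void **>(RHSArg);
  // *(Type0*)Dst[0] = *(Type0*)Src[0]; ... one assignment per variable.
  *static_cast<int *>(Dst[0]) = *static_cast<int *>(Src[0]);
  *static_cast<double *>(Dst[1]) = *static_cast<double *>(Src[1]);
}
```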
- auto *CpyFn = emitCopyprivateCopyFunction( + llvm::Value *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), - CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); - auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); + CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); + llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); - auto *DidItVal = CGF.Builder.CreateLoad(DidIt); + llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), // ident_t *<loc> getThreadID(CGF, Loc), // i32 <gtid> @@ -2948,19 +3222,19 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { - auto *Result = CGF.EmitRuntimeCall( + llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); if (EmitChecks) { // if (__kmpc_cancel_barrier()) { // exit from construct; // } - auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); - auto *ContBB = CGF.createBasicBlock(".cancel.continue"); - auto *Cmp = CGF.Builder.CreateIsNotNull(Result); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); + llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; - auto CancelDestination = + CodeGenFunction::JumpDest CancelDestination = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDestination); CGF.EmitBlock(ContBB, /*IsFinished=*/true); @@ -2971,7 +3245,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); } -/// \brief Map the OpenMP loop schedule to the runtime enumeration. +/// Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { switch (ScheduleKind) { @@ -2993,7 +3267,7 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, llvm_unreachable("Unexpected runtime schedule"); } -/// \brief Map the OpenMP distribute schedule to the runtime enumeration. +/// Map the OpenMP distribute schedule to the runtime enumeration. 
static OpenMPSchedType getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { // only static is allowed for dist_schedule @@ -3002,19 +3276,20 @@ getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const { - auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static; } bool CGOpenMPRuntime::isStaticNonchunked( OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { - auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); return Schedule == OMP_dist_sch_static; } bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { - auto Schedule = + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); return Schedule != OMP_sch_static; @@ -3147,12 +3422,12 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); assert(isOpenMPWorksharingDirective(DKind) && "Expected loop-based or sections-based directive."); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, + llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP : OMP_IDENT_WORK_SECTIONS); - auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Constant *StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -3164,10 +3439,10 @@ void CGOpenMPRuntime::emitDistributeStaticInit( const CGOpenMPRuntime::StaticRTInput &Values) { OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); - auto *UpdatedLocation = + llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); - auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Constant *StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, @@ -3223,7 +3498,7 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, llvm::Value *Call = CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); return CGF.EmitScalarConversion( - Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), + Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), CGF.getContext().BoolTy, Loc); } @@ -3285,13 +3560,13 @@ void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, } namespace { -/// \brief Indexes of fields for type kmp_task_t. +/// Indexes of fields for type kmp_task_t. enum KmpTaskTFields { - /// \brief List of shared variables. + /// List of shared variables. KmpTaskTShareds, - /// \brief Task routine. + /// Task routine. KmpTaskTRoutine, - /// \brief Partition id for the untied tasks. + /// Partition id for the untied tasks. KmpTaskTPartId, /// Function with call of destructors for private variables. 
Data1, @@ -3311,11 +3586,11 @@ enum KmpTaskTFields { } // anonymous namespace bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { - // FIXME: Add other entries type when they become supported. - return OffloadEntriesTargetRegion.empty(); + return OffloadEntriesTargetRegion.empty() && + OffloadEntriesDeviceGlobalVar.empty(); } -/// \brief Initialize target region entry. +/// Initialize target region entry. void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, @@ -3325,7 +3600,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: "code generation."); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, - /*Flags=*/0); + OMPTargetRegionEntryTargetRegion); ++OffloadingEntriesNum; } @@ -3333,22 +3608,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, - int32_t Flags) { + OMPTargetRegionEntryKind Flags) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { - assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && - "Entry must exist."); + if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Unable to find target region on line '%0' in the device code."); + CGM.getDiags().Report(DiagID) << LineNum; + return; + } auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); Entry.setFlags(Flags); - return; } else { - OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); + OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; + ++OffloadingEntriesNum; } } @@ -3376,48 +3656,69 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( const OffloadTargetRegionEntryInfoActTy &Action) { // Scan all target region entries and perform the provided action. - for (auto &D : OffloadEntriesTargetRegion) - for (auto &F : D.second) - for (auto &P : F.second) - for (auto &L : P.second) + for (const auto &D : OffloadEntriesTargetRegion) + for (const auto &F : D.second) + for (const auto &P : F.second) + for (const auto &L : P.second) Action(D.first, F.first, P.first(), L.first, L.second); } -/// \brief Create a Ctor/Dtor-like function whose body is emitted through -/// \a Codegen. This is used to emit the two functions that register and -/// unregister the descriptor of the current compilation unit. 
-static llvm::Function * -createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, - const RegionCodeGenTy &Codegen) { - auto &C = CGM.getContext(); - FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); - Args.push_back(&DummyPtr); +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + initializeDeviceGlobalVarEntryInfo(StringRef Name, + OMPTargetGlobalVarEntryKind Flags, + unsigned Order) { + assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " + "only required for the device " + "code generation."); + OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); + ++OffloadingEntriesNum; +} - CodeGenFunction CGF(CGM); - auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto FTy = CGM.getTypes().GetFunctionType(FI); - auto *Fn = - CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); - Codegen(CGF); - CGF.FinishFunction(); - return Fn; +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, + CharUnits VarSize, + OMPTargetGlobalVarEntryKind Flags, + llvm::GlobalValue::LinkageTypes Linkage) { + if (CGM.getLangOpts().OpenMPIsDevice) { + auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; + assert(Entry.isValid() && Entry.getFlags() == Flags && + "Entry not initialized!"); + assert((!Entry.getAddress() || Entry.getAddress() == Addr) && + "Resetting with the new address."); + if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) + return; + Entry.setAddress(Addr); + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } else { + if (hasDeviceGlobalVarEntryInfo(VarName)) + return; + OffloadEntriesDeviceGlobalVar.try_emplace( + VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); + ++OffloadingEntriesNum; + } +} + +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + actOnDeviceGlobalVarEntriesInfo( + const OffloadDeviceGlobalVarEntryInfoActTy &Action) { + // Scan all target region entries and perform the provided action. + for (const auto &E : OffloadEntriesDeviceGlobalVar) + Action(E.getKey(), E.getValue()); } llvm::Function * CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { - // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) return nullptr; - auto &M = CGM.getModule(); - auto &C = CGM.getContext(); + llvm::Module &M = CGM.getModule(); + ASTContext &C = CGM.getContext(); // Get list of devices we care about - auto &Devices = CGM.getLangOpts().OMPTargetTriples; + const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; // We should be creating an offloading descriptor only if there are devices // specified. @@ -3425,46 +3726,49 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // Create the external variables that will point to the begin and end of the // host entries section. These will be defined by the linker. 
- auto *OffloadEntryTy = + llvm::Type *OffloadEntryTy = CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); - llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( - M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - ".omp_offloading.entries_begin"); - llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( + std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); + auto *HostEntriesBegin = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - ".omp_offloading.entries_end"); + EntriesBeginName); + std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); + auto *HostEntriesEnd = + new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, + llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/nullptr, EntriesEndName); // Create all device images auto *DeviceImageTy = cast<llvm::StructType>( CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); ConstantInitBuilder DeviceImagesBuilder(CGM); - auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); + ConstantArrayBuilder DeviceImagesEntries = + DeviceImagesBuilder.beginArray(DeviceImageTy); - for (unsigned i = 0; i < Devices.size(); ++i) { - StringRef T = Devices[i].getTriple(); + for (const llvm::Triple &Device : Devices) { + StringRef T = Device.getTriple(); + std::string BeginName = getName({"omp_offloading", "img_start", ""}); auto *ImgBegin = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, - Twine(".omp_offloading.img_start.") + Twine(T)); + /*Initializer=*/nullptr, Twine(BeginName).concat(T)); + std::string EndName = getName({"omp_offloading", "img_end", ""}); auto *ImgEnd = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); + /*Initializer=*/nullptr, Twine(EndName).concat(T)); - auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); - Dev.add(ImgBegin); - Dev.add(ImgEnd); - Dev.add(HostEntriesBegin); - Dev.add(HostEntriesEnd); - Dev.finishAndAddTo(DeviceImagesEntries); + llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, + HostEntriesEnd}; + createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, + DeviceImagesEntries); } // Create device images global array. + std::string ImagesName = getName({"omp_offloading", "device_images"}); llvm::GlobalVariable *DeviceImages = - DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", - CGM.getPointerAlign(), - /*isConstant=*/true); + DeviceImagesEntries.finishAndCreateGlobal(ImagesName, + CGM.getPointerAlign(), + /*isConstant=*/true); DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // This is a Zero array to be used in the creation of the constant expressions @@ -3472,49 +3776,64 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { llvm::Constant::getNullValue(CGM.Int32Ty)}; // Create the target region descriptor. 
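For orientation (not part of the patch): the constants placed into each device image and into the descriptor below follow a fixed layout. A hedged C++ mirror of the two records, where the field names are assumptions and only the order and types follow the Data arrays and getTgtDeviceImageQTy()/getTgtBinaryDescriptorQTy():

```cpp
#include <cstdint>
struct __tgt_offload_entry; // layout sketched further below

// One record per device triple: {ImgBegin, ImgEnd, HostEntriesBegin, HostEntriesEnd}.
struct __tgt_device_image {
  void *ImageStart;                  // img_start symbol for this triple
  void *ImageEnd;                    // img_end symbol for this triple
  __tgt_offload_entry *EntriesBegin; // linker-defined begin of host entries
  __tgt_offload_entry *EntriesEnd;   // linker-defined end of host entries
};

// The descriptor: {NumDeviceImages, DeviceImages, HostEntriesBegin, HostEntriesEnd}.
struct __tgt_bin_desc {
  std::int32_t NumDeviceImages;
  __tgt_device_image *DeviceImages;
  __tgt_offload_entry *HostEntriesBegin;
  __tgt_offload_entry *HostEntriesEnd;
};
```

The generated registration/unregistration functions then simply pass the address of this descriptor to __tgt_register_lib and __tgt_unregister_lib.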
- auto *BinaryDescriptorTy = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); - ConstantInitBuilder DescBuilder(CGM); - auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); - DescInit.addInt(CGM.Int32Ty, Devices.size()); - DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), - DeviceImages, - Index)); - DescInit.add(HostEntriesBegin); - DescInit.add(HostEntriesEnd); - - auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", - CGM.getPointerAlign(), - /*isConstant=*/true); + llvm::Constant *Data[] = { + llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), + llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), + DeviceImages, Index), + HostEntriesBegin, HostEntriesEnd}; + std::string Descriptor = getName({"omp_offloading", "descriptor"}); + llvm::GlobalVariable *Desc = createConstantGlobalStruct( + CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor); // Emit code to register or unregister the descriptor at execution // startup or closing, respectively. - // Create a variable to drive the registration and unregistration of the - // descriptor, so we can reuse the logic that emits Ctors and Dtors. - auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); - ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), - IdentInfo, C.CharTy, ImplicitParamDecl::Other); - - auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", - [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); - }); - auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", - [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), - Desc); - CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); - }); + llvm::Function *UnRegFn; + { + FunctionArgList Args; + ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.push_back(&DummyPtr); + + CodeGenFunction CGF(CGM); + // Disable debug info for global (de-)initializer because they are not part + // of some particular construct. + CGF.disableDebugInfo(); + const auto &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); + UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); + CGF.FinishFunction(); + } + llvm::Function *RegFn; + { + CodeGenFunction CGF(CGM); + // Disable debug info for global (de-)initializer because they are not part + // of some particular construct. + CGF.disableDebugInfo(); + const auto &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string Descriptor = getName({"omp_offloading", "descriptor_reg"}); + RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); + // Create a variable to drive the registration and unregistration of the + // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
+ ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), + SourceLocation(), nullptr, C.CharTy, + ImplicitParamDecl::Other); + CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); + CGF.FinishFunction(); + } if (CGM.supportsCOMDAT()) { // It is sufficient to call registration function only once, so create a // COMDAT group for registration/unregistration functions and associated // data. That would reduce startup time and code size. Registration // function serves as a COMDAT group key. - auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); + llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); RegFn->setComdat(ComdatKey); @@ -3525,48 +3844,35 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { return RegFn; } -void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, - llvm::Constant *Addr, uint64_t Size, - int32_t Flags) { +void CGOpenMPRuntime::createOffloadEntry( + llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, + llvm::GlobalValue::LinkageTypes Linkage) { StringRef Name = Addr->getName(); - auto *TgtOffloadEntryType = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); - llvm::LLVMContext &C = CGM.getModule().getContext(); llvm::Module &M = CGM.getModule(); - - // Make sure the address has the right type. - llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); + llvm::LLVMContext &C = M.getContext(); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); - llvm::GlobalVariable *Str = - new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, StrPtrInit, - ".omp_offloading.entry_name"); + std::string StringName = getName({"omp_offloading", "entry_name"}); + auto *Str = new llvm::GlobalVariable( + M, StrPtrInit->getType(), /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); - - // We can't have any padding between symbols, so we need to have 1-byte - // alignment. - auto Align = CharUnits::fromQuantity(1); - - // Create the entry struct. - ConstantInitBuilder EntryBuilder(CGM); - auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); - EntryInit.add(AddrPtr); - EntryInit.add(StrPtr); - EntryInit.addInt(CGM.SizeTy, Size); - EntryInit.addInt(CGM.Int32Ty, Flags); - EntryInit.addInt(CGM.Int32Ty, 0); - llvm::GlobalVariable *Entry = - EntryInit.finishAndCreateGlobal(".omp_offloading.entry", - Align, - /*constant*/ true, - llvm::GlobalValue::ExternalLinkage); + + llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), + llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), + llvm::ConstantInt::get(CGM.SizeTy, Size), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, 0)}; + std::string EntryName = getName({"omp_offloading", "entry", ""}); + llvm::GlobalVariable *Entry = createConstantGlobalStruct( + CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name), + llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. 
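Each record written by createOffloadEntry has the shape below (a hedged sketch, not part of the patch; the field names are assumptions, while the types and order follow the five constants in Data and the packed record built by getTgtOffloadEntryQTy()):

```cpp
#include <cstddef>
#include <cstdint>
struct __attribute__((packed)) __tgt_offload_entry {
  void *addr;           // bitcast of the entry ID (target region or variable)
  char *name;           // points at the generated entry-name string
  std::size_t size;     // 0 for target regions, variable size otherwise
  std::int32_t flags;   // OMPTargetRegionEntryKind / OMPTargetGlobalVarEntryKind
  std::int32_t reserved;
};
```

All such entries are emitted into the same offloading-entries section, so the linker-provided begin/end symbols referenced above bracket a contiguous array of them.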
- Entry->setSection(".omp_offloading.entries"); + std::string Section = getName({"omp_offloading", "entries"}); + Entry->setSection(Section); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -3579,71 +3885,142 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Right now we only generate metadata for function that contain target // regions. - // If we do not have entries, we dont need to do anything. + // If we do not have entries, we don't need to do anything. if (OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); - SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> OrderedEntries(OffloadEntriesInfoManager.size()); - // Create the offloading info metadata node. - llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - // Auxiliary methods to create metadata values and strings. - auto getMDInt = [&](unsigned v) { + auto &&GetMDInt = [this](unsigned V) { return llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); + llvm::ConstantInt::get(CGM.Int32Ty, V)); }; - auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; + auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; + + // Create the offloading info metadata node. + llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); // Create function that emits metadata for each target region entry; - auto &&TargetRegionMetadataEmitter = [&]( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, - OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { - llvm::SmallVector<llvm::Metadata *, 32> Ops; - // Generate metadata for target regions. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (0). - // - Entry 1 -> Device ID of the file where the entry was identified. - // - Entry 2 -> File ID of the file where the entry was identified. - // - Entry 3 -> Mangled name of the function where the entry was identified. - // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. - // The first element of the metadata node is the kind. - Ops.push_back(getMDInt(E.getKind())); - Ops.push_back(getMDInt(DeviceID)); - Ops.push_back(getMDInt(FileID)); - Ops.push_back(getMDString(ParentName)); - Ops.push_back(getMDInt(Line)); - Ops.push_back(getMDInt(E.getOrder())); - - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; + auto &&TargetRegionMetadataEmitter = + [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + unsigned DeviceID, unsigned FileID, StringRef ParentName, + unsigned Line, + const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { + // Generate metadata for target regions. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (0). + // - Entry 1 -> Device ID of the file where the entry was identified. + // - Entry 2 -> File ID of the file where the entry was identified. + // - Entry 3 -> Mangled name of the function where the entry was + // identified. + // - Entry 4 -> Line in the file where the entry was identified. + // - Entry 5 -> Order the entry was created. + // The first element of the metadata node is the kind. 
+ llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), + GetMDInt(FileID), GetMDString(ParentName), + GetMDInt(Line), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = &E; + + // Add metadata to the named metadata node. + MD->addOperand(llvm::MDNode::get(C, Ops)); + }; OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( TargetRegionMetadataEmitter); - for (auto *E : OrderedEntries) { + // Create function that emits metadata for each device global variable entry; + auto &&DeviceGlobalVarMetadataEmitter = + [&C, &OrderedEntries, &GetMDInt, &GetMDString, + MD](StringRef MangledName, + const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar + &E) { + // Generate metadata for global variables. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (1). + // - Entry 1 -> Mangled name of the variable. + // - Entry 2 -> Declare target kind. + // - Entry 3 -> Order the entry was created. + // The first element of the metadata node is the kind. + llvm::Metadata *Ops[] = { + GetMDInt(E.getKind()), GetMDString(MangledName), + GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = &E; + + // Add metadata to the named metadata node. + MD->addOperand(llvm::MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + DeviceGlobalVarMetadataEmitter); + + for (const auto *E : OrderedEntries) { assert(E && "All ordered entries must exist!"); - if (auto *CE = + if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( E)) { - assert(CE->getID() && CE->getAddress() && - "Entry ID and Addr are invalid!"); - createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); - } else + if (!CE->getID() || !CE->getAddress()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for target region is incorrect: either the " + "address or the ID is invalid."); + CGM.getDiags().Report(DiagID); + continue; + } + createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, + CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); + } else if (const auto *CE = + dyn_cast<OffloadEntriesInfoManagerTy:: + OffloadEntryInfoDeviceGlobalVar>(E)) { + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = + static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( + CE->getFlags()); + switch (Flags) { + case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { + if (!CE->getAddress()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + continue; + } + break; + } + case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: + assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || + (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && + "Declaret target link address is set."); + if (CGM.getLangOpts().OpenMPIsDevice) + continue; + if (!CE->getAddress()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + continue; + } + break; + } + createOffloadEntry(CE->getAddress(), CE->getAddress(), + CE->getVarSize().getQuantity(), 
Flags, + CE->getLinkage()); + } else { llvm_unreachable("Unsupported entry kind."); + } } } -/// \brief Loads all the offload entries information from the host IR +/// Loads all the offload entries information from the host IR /// metadata. void CGOpenMPRuntime::loadOffloadInfoMetadata() { // If we are in target mode, load the metadata from the host IR. This code has @@ -3656,44 +4033,57 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); - if (Buf.getError()) + if (auto EC = Buf.getError()) { + CGM.getDiags().Report(diag::err_cannot_open_file) + << CGM.getLangOpts().OMPHostIRFile << EC.message(); return; + } llvm::LLVMContext C; auto ME = expectedToErrorOrAndEmitErrors( C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); - if (ME.getError()) + if (auto EC = ME.getError()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); + CGM.getDiags().Report(DiagID) + << CGM.getLangOpts().OMPHostIRFile << EC.message(); return; + } llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); if (!MD) return; - for (auto I : MD->operands()) { - llvm::MDNode *MN = cast<llvm::MDNode>(I); - - auto getMDInt = [&](unsigned Idx) { - llvm::ConstantAsMetadata *V = - cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); + for (llvm::MDNode *MN : MD->operands()) { + auto &&GetMDInt = [MN](unsigned Idx) { + auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); }; - auto getMDString = [&](unsigned Idx) { - llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); + auto &&GetMDString = [MN](unsigned Idx) { + auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); return V->getString(); }; - switch (getMDInt(0)) { + switch (GetMDInt(0)) { default: llvm_unreachable("Unexpected metadata!"); break; case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: - OFFLOAD_ENTRY_INFO_TARGET_REGION: + OffloadingEntryInfoTargetRegion: OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), - /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), - /*Order=*/getMDInt(5)); + /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), + /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), + /*Order=*/GetMDInt(5)); + break; + case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: + OffloadingEntryInfoDeviceGlobalVar: + OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( + /*MangledName=*/GetMDString(1), + static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( + /*Flags=*/GetMDInt(2)), + /*Order=*/GetMDInt(3)); break; } } @@ -3702,7 +4092,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { if (!KmpRoutineEntryPtrTy) { // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
- auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; FunctionProtoType::ExtProtoInfo EPI; KmpRoutineEntryPtrQTy = C.getPointerType( @@ -3711,19 +4101,7 @@ void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { } } -static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, - QualType FieldTy) { - auto *Field = FieldDecl::Create( - C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, - C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - DC->addDecl(Field); - return Field; -} - QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { - // Make sure the type of the entry is already created. This is the type we // have to create: // struct __tgt_offload_entry{ @@ -3736,7 +4114,7 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { // }; if (TgtOffloadEntryQTy.isNull()) { ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); + RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); @@ -3746,6 +4124,7 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); RD->completeDefinition(); + RD->addAttr(PackedAttr::CreateImplicit(C)); TgtOffloadEntryQTy = C.getRecordType(RD); } return TgtOffloadEntryQTy; @@ -3765,7 +4144,7 @@ QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { // }; if (TgtDeviceImageQTy.isNull()) { ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("__tgt_device_image"); + RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.VoidPtrTy); @@ -3789,7 +4168,7 @@ QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { // }; if (TgtBinaryDescriptorQTy.isNull()) { ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); + RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); RD->startDefinition(); addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); @@ -3818,17 +4197,16 @@ typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { if (!Privates.empty()) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Build struct .kmp_privates_t. 
{ // /* private vars */ // }; - auto *RD = C.buildImplicitRecord(".kmp_privates.t"); + RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); RD->startDefinition(); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.Original; - auto Type = VD->getType(); - Type = Type.getNonReferenceType(); - auto *FD = addFieldToRecordDecl(C, RD, Type); + for (const auto &Pair : Privates) { + const VarDecl *VD = Pair.second.Original; + QualType Type = VD->getType().getNonReferenceType(); + FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), E(VD->getAttrs().end()); @@ -3846,7 +4224,7 @@ static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Build struct kmp_task_t { // void * shareds; // kmp_routine_entry_t routine; @@ -3860,13 +4238,13 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_int32 liter; // void * reductions; // }; - auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); + RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); addFieldToRecordDecl(C, UD, KmpInt32Ty); addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); UD->completeDefinition(); QualType KmpCmplrdataTy = C.getRecordType(UD); - auto *RD = C.buildImplicitRecord("kmp_task_t"); + RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); @@ -3891,22 +4269,21 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef<PrivateDataTy> Privates) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Build struct kmp_task_t_with_privates { // kmp_task_t task_data; // .kmp_privates_t. privates; // }; - auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); + RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); RD->startDefinition(); addFieldToRecordDecl(C, RD, KmpTaskTQTy); - if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { + if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); - } RD->completeDefinition(); return RD; } -/// \brief Emit a proxy function which accepts kmp_task_t as the second +/// Emit a proxy function which accepts kmp_task_t as the second /// argument. 
/// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { @@ -3924,7 +4301,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, ImplicitParamDecl::Other); @@ -3933,49 +4310,53 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - auto &TaskEntryFnInfo = + const auto &TaskEntryFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); - auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); - auto *TaskEntry = - llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_entry.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); + llvm::FunctionType *TaskEntryTy = + CGM.getTypes().GetFunctionType(TaskEntryFnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); + auto *TaskEntry = llvm::Function::Create( + TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); + TaskEntry->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); + CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, + Loc, Loc); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, // tt, // For taskloops: // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, // tt->task_data.shareds); - auto *GtidParam = CGF.EmitLoadOfScalar( + llvm::Value *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); - auto *KmpTaskTWithPrivatesQTyRD = + const auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); LValue Base = CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); - auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); + const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); - auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = PartIdLVal.getPointer(); + LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); + llvm::Value *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); - auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); - auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), + LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); + llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(SharedsLVal, Loc), CGF.ConvertTypeForMem(SharedsPtrTy)); auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); llvm::Value *PrivatesParam; if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { - auto PrivatesLVal = CGF.EmitLValueForField(TDBase, 
*PrivatesFI); + LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); - } else + } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, @@ -3987,20 +4368,20 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, std::end(CommonArgs)); if (isOpenMPTaskLoopDirective(Kind)) { auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); - auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); - auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); + LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); + llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); - auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); - auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); + LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); + llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); - auto StLVal = CGF.EmitLValueForField(Base, *StFI); - auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); + LValue StLVal = CGF.EmitLValueForField(Base, *StFI); + llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); - auto LILVal = CGF.EmitLValueForField(Base, *LIFI); - auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); + llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); - auto RLVal = CGF.EmitLValueForField(Base, *RFI); - auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); + LValue RLVal = CGF.EmitLValueForField(Base, *RFI); + llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); @@ -4011,9 +4392,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, CallArgs); - CGF.EmitStoreThroughLValue( - RValue::get(CGF.Builder.getInt32(/*C=*/0)), - CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); + CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), + CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); CGF.FinishFunction(); return TaskEntry; } @@ -4023,7 +4403,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, ImplicitParamDecl::Other); @@ -4032,30 +4412,34 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - auto &DestructorFnInfo = + const auto &DestructorFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); - auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); + llvm::FunctionType *DestructorFnTy = + CGM.getTypes().GetFunctionType(DestructorFnInfo); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); auto *DestructorFn = 
llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_destructor.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, DestructorFnInfo); + DestructorFn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, - Args); + Args, Loc, Loc); LValue Base = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); - auto *KmpTaskTWithPrivatesQTyRD = + const auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); Base = CGF.EmitLValueForField(Base, *FI); - for (auto *Field : + for (const auto *Field : cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { - if (auto DtorKind = Field->getType().isDestructedType()) { - auto FieldLValue = CGF.EmitLValueForField(Base, Field); + if (QualType::DestructionKind DtorKind = + Field->getType().isDestructedType()) { + LValue FieldLValue = CGF.EmitLValueForField(Base, Field); CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); } } @@ -4063,7 +4447,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, return DestructorFn; } -/// \brief Emit a privates mapping function for correct handling of private and +/// Emit a privates mapping function for correct handling of private and /// firstprivate variables. /// \code /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> @@ -4080,7 +4464,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef<PrivateDataTy> Privates) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, @@ -4089,67 +4473,69 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, Args.push_back(&TaskPrivatesArg); llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; unsigned Counter = 1; - for (auto *E: PrivateVars) { + for (const Expr *E : PrivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), ImplicitParamDecl::Other)); - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } - for (auto *E : FirstprivateVars) { + for (const Expr *E : FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), ImplicitParamDecl::Other)); - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } - for (auto *E: LastprivateVars) { + for (const Expr *E : LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), ImplicitParamDecl::Other)); - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } - 
auto &TaskPrivatesMapFnInfo = + const auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *TaskPrivatesMapTy = + llvm::FunctionType *TaskPrivatesMapTy = CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); auto *TaskPrivatesMap = llvm::Function::Create( - TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_privates_map.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, + TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, - TaskPrivatesMapFnInfo, Args); + TaskPrivatesMapFnInfo, Args, Loc, Loc); // *privi = &.privates.privi; LValue Base = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()->castAs<PointerType>()); - auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); + const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); Counter = 0; - for (auto *Field : PrivatesQTyRD->fields()) { - auto FieldLVal = CGF.EmitLValueForField(Base, Field); - auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; - auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( + for (const FieldDecl *Field : PrivatesQTyRD->fields()) { + LValue FieldLVal = CGF.EmitLValueForField(Base, Field); + const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; + LValue RefLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); + LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); ++Counter; @@ -4171,9 +4557,14 @@ static void emitPrivatesInit(CodeGenFunction &CGF, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef<PrivateDataTy> Privates, bool ForDup) { - auto &C = CGF.getContext(); + ASTContext &C = CGF.getContext(); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); + OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) + ? OMPD_taskloop + : OMPD_task; + const CapturedStmt &CS = *D.getCapturedStmt(Kind); + CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); LValue SrcBase; bool IsTargetTask = isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || @@ -4182,40 +4573,38 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // PointersArray and SizesArray. The original variables for these arrays are // not captured and we get their addresses explicitly. if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || - (IsTargetTask && Data.FirstprivateVars.size() > 3)) { + (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } - OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) - ? 
OMPD_taskloop - : OMPD_task; - CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind)); FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); + for (const PrivateDataTy &Pair : Privates) { + const VarDecl *VD = Pair.second.PrivateCopy; + const Expr *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && !CGF.isTrivialInitializer(Init)))) { LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); - if (auto *Elem = Pair.second.PrivateElemInit) { - auto *OriginalVD = Pair.second.Original; + if (const VarDecl *Elem = Pair.second.PrivateElemInit) { + const VarDecl *OriginalVD = Pair.second.Original; // Check if the variable is the target-based BasePointersArray, // PointersArray or SizesArray. LValue SharedRefLValue; QualType Type = OriginalVD->getType(); - if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) && - isa<CapturedDecl>(OriginalVD->getDeclContext()) && - cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() == - 0 && - isa<TranslationUnitDecl>( - cast<CapturedDecl>(OriginalVD->getDeclContext()) - ->getDeclContext())) { + const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); + if (IsTargetTask && !SharedField) { + assert(isa<ImplicitParamDecl>(OriginalVD) && + isa<CapturedDecl>(OriginalVD->getDeclContext()) && + cast<CapturedDecl>(OriginalVD->getDeclContext()) + ->getNumParams() == 0 && + isa<TranslationUnitDecl>( + cast<CapturedDecl>(OriginalVD->getDeclContext()) + ->getDeclContext()) && + "Expected artificial target data variable."); SharedRefLValue = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); } else { - auto *SharedField = CapturesInfo.lookup(OriginalVD); SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), @@ -4226,8 +4615,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array. if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { // Perform simple memcpy. - CGF.EmitAggregateAssign(PrivateLValue.getAddress(), - SharedRefLValue.getAddress(), Type); + CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); } else { // Initialize firstprivate array using element-by-element // initialization. 
@@ -4258,8 +4646,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF, CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } - } else + } else { CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); + } } ++FI; } @@ -4269,11 +4658,13 @@ static void emitPrivatesInit(CodeGenFunction &CGF, static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef<PrivateDataTy> Privates) { bool InitRequired = false; - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); + for (const PrivateDataTy &Pair : Privates) { + const VarDecl *VD = Pair.second.PrivateCopy; + const Expr *Init = VD->getAnyInitializer(); InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && !CGF.isTrivialInitializer(Init)); + if (InitRequired) + break; } return InitRequired; } @@ -4297,7 +4688,7 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy, @@ -4310,16 +4701,17 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); - auto &TaskDupFnInfo = + const auto &TaskDupFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); - auto *TaskDup = - llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_dup.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); + llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); + auto *TaskDup = llvm::Function::Create( + TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); + TaskDup->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, + Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&DstArg), @@ -4362,9 +4754,9 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { bool NeedsCleanup = false; - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); - for (auto *FD : PrivateRD->fields()) { + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); + const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); + for (const FieldDecl *FD : PrivateRD->fields()) { NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); if (NeedsCleanup) break; @@ -4377,41 +4769,41 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); llvm::SmallVector<PrivateDataTy, 4> Privates; // Aggregate privates and sort them by the alignment. 
auto I = Data.PrivateCopies.begin(); - for (auto *E : Data.PrivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Privates.push_back(std::make_pair( + for (const Expr *E : Data.PrivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), - /*PrivateElemInit=*/nullptr))); + /*PrivateElemInit=*/nullptr)); ++I; } I = Data.FirstprivateCopies.begin(); auto IElemInitRef = Data.FirstprivateInits.begin(); - for (auto *E : Data.FirstprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Privates.push_back(std::make_pair( + for (const Expr *E : Data.FirstprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy( VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), - cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); + cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); ++I; ++IElemInitRef; } I = Data.LastprivateCopies.begin(); - for (auto *E : Data.LastprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Privates.push_back(std::make_pair( + for (const Expr *E : Data.LastprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), - /*PrivateElemInit=*/nullptr))); + /*PrivateElemInit=*/nullptr)); ++I; } std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); - auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). @@ -4432,21 +4824,23 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } KmpTaskTQTy = SavedKmpTaskTQTy; } - auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); + const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. - auto *KmpTaskTWithPrivatesQTyRD = + const RecordDecl *KmpTaskTWithPrivatesQTyRD = createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); - auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); + QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); QualType KmpTaskTWithPrivatesPtrQTy = C.getPointerType(KmpTaskTWithPrivatesQTy); - auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); - auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); - auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); + llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); + llvm::Type *KmpTaskTWithPrivatesPtrTy = + KmpTaskTWithPrivatesTy->getPointerTo(); + llvm::Value *KmpTaskTWithPrivatesTySize = + CGF.getTypeSize(KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). 
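As a reading aid for this hunk, the task descriptor that createKmpTaskTWithPrivatesRecordDecl builds follows the libomp layout. A rough hand-written approximation (field names taken from the KmpTaskT* accessors used here; kmp.h in the OpenMP runtime is the authoritative definition, and the two data slots are really unions):

typedef int kmp_int32;
typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);

// Approximate shape of kmp_task_t as the compiler sees it.
struct kmp_task_t {
  void *shareds;               // KmpTaskTShareds: pointer to the captured shareds
  kmp_routine_entry_t routine; // filled in by __kmpc_omp_task_alloc
  kmp_int32 part_id;           // KmpTaskTPartId
  void *data1;                 // union { destructors; priority } (Data1)
  void *data2;                 // second cmplrdata slot
  // Taskloop directives additionally carry lower bound, upper bound,
  // stride, last-iteration flag and the reductions pointer
  // (KmpTaskTLowerBound / KmpTaskTUpperBound / KmpTaskTStride /
  //  KmpTaskTReductions below).
};

// The per-directive record then wraps the task data together with the
// alignment-sorted private copies:
struct kmp_task_t_with_privates {
  kmp_task_t task_data;
  // struct .kmp_privates. { <private copies, sorted by alignment> } privates;
};
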
llvm::Value *TaskPrivatesMap = nullptr; - auto *TaskPrivatesMapTy = + llvm::Type *TaskPrivatesMapTy = std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); @@ -4461,7 +4855,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); - auto *TaskEntry = emitProxyTaskFunction( + llvm::Value *TaskEntry = emitProxyTaskFunction( CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); @@ -4487,23 +4881,24 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; - auto *TaskFlags = + llvm::Value *TaskFlags = Data.Final.getPointer() ? CGF.Builder.CreateSelect(Data.Final.getPointer(), CGF.Builder.getInt32(FinalFlag), CGF.Builder.getInt32(/*C=*/0)) : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); - auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); + llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskEntry, KmpRoutineEntryPtrTy)}; - auto *NewTask = CGF.EmitRuntimeCall( + llvm::Value *NewTask = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); - auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - NewTask, KmpTaskTWithPrivatesPtrTy); + llvm::Value *NewTaskNewTaskTTy = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + NewTask, KmpTaskTWithPrivatesPtrTy); LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, KmpTaskTWithPrivatesQTy); LValue TDBase = @@ -4519,7 +4914,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskTShareds)), Loc), CGF.getNaturalTypeAlignment(SharedsTy)); - CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); + LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); + LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); + CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); } // Emit initial values for private copies (if any). TaskResultTy Result; @@ -4539,7 +4936,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, enum { Priority = 0, Destructors = 1 }; // Provide pointer to function with destructors for privates. auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); - auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); + const RecordDecl *KmpCmplrdataUD = + (*FI)->getType()->getAsUnionType()->getDecl(); if (NeedsCleanup) { llvm::Value *DestructorFn = emitDestructorsFunction( CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, @@ -4582,8 +4980,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *TaskEntry = Result.TaskEntry; llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; LValue TDBase = Result.TDBase; - RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; - auto &C = CGM.getContext(); + const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; + ASTContext &C = CGM.getContext(); // Process list of dependences. 
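The AllocArgs array assembled above maps one-to-one onto the parameters of the runtime's task-allocation entry point. A paraphrased prototype, with simplified types (the exact parameter types live in the OpenMP runtime headers, not in this hunk):

#include <cstddef>

typedef int kmp_int32;
struct ident_t;                                   // source-location descriptor
typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);

// Paraphrased from AllocArgs; the runtime actually returns a kmp_task_t *,
// which the generated code immediately bitcasts to the
// kmp_task_t_with_privates type.
extern "C" void *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_int32 flags,            // tied/final/destructors/priority bits
                                       std::size_t sizeof_kmp_task_t, // whole kmp_task_t_with_privates
                                       std::size_t sizeof_shareds,
                                       kmp_routine_entry_t task_entry);

After the call, the shareds are copied into the allocated block (the EmitAggregateCopy above) and the private copies are initialized in place.
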
Address DependenciesArray = Address::invalid(); unsigned NumDependencies = Data.Dependences.size(); @@ -4603,8 +5001,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); KmpDependInfoRD->completeDefinition(); KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); - } else + } else { KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + } CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( @@ -4613,12 +5012,13 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // kmp_depend_info[<Dependences.size()>] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); - for (unsigned i = 0; i < NumDependencies; ++i) { - const Expr *E = Data.Dependences[i].second; - auto Addr = CGF.EmitLValue(E); + for (unsigned I = 0; I < NumDependencies; ++I) { + const Expr *E = Data.Dependences[I].second; + LValue Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); - if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { + if (const auto *ASE = + dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); llvm::Value *UpAddr = @@ -4627,24 +5027,25 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else + } else { Size = CGF.getTypeSize(Ty); - auto Base = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), + } + LValue Base = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize), KmpDependInfoTy); // deps[i].base_addr = &<Dependences[i].second>; - auto BaseAddrLVal = CGF.EmitLValueForField( + LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(<Dependences[i].second>); - auto LenLVal = CGF.EmitLValueForField( + LValue LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = <Dependences[i].first>; RTLDependenceKindTy DepKind; - switch (Data.Dependences[i].first) { + switch (Data.Dependences[I].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; @@ -4658,7 +5059,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); } - auto FlagsLVal = CGF.EmitLValueForField( + LValue FlagsLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), FlagsLVal); @@ -4668,14 +5069,14 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.VoidPtrTy); } - // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() // libcall. 
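Each element written into the .dep.arr.addr temporary in the loop above mirrors the runtime's dependence descriptor; the BaseAddr, Len and Flags field indices used there correspond to a record of roughly this shape (types approximated, kmp.h is authoritative):

#include <cstddef>
#include <cstdint>

// Approximation of kmp_depend_info.
struct kmp_depend_info {
  std::intptr_t base_addr; // ptrtoint of &<dependence expression>
  std::size_t len;         // sizeof(type), or upper - lower address for a section
  std::uint8_t flags;      // DepIn / DepOut / DepInOut encoding
};

// For "depend(inout: x) depend(in: a[0:n])" the loop conceptually produces:
//   deps[0] = { (intptr_t)&x,    sizeof(x),         /*inout flag*/ };
//   deps[1] = { (intptr_t)&a[0], n * sizeof(a[0]),  /*in flag*/    };
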
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence // list is not empty - auto *ThreadID = getThreadID(CGF, Loc); - auto *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; llvm::Value *DepTaskArgs[7]; if (NumDependencies) { @@ -4692,7 +5093,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { if (!Data.Tied) { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); - auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); } if (NumDependencies) { @@ -4720,7 +5121,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, NumDependencies, &DepWaitTaskArgs, Loc](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 @@ -4750,9 +5151,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, RCG(CGF); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - else { + } else { RegionCodeGenTy ThenRCG(ThenCodeGen); ThenRCG(CGF); } @@ -4768,7 +5169,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, return; TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); - // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() // libcall. 
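Taken together, the ThenCodeGen and ElseCodeGen lambdas above produce the standard libomp task-dispatch sequence. Sketched as pseudo-code (the with_deps prototype is the one quoted in the comment at the top of this hunk; the serialized if(false) path is paraphrased, as its begin/complete calls are not shown here):

// kmp_task_t *new_task = __kmpc_omp_task_alloc(...);          // earlier hunk
// if (cond) {                                                  // ThenCodeGen
//   if (!tied) new_task->part_id = 0;
//   if (ndeps)
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, dep_list,
//                               ndeps_noalias, noalias_dep_list);
//   else
//     __kmpc_omp_task(loc, gtid, new_task);
// } else {                                                     // ElseCodeGen
//   if (ndeps)
//     __kmpc_omp_wait_deps(loc, gtid, ndeps, dep_list, ...);
//   // serialized path: the task entry is invoked directly, bracketed by the
//   // runtime's task_begin_if0 / task_complete_if0 calls (not shown here).
// }
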
// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int @@ -4779,27 +5180,28 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (IfCond) { IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, /*isSigned=*/true); - } else + } else { IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); + } LValue LBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); - auto *LBVar = + const auto *LBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); - auto *UBVar = + const auto *UBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); - auto *StVar = + const auto *StVar = cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); @@ -4807,9 +5209,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, LValue RedLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); - if (Data.Reductions) + if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); - else { + } else { CGF.EmitNullInitialization(RedLVal.getAddress(), CGF.getContext().VoidPtrTy); } @@ -4821,7 +5223,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, IfVal, LBLVal.getPointer(), UBLVal.getPointer(), - CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + CGF.EmitLoadOfScalar(StLVal, Loc), llvm::ConstantInt::getNullValue( CGF.IntTy), // Always 0 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( @@ -4838,7 +5240,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } -/// \brief Emit reduction operation for each element of array (required for +/// Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. /// \param LHSVar Variable on the left side of the reduction operation @@ -4860,22 +5262,22 @@ static void EmitOMPAggregateReduction( Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); + const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); + llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); - auto RHSBegin = RHSAddr.getPointer(); - auto LHSBegin = LHSAddr.getPointer(); + llvm::Value *RHSBegin = RHSAddr.getPointer(); + llvm::Value *LHSBegin = LHSAddr.getPointer(); // Cast from pointer to array type to pointer to single element. 
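The quoted __kmpc_taskloop signature above is cut off mid-parameter-list; for completeness, here is a paraphrased prototype matching the TaskArgs array (the trailing scheduling parameters are reproduced from the runtime headers, not from this hunk):

typedef int kmp_int32;
typedef long long kmp_int64;
typedef unsigned long long kmp_uint64;
struct ident_t;
struct kmp_task_t;

extern "C" void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
                                kmp_int32 if_val,   // result of the if() clause, 1 when absent
                                kmp_uint64 *lb,     // pointer to lower bound in the task descriptor
                                kmp_uint64 *ub,     // pointer to upper bound in the task descriptor
                                kmp_int64 st,       // stride
                                kmp_int32 nogroup,  // always 0: the taskgroup is emitted by the compiler
                                kmp_int32 sched,    // grainsize/num_tasks scheduling kind
                                kmp_uint64 grainsize,
                                void *task_dup);    // the task-dup helper, or null
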
- auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); + llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); - auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); - auto IsEmpty = + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); + llvm::Value *IsEmpty = CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(BodyBB); CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); @@ -4896,19 +5298,19 @@ static void EmitOMPAggregateReduction( // Emit copy. CodeGenFunction::OMPPrivateScope Scope(CGF); - Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); - Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); + Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); + Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); Scope.Privatize(); RedOpGen(CGF, XExpr, EExpr, UpExpr); Scope.ForceCleanup(); // Shift the address forward by one element. - auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. - auto Done = + llvm::Value *Done = CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); @@ -4923,11 +5325,12 @@ static void EmitOMPAggregateReduction( /// UDR combiner function. 
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = + if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (const auto *DRE = dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { + if (const auto *DRD = + dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { std::pair<llvm::Function *, llvm::Function *> Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); RValue Func = RValue::get(Reduction.first); @@ -4939,24 +5342,29 @@ static void emitReductionCombiner(CodeGenFunction &CGF, } llvm::Value *CGOpenMPRuntime::emitReductionFunction( - CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps) { - auto &C = CGM.getContext(); + CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, + ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { + ASTContext &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); - ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - ".omp.reduction.reduction_func", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + const auto &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + std::string Name = getName({"omp", "reduction", "reduction_func"}); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); // Dst = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); @@ -4974,12 +5382,14 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { - auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); - Scope.addPrivate(RHSVar, [&]() -> Address { + const auto *RHSVar = + cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); + Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); }); - auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); - Scope.addPrivate(LHSVar, [&]() -> Address { + const auto *LHSVar = + cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); + Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 
}); QualType PrivTy = (*IPriv)->getType(); @@ -4989,8 +5399,9 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); - auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); - auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); + const VariableArrayType *VLA = + CGF.getContext().getAsVariableArrayType(PrivTy); + const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); CodeGenFunction::OpaqueValueMapping OpaqueMap( CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); CGF.EmitVariablyModifiedType(PrivTy); @@ -5000,19 +5411,20 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); EmitOMPAggregateReduction( CGF, (*IPriv)->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitReductionCombiner(CGF, E); }); - } else + } else { // Emit reduction for array subscript or single variable. emitReductionCombiner(CGF, E); + } ++IPriv; ++ILHS; ++IRHS; @@ -5029,16 +5441,17 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, const DeclRefExpr *RHS) { if (PrivateRef->getType()->isArrayType()) { // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(LHS->getDecl()); - auto *RHSVar = cast<VarDecl>(RHS->getDecl()); + const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); + const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); EmitOMPAggregateReduction( CGF, PrivateRef->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitReductionCombiner(CGF, ReductionOp); }); - } else + } else { // Emit reduction for array subscript or single variable. emitReductionCombiner(CGF, ReductionOp); + } } void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, @@ -5088,14 +5501,14 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); // ... - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); ++IPriv; @@ -5108,7 +5521,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // 1. Build a list of reduction variables. // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); - for (auto *E : Privates) { + for (const Expr *E : Privates) { if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. 
++Size; @@ -5136,7 +5549,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Size = CGF.Builder.CreateIntCast( CGF.getVLASize( CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .first, + .NumElts, CGF.SizeTy, /*isSigned=*/false); CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), Elem); @@ -5144,19 +5557,20 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } // 2. Emit reduce_func(). - auto *ReductionFn = emitReductionFunction( - CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, - LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ReductionFn = emitReductionFunction( + CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), + Privates, LHSExprs, RHSExprs, ReductionOps); // 3. Create static kmp_critical_name lock = { 0 }; - auto *Lock = getCriticalRegionLock(".reduction"); + std::string Name = getName({"reduction"}); + llvm::Value *Lock = getCriticalRegionLock(Name); // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), // RedList, reduce_func, &<lock>); - auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); - auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( ReductionList.getPointer(), CGF.VoidPtrTy); llvm::Value *Args[] = { IdentTLoc, // ident_t *<loc> @@ -5167,14 +5581,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ReductionFn, // void (*) (void *, void *) <reduce_func> Lock // kmp_critical_name *&<lock> }; - auto Res = CGF.EmitRuntimeCall( + llvm::Value *Res = CGF.EmitRuntimeCall( createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait : OMPRTL__kmpc_reduce), Args); // 5. Build switch(res) - auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); - auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); + llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + llvm::SwitchInst *SwInst = + CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); // 6. Build case 1: // ... @@ -5182,7 +5597,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // ... 
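The numbered steps above translate into a fixed run-time pattern. Sketched as pseudo-code for the nowait case (reduce_func is the reduction function emitted in step 2, lock the kmp_critical_name from step 3):

// void *red_list[n] = { &priv_0, ..., &priv_n_minus_1 };          // step 1
// int res = __kmpc_reduce_nowait(loc, gtid, n, sizeof(red_list),
//                                red_list, reduce_func, &lock);    // step 4
// switch (res) {                                                   // step 5
// case 1:                          // one thread combines everything
//   lhs[i] = RedOp_i(lhs[i], rhs[i]); ...
//   __kmpc_end_reduce_nowait(loc, gtid, &lock);
//   break;
// case 2:                          // each thread combines its value atomically
//   Atomic(lhs[i] = RedOp_i(lhs[i], rhs[i])); ...
//   break;
// default:                         // nothing to do
//   break;
// }
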
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); // break; - auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); @@ -5192,13 +5607,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ThreadId, // i32 <gtid> Lock // kmp_critical_name *&<lock> }; - auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( - CodeGenFunction &CGF, PrePostActionTy &Action) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); ++IPriv; @@ -5222,44 +5637,44 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); // ... // break; - auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); + llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); - auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { const Expr *XExpr = nullptr; const Expr *EExpr = nullptr; const Expr *UpExpr = nullptr; BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (const auto *BO = dyn_cast<BinaryOperator>(E)) { if (BO->getOpcode() == BO_Assign) { XExpr = BO->getLHS(); UpExpr = BO->getRHS(); } } // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; + const Expr *RHSExpr = UpExpr; if (RHSExpr) { // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast<AbstractConditionalOperator>( + if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( RHSExpr->IgnoreParenImpCasts())) { // If this is a conditional operator, analyze its condition for // min/max reduction operator. 
RHSExpr = ACO->getCond(); } - if (auto *BORHS = + if (const auto *BORHS = dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { EExpr = BORHS->getRHS(); BO = BORHS->getOpcode(); } } if (XExpr) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto &&AtomicRedGen = [BO, VD, Loc](CodeGenFunction &CGF, const Expr *XExpr, const Expr *EExpr, const Expr *UpExpr) { @@ -5273,7 +5688,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, [&CGF, UpExpr, VD, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() -> Address { + VD, [&CGF, VD, XRValue, Loc]() { Address LHSTemp = CGF.CreateMemTemp(VD->getType()); CGF.emitOMPSimpleStore( CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, @@ -5286,19 +5701,22 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, }; if ((*IPriv)->getType()->isArrayType()) { // Emit atomic reduction for array section. - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *RHSVar = + cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, AtomicRedGen, XExpr, EExpr, UpExpr); - } else + } else { // Emit atomic reduction for array subscript or single variable. AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } } else { // Emit as a critical region. auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + const Expr *, const Expr *) { + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); + std::string Name = RT.getName({"atomic_reduction"}); RT.emitCriticalRegion( - CGF, ".atomic_reduction", + CGF, Name, [=](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); emitReductionCombiner(CGF, E); @@ -5306,12 +5724,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Loc); }; if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *LHSVar = + cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVar = + cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, CritRedGen); - } else + } else { CritRedGen(CGF, nullptr, nullptr, nullptr); + } } ++ILHS; ++IRHS; @@ -5331,20 +5752,29 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, EndArgs); AtomicRCG.setAction(Action); AtomicRCG(CGF); - } else + } else { AtomicRCG(CGF); + } CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } /// Generates unique name for artificial threadprivate variables. -/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> -static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, - unsigned N) { +/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" +static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, + const Expr *Ref) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); - Out << Prefix << "." 
<< Loc.getRawEncoding() << "_" << N; + const clang::DeclRefExpr *DE; + const VarDecl *D = ::getBaseDecl(Ref, DE); + if (!D) + D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); + D = D->getCanonicalDecl(); + std::string Name = CGM.getOpenMPRuntime().getName( + {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); + Out << Prefix << Name << "_" + << D->getCanonicalDecl()->getLocStart().getRawEncoding(); return Out.str(); } @@ -5359,19 +5789,21 @@ static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.emplace_back(&Param); - auto &FnInfo = + const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, - ".red_init.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); Address PrivateAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&Param), C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); @@ -5381,10 +5813,9 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, if (RCG.getSizes(N).second) { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); - Size = - CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, - CGM.getContext().getSizeType(), SourceLocation()); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); + Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); LValue SharedLVal; @@ -5395,7 +5826,10 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, Address SharedAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().VoidPtrTy, - generateUniqueName("reduction", Loc, N)); + generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); + SharedAddr = CGF.EmitLoadOfPointer( + SharedAddr, + CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); } else { SharedLVal = CGF.MakeNaturalAlignAddrLValue( @@ -5427,40 +5861,42 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef) { - auto &C = CGM.getContext(); - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); + ASTContext &C = CGM.getContext(); + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 
FunctionArgList Args; - ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); - ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.emplace_back(&ParamInOut); Args.emplace_back(&ParamIn); - auto &FnInfo = + const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, - ".red_comb.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. if (RCG.getSizes(N).second) { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); - Size = - CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, - CGM.getContext().getSizeType(), SourceLocation()); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); + Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); // Remap lhs and rhs variables to the addresses of the function arguments. // %lhs = bitcast void* %arg0 to <type>* // %rhs = bitcast void* %arg1 to <type>* CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { // Pull out the pointer to the variable. Address PtrAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&ParamInOut), @@ -5468,7 +5904,7 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, return CGF.Builder.CreateElementBitCast( PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); }); - PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { // Pull out the pointer to the variable. 
Address PtrAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&ParamIn), @@ -5500,19 +5936,21 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, ReductionCodeGen &RCG, unsigned N) { if (!RCG.needCleanups(N)) return nullptr; - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.emplace_back(&Param); - auto &FnInfo = + const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, - ".red_fini.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); Address PrivateAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&Param), C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); @@ -5522,10 +5960,9 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, if (RCG.getSizes(N).second) { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); - Size = - CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, - CGM.getContext().getSizeType(), SourceLocation()); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); + Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); // Emit the finalizer body: @@ -5551,7 +5988,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( // kmp_task_red_flags_t flags; // flags for additional info from compiler // } kmp_task_red_input_t; ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); RD->startDefinition(); const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); @@ -5652,14 +6089,14 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, /*isSigned=*/false); Address SizeAddr = getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); } // Store address of the original reduction item if custom initializer is used. 
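Only the tail of the kmp_task_red_input_t layout comment is visible in this hunk; for reference, the record that emitTaskReductionInit fills out, and that the red_init/red_comb/red_fini helpers above plug into, has roughly this shape (field order paraphrased from kmp.h):

#include <cstddef>

typedef struct kmp_task_red_input {
  void *reduce_shar;        // original (shared) reduction item
  std::size_t reduce_size;  // size of the reduction item
  void *reduce_init;        // the red_init helper emitted above
  void *reduce_fini;        // the red_fini helper, or null when no cleanups are needed
  void *reduce_comb;        // the red_comb combiner helper
  unsigned flags;           // kmp_task_red_flags_t: extra info from the compiler
} kmp_task_red_input_t;
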
if (RCG.usesReductionInitializer(N)) { Address SharedAddr = getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().VoidPtrTy, - generateUniqueName("reduction", Loc, N)); + generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), @@ -5749,18 +6186,18 @@ void CGOpenMPRuntime::emitCancellationPointCall( emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. - auto *Result = CGF.EmitRuntimeCall( + llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); // if (__kmpc_cancellationpoint()) { // exit from construct; // } - auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); - auto *ContBB = CGF.createBasicBlock(".cancel.continue"); - auto *Cmp = CGF.Builder.CreateIsNotNull(Result); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); + llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; - auto CancelDest = + CodeGenFunction::JumpDest CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); @@ -5779,70 +6216,42 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. - auto *Result = CGF.EmitRuntimeCall( + llvm::Value *Result = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { // exit from construct; // } - auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); - auto *ContBB = CGF.createBasicBlock(".cancel.continue"); - auto *Cmp = CGF.Builder.CreateIsNotNull(Result); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); + llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; - auto CancelDest = + CodeGenFunction::JumpDest CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &, PrePostActionTy &) {}); - else { + } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } } } -/// \brief Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. 
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - - auto &SM = C.getSourceManager(); - - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) - - assert(Loc.isValid() && "Source location is expected to be always valid."); - assert(Loc.isFileID() && "Source location is expected to refer to a file."); - - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - llvm_unreachable("Source file with target region no longer exists!"); - - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); -} - void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); - emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } @@ -5872,7 +6281,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; } - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); @@ -5898,22 +6307,25 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( if (CGM.getLangOpts().OpenMPIsDevice) { OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); - OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else + OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + OutlinedFn->setDSOLocal(false); + } else { + std::string Name = getName({EntryFnName, "region_id"}); OutlinedFnID = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); + llvm::GlobalValue::WeakAnyLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), Name); + } // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, - /*Flags=*/0); + OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); } /// discard all CompoundStmts intervening between two constructs static const Stmt *ignoreCompoundStmts(const Stmt *Body) { - while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) + while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) Body = CS->body_front(); return Body; @@ -5931,12 +6343,11 @@ static llvm::Value * emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // If the target directive is combined with a teams directive: // Return the value in the num_teams clause, if any. 
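Worked example of the target-entry naming scheme used above (the __omp_offloading prefix and the device-ID component come from code just outside this hunk, the separator used for the region-id symbol is the host runtime's, and the concrete numbers are invented):

// parent function foo() containing a target region on line 42:
//   outlined entry:   __omp_offloading_805_29cbe29_foo_l42
//   host region id:   __omp_offloading_805_29cbe29_foo_l42.region_id
//                     (getName({EntryFnName, "region_id"}))
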
@@ -5944,8 +6355,8 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, if (isOpenMPTeamsDirective(D.getDirectiveKind())) { if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), - /*IgnoreResultAssign*/ true); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), + /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, /*IsSigned=*/true); } @@ -5965,12 +6376,12 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, // the expression is captured in the enclosing target environment when the // teams directive is not combined with target. - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( + if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); @@ -6000,12 +6411,11 @@ static llvm::Value * emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // // If the target directive is combined with a teams directive: @@ -6030,8 +6440,9 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, if (const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); - auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), - /*IgnoreResultAssign*/ true); + llvm::Value *ThreadLimit = + CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), + /*IgnoreResultAssign*/ true); ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); } @@ -6068,12 +6479,12 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, // the expression is captured in the enclosing target environment when the // teams directive is not combined with target. 
- const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( + if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { + if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); @@ -6092,42 +6503,50 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, } namespace { -// \brief Utility to handle information from clauses associated with a given +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + +// Utility to handle information from clauses associated with a given // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). // It provides a convenient interface to obtain the information and generate // code for that information. class MappableExprsHandler { public: - /// \brief Values for bit flags used to specify the mapping type for + /// Values for bit flags used to specify the mapping type for /// offloading. - enum OpenMPOffloadMappingFlags { - /// \brief Allocate memory on the device and move data from host to device. + enum OpenMPOffloadMappingFlags : uint64_t { + /// No flags + OMP_MAP_NONE = 0x0, + /// Allocate memory on the device and move data from host to device. OMP_MAP_TO = 0x01, - /// \brief Allocate memory on the device and move data from device to host. + /// Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, - /// \brief Always perform the requested mapping action on the element, even + /// Always perform the requested mapping action on the element, even /// if it was already mapped before. OMP_MAP_ALWAYS = 0x04, - /// \brief Delete the element from the device environment, ignoring the + /// Delete the element from the device environment, ignoring the /// current reference count associated with the element. OMP_MAP_DELETE = 0x08, - /// \brief The element being mapped is a pointer-pointee pair; both the + /// The element being mapped is a pointer-pointee pair; both the /// pointer and the pointee should be mapped. OMP_MAP_PTR_AND_OBJ = 0x10, - /// \brief This flags signals that the base address of an entry should be + /// This flags signals that the base address of an entry should be /// passed to the target kernel as an argument. OMP_MAP_TARGET_PARAM = 0x20, - /// \brief Signal that the runtime library has to return the device pointer + /// Signal that the runtime library has to return the device pointer /// in the current position for the data being mapped. Used when we have the /// use_device_ptr clause. OMP_MAP_RETURN_PARAM = 0x40, - /// \brief This flag signals that the reference being passed is a pointer to + /// This flag signals that the reference being passed is a pointer to /// private data. OMP_MAP_PRIVATE = 0x80, - /// \brief Pass the element to the device by value. + /// Pass the element to the device by value. OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, + /// The 16 MSBs of the flags indicate whether the entry is member of some + /// struct/class. 
+ OMP_MAP_MEMBER_OF = 0xffff000000000000, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), }; /// Class that associates information with a base pointer to be passed to the @@ -6147,21 +6566,60 @@ public: void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } }; - typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; - typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; - typedef SmallVector<uint64_t, 16> MapFlagsArrayTy; + using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; + using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; + using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; + + /// Map between a struct and the its lowest & highest elements which have been + /// mapped. + /// [ValueDecl *] --> {LE(FieldIndex, Pointer), + /// HE(FieldIndex, Pointer)} + struct StructRangeInfoTy { + std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { + 0, Address::invalid()}; + std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { + 0, Address::invalid()}; + Address Base = Address::invalid(); + }; private: - /// \brief Directive from where the map clauses were extracted. + /// Kind that defines how a device pointer has to be returned. + struct MapInfo { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + bool ReturnDevicePointer = false; + bool IsImplicit = false; + + MapInfo() = default; + MapInfo( + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + bool ReturnDevicePointer, bool IsImplicit) + : Components(Components), MapType(MapType), + MapTypeModifier(MapTypeModifier), + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} + }; + + /// If use_device_ptr is used on a pointer which is a struct member and there + /// is no map information about it, then emission of that entry is deferred + /// until the whole struct has been processed. + struct DeferredDevicePtrEntryTy { + const Expr *IE = nullptr; + const ValueDecl *VD = nullptr; + + DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) + : IE(IE), VD(VD) {} + }; + + /// Directive from where the map clauses were extracted. const OMPExecutableDirective &CurDir; - /// \brief Function the directive is being generated for. + /// Function the directive is being generated for. CodeGenFunction &CGF; - /// \brief Set of all first private variables in the current directive. + /// Set of all first private variables in the current directive. llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; - /// Set of all reduction variables in the current directive. - llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls; /// Map between device pointer declarations and their expression components. /// The key value for declarations in 'this' is null. @@ -6171,10 +6629,10 @@ private: DevPointersMap; llvm::Value *getExprTypeSize(const Expr *E) const { - auto ExprTy = E->getType().getCanonicalType(); + QualType ExprTy = E->getType().getCanonicalType(); // Reference types are ignored for mapping purposes. 
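The map-type element is now a 64-bit mask: the low bits keep the TO/FROM/ALWAYS/... values listed above, while the 16 most significant bits carry a 1-based MEMBER_OF index, with all ones acting as the "patch me later" placeholder. A minimal standalone sketch of that encoding, mirroring the getMemberOfFlag/setCorrectMemberOfFlag helpers that appear later in this patch:

#include <cassert>
#include <cstdint>

// Subset of the flag values defined above.
constexpr uint64_t OMP_MAP_TO          = 0x01;
constexpr uint64_t OMP_MAP_PTR_AND_OBJ = 0x10;
constexpr uint64_t OMP_MAP_MEMBER_OF   = 0xffff000000000000ULL; // placeholder

// Mirrors getMemberOfFlag(): MEMBER_OF is the 1-based position of the parent
// struct's combined entry, stored in the 16 most significant bits.
constexpr uint64_t memberOfFlag(unsigned Position) {
  return static_cast<uint64_t>(Position + 1) << 48;
}

int main() {
  // A PTR_AND_OBJ | TO entry that was marked with the placeholder...
  uint64_t Flags = OMP_MAP_PTR_AND_OBJ | OMP_MAP_TO | OMP_MAP_MEMBER_OF;
  // ...later gets the real index patched in (parent entry is argument #1).
  Flags &= ~OMP_MAP_MEMBER_OF;
  Flags |= memberOfFlag(0);
  assert(Flags == (OMP_MAP_PTR_AND_OBJ | OMP_MAP_TO | (1ULL << 48)));
  return 0;
}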
- if (auto *RefTy = ExprTy->getAs<ReferenceType>()) + if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) ExprTy = RefTy->getPointeeType().getCanonicalType(); // Given that an array section is considered a built-in type, we need to @@ -6191,10 +6649,10 @@ private: return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; - if (auto *PTy = BaseTy->getAs<PointerType>()) + if (const auto *PTy = BaseTy->getAs<PointerType>()) { ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); - else { - auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); + } else { + const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); assert(ATy && "Expecting array type if not a pointer type."); ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); } @@ -6204,7 +6662,7 @@ private: if (!OAE->getLength()) return ElemSize; - auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); + llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); LengthVal = CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); @@ -6212,14 +6670,16 @@ private: return CGF.getTypeSize(ExprTy); } - /// \brief Return the corresponding bits for a given map clause modifier. Add + /// Return the corresponding bits for a given map clause modifier. Add /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - uint64_t getMapTypeBits(OpenMPMapClauseKind MapType, - OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, - bool AddIsTargetParamFlag) const { - uint64_t Bits = 0u; + OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType, + OpenMPMapClauseKind MapTypeModifier, + bool IsImplicit, bool AddPtrFlag, + bool AddIsTargetParamFlag) const { + OpenMPOffloadMappingFlags Bits = + IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: @@ -6229,20 +6689,20 @@ private: // type modifiers. break; case OMPC_MAP_to: - Bits = OMP_MAP_TO; + Bits |= OMP_MAP_TO; break; case OMPC_MAP_from: - Bits = OMP_MAP_FROM; + Bits |= OMP_MAP_FROM; break; case OMPC_MAP_tofrom: - Bits = OMP_MAP_TO | OMP_MAP_FROM; + Bits |= OMP_MAP_TO | OMP_MAP_FROM; break; case OMPC_MAP_delete: - Bits = OMP_MAP_DELETE; + Bits |= OMP_MAP_DELETE; break; - default: + case OMPC_MAP_always: + case OMPC_MAP_unknown: llvm_unreachable("Unexpected map type!"); - break; } if (AddPtrFlag) Bits |= OMP_MAP_PTR_AND_OBJ; @@ -6253,10 +6713,10 @@ private: return Bits; } - /// \brief Return true if the provided expression is a final array section. A + /// Return true if the provided expression is a final array section. A /// final array section, is one whose length can't be proved to be one. bool isFinalArraySectionExpression(const Expr *E) const { - auto *OASE = dyn_cast<OMPArraySectionExpr>(E); + const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); // It is not an array section and therefore not a unity-size one. if (!OASE) @@ -6266,16 +6726,16 @@ private: if (OASE->getColonLoc().isInvalid()) return false; - auto *Length = OASE->getLength(); + const Expr *Length = OASE->getLength(); // If we don't have a length we have to check if the array has size 1 // for this dimension. Also, we should always expect a length if the // base type is pointer. 
if (!Length) { - auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( - OASE->getBase()->IgnoreParenImpCasts()) - .getCanonicalType(); - if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) + QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + OASE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) return ATy->getSize().getSExtValue() != 1; // If we don't have a constant dimension length, we have to consider // the current section as having any size, so it is not necessarily @@ -6291,7 +6751,7 @@ private: return ConstLength.getSExtValue() != 1; } - /// \brief Generate the base pointers, section pointers, sizes and map type + /// Generate the base pointers, section pointers, sizes and map type /// bits for the provided map type, map modifier, and expression components. /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. @@ -6300,10 +6760,10 @@ private: OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, - bool IsFirstComponentList, bool IsImplicit) const { - + StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, + bool IsImplicit) const { // The following summarizes what has to be generated for each map and the - // types bellow. The generated information is expressed in this order: + // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags // (to add to the ones that come from the map type and modifier). // @@ -6326,96 +6786,141 @@ private: // S2 *ps; // // map(d) - // &d, &d, sizeof(double), noflags + // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM // // map(i) - // &i, &i, 100*sizeof(int), noflags + // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM // // map(i[1:23]) - // &i(=&i[0]), &i[1], 23*sizeof(int), noflags + // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM // // map(p) - // &p, &p, sizeof(float*), noflags + // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM // // map(p[1:24]) - // p, &p[1], 24*sizeof(float), noflags + // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // // map(s) - // &s, &s, sizeof(S2), noflags + // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM // // map(s.i) - // &s, &(s.i), sizeof(int), noflags + // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM // // map(s.s.f) - // &s, &(s.i.f), 50*sizeof(int), noflags + // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM // // map(s.p) - // &s, &(s.p), sizeof(double*), noflags + // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM // - // map(s.p[:22], s.a s.b) - // &s, &(s.p), sizeof(double*), noflags - // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + // map(to: s.p[:22]) + // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) + // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) + // &(s.p), &(s.p[0]), 22*sizeof(double), + // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) + // (*) alloc space for struct members, only this is a target parameter + // (**) map the pointer (nothing to be mapped in this example) (the compiler + // optimizes this entry out, same in the examples below) + // (***) map the pointee (map: to) // // map(s.ps) - // &s, &(s.ps), sizeof(S2*), noflags + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM // - // map(s.ps->s.i) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->s.i), 
sizeof(int), ptr_flag + // map(from: s.ps->s.i) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM // - // map(s.ps->ps) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + // map(to: s.ps->ps) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO // // map(s.ps->ps->ps) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag - // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM // - // map(s.ps->ps->s.f[:22]) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag - // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + // map(to: s.ps->ps->s.f[:22]) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO // // map(ps) - // &ps, &ps, sizeof(S2*), noflags + // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM // // map(ps->i) - // ps, &(ps->i), sizeof(int), noflags + // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM // // map(ps->s.f) - // ps, &(ps->s.f[0]), 50*sizeof(float), noflags + // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM // - // map(ps->p) - // ps, &(ps->p), sizeof(double*), noflags + // map(from: ps->p) + // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM // - // map(ps->p[:22]) - // ps, &(ps->p), sizeof(double*), noflags - // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + // map(to: ps->p[:22]) + // ps, &(ps->p), sizeof(double*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) + // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO // // map(ps->ps) - // ps, &(ps->ps), sizeof(S2*), noflags + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM // - // map(ps->ps->s.i) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + // map(from: ps->ps->s.i) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM // - // map(ps->ps->ps) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + // map(from: ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM // // map(ps->ps->ps->ps) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag - // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM // - // map(ps->ps->ps->s.f[:22]) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag - // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + // map(to: 
ps->ps->ps->s.f[:22]) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO + // + // map(to: s.f[:22]) map(from: s.p[:33]) + // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + + // sizeof(double*) (**), TARGET_PARAM + // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO + // &s, &(s.p), sizeof(double*), MEMBER_OF(1) + // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM + // (*) allocate contiguous space needed to fit all mapped members even if + // we allocate space for members not mapped (in this example, + // s.f[22..49] and s.s are not mapped, yet we must allocate space for + // them as well because they fall between &s.f[0] and &s.p) + // + // map(from: s.f[:22]) map(to: ps->p[:33]) + // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM + // ps, &(ps->p), sizeof(S2*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) + // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO + // (*) the struct this entry pertains to is the 2nd element in the list of + // arguments, hence MEMBER_OF(2) + // + // map(from: s.f[:22], s.s) map(to: ps->p[:33]) + // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM + // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM + // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM + // ps, &(ps->p), sizeof(S2*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) + // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO + // (*) the struct this entry pertains to is the 4th element in the list + // of arguments, hence MEMBER_OF(4) // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; + bool IsLink = false; // Is this variable a "declare target link"? // Scan the components from the base to the complete expression. auto CI = Components.rbegin(); @@ -6425,16 +6930,25 @@ private: // Track if the map information being generated is the first for a list of // components. bool IsExpressionFirstInfo = true; - llvm::Value *BP = nullptr; + Address BP = Address::invalid(); - if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { + if (isa<MemberExpr>(I->getAssociatedExpression())) { // The base is the 'this' pointer. The content of the pointer is going // to be the base of the field being mapped. - BP = CGF.EmitScalarExpr(ME->getBase()); + BP = CGF.LoadCXXThisAddress(); } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); + BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + if (const auto *VD = + dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { + if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD)) + if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { + IsLink = true; + BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + } + } // If the variable is a pointer and is being dereferenced (i.e. 
is not // the last component), the base has to be the pointer itself, not its @@ -6442,10 +6956,7 @@ private: QualType Ty = I->getAssociatedDeclaration()->getType().getNonReferenceType(); if (Ty->isAnyPointerType() && std::next(I) != CE) { - auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); - BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), - Ty->castAs<PointerType>()) - .getPointer(); + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); // We do not need to generate individual map information for the // pointer, it can be associated with the combined storage. @@ -6453,8 +6964,41 @@ private: } } - uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; + // Track whether a component of the list should be marked as MEMBER_OF some + // combined entry (for partial structs). Only the first PTR_AND_OBJ entry + // in a component list should be marked as MEMBER_OF, all subsequent entries + // do not belong to the base struct. E.g. + // struct S2 s; + // s.ps->ps->ps->f[:] + // (1) (2) (3) (4) + // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a + // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) + // is the pointee of ps(2) which is not member of struct s, so it should not + // be marked as such (it is still PTR_AND_OBJ). + // The variable is initialized to false so that PTR_AND_OBJ entries which + // are not struct members are not considered (e.g. array of pointers to + // data). + bool ShouldBeMemberOf = false; + + // Variable keeping track of whether or not we have encountered a component + // in the component list which is a member expression. Useful when we have a + // pointer or a final array section, in which case it is the previous + // component in the list which tells us whether we have a member expression. + // E.g. X.f[:] + // While processing the final array section "[:]" it is "f" which tells us + // whether we are dealing with a member of a declared struct. + const MemberExpr *EncounteredME = nullptr; + for (; I != CE; ++I) { + // If the current component is member of a struct (parent struct) mark it. + if (!EncounteredME) { + EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); + // If we encounter a PTR_AND_OBJ entry from now on it should be marked + // as MEMBER_OF the parent struct. + if (EncounteredME) + ShouldBeMemberOf = true; + } + auto Next = std::next(I); // We need to generate the addresses and sizes if this is the last @@ -6472,14 +7016,12 @@ private: const auto *OASE = dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); bool IsPointer = - (OASE && - OMPArraySectionExpr::getBaseOriginalType(OASE) - .getCanonicalType() - ->isAnyPointerType()) || + (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) + .getCanonicalType() + ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); if (Next == CE || IsPointer || IsFinalArraySection) { - // If this is not the last component, we expect the pointer to be // associated with an array expression or member expression. assert((Next == CE || @@ -6488,44 +7030,68 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - llvm::Value *LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); - auto *Size = getExprTypeSize(I->getAssociatedExpression()); - - // If we have a member expression and the current component is a - // reference, we have to map the reference too. 
Whenever we have a - // reference, the section that reference refers to is going to be a - // load instruction from the storage assigned to the reference. - if (isa<MemberExpr>(I->getAssociatedExpression()) && - I->getAssociatedDeclaration()->getType()->isReferenceType()) { - auto *LI = cast<llvm::LoadInst>(LB); - auto *RefAddr = LI->getPointerOperand(); - - BasePointers.push_back(BP); - Pointers.push_back(RefAddr); - Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(DefaultFlags | - getMapTypeBits( - /*MapType*/ OMPC_MAP_alloc, - /*MapTypeModifier=*/OMPC_MAP_unknown, - !IsExpressionFirstInfo, IsCaptureFirstInfo)); - IsExpressionFirstInfo = false; - IsCaptureFirstInfo = false; - // The reference will be the next base address. - BP = RefAddr; - } + Address LB = + CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); + + // If this component is a pointer inside the base struct then we don't + // need to create any entry for it - it will be combined with the object + // it is pointing to into a single PTR_AND_OBJ entry. + bool IsMemberPointer = + IsPointer && EncounteredME && + (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == + EncounteredME); + if (!IsMemberPointer) { + BasePointers.push_back(BP.getPointer()); + Pointers.push_back(LB.getPointer()); + Sizes.push_back(Size); + + // We need to add a pointer flag for each map that comes from the + // same expression except for the first one. We also need to signal + // this map is the first one that relates with the current capture + // (there is a set of entries for each capture). + OpenMPOffloadMappingFlags Flags = getMapTypeBits( + MapType, MapTypeModifier, IsImplicit, + !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); + + if (!IsExpressionFirstInfo) { + // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, + // then we reset the TO/FROM/ALWAYS/DELETE flags. + if (IsPointer) + Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | + OMP_MAP_DELETE); + + if (ShouldBeMemberOf) { + // Set placeholder value MEMBER_OF=FFFF to indicate that the flag + // should be later updated with the correct value of MEMBER_OF. + Flags |= OMP_MAP_MEMBER_OF; + // From now on, all subsequent PTR_AND_OBJ entries should not be + // marked as MEMBER_OF. + ShouldBeMemberOf = false; + } + } - BasePointers.push_back(BP); - Pointers.push_back(LB); - Sizes.push_back(Size); + Types.push_back(Flags); + } - // We need to add a pointer flag for each map that comes from the - // same expression except for the first one. We also need to signal - // this map is the first one that relates with the current capture - // (there is a set of entries for each capture). - Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, - !IsExpressionFirstInfo, - IsCaptureFirstInfo)); + // If we have encountered a member expression so far, keep track of the + // mapped member. If the parent is "*this", then the value declaration + // is nullptr. 
+ if (EncounteredME) { + const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); + unsigned FieldIndex = FD->getFieldIndex(); + + // Update info about the lowest and highest elements for this struct + if (!PartialStruct.Base.isValid()) { + PartialStruct.LowestElem = {FieldIndex, LB}; + PartialStruct.HighestElem = {FieldIndex, LB}; + PartialStruct.Base = BP; + } else if (FieldIndex < PartialStruct.LowestElem.first) { + PartialStruct.LowestElem = {FieldIndex, LB}; + } else if (FieldIndex > PartialStruct.HighestElem.first) { + PartialStruct.HighestElem = {FieldIndex, LB}; + } + } // If we have a final array section, we are done with this expression. if (IsFinalArraySection) @@ -6541,11 +7107,11 @@ private: } } - /// \brief Return the adjusted map modifiers if the declaration a capture - /// refers to appears in a first-private clause. This is expected to be used - /// only with directives that start with 'target'. - unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, - unsigned CurrentModifiers) { + /// Return the adjusted map modifiers if the declaration a capture refers to + /// appears in a first-private clause. This is expected to be used only with + /// directives that start with 'target'. + MappableExprsHandler::OpenMPOffloadMappingFlags + getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { assert(Cap.capturesVariable() && "Expected capture by reference only!"); // A first private variable captured by reference will use only the @@ -6554,15 +7120,29 @@ private: if (FirstPrivateDecls.count(Cap.getCapturedVar())) return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; - // Reduction variable will use only the 'private ptr' and 'map to_from' - // flag. - if (ReductionDecls.count(Cap.getCapturedVar())) { - return MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM; - } + return MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM; + } + + static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { + // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. + return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) + << 48); + } + + static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, + OpenMPOffloadMappingFlags MemberOfFlag) { + // If the entry is PTR_AND_OBJ but has not been marked with the special + // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be + // marked as MEMBER_OF. + if ((Flags & OMP_MAP_PTR_AND_OBJ) && + ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) + return; - // We didn't modify anything. - return CurrentModifiers; + // Reset the placeholder value to prepare the flag for the assignment of the + // proper MEMBER_OF value. + Flags &= ~OMP_MAP_MEMBER_OF; + Flags |= MemberOfFlag; } public: @@ -6573,58 +7153,54 @@ public: for (const auto *D : C->varlists()) FirstPrivateDecls.insert( cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); - for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) { - for (const auto *D : C->varlists()) { - ReductionDecls.insert( - cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); - } - } // Extract device pointer clause information. 
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) DevPointersMap[L.first].push_back(L.second); } - /// \brief Generate all the base pointers, section pointers, sizes and map + /// Generate code for the combined entry if we have a partially mapped struct + /// and take care of the mapping flags of the arguments corresponding to + /// individual struct members. + void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, + const StructRangeInfoTy &PartialStruct) const { + // Base is the base of the struct + BasePointers.push_back(PartialStruct.Base.getPointer()); + // Pointer is the address of the lowest element + llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); + Pointers.push_back(LB); + // Size is (addr of {highest+1} element) - (addr of lowest element) + llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); + llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); + llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); + llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); + llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); + llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, + /*isSinged=*/false); + Sizes.push_back(Size); + // Map type is always TARGET_PARAM + Types.push_back(OMP_MAP_TARGET_PARAM); + // Remove TARGET_PARAM flag from the first element + (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; + + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = + getMemberOfFlag(BasePointers.size() - 1); + for (auto &M : CurTypes) + setCorrectMemberOfFlag(M, MemberOfFlag); + } + + /// Generate all the base pointers, section pointers, sizes and map /// types for the extracted mappable expressions. Also, for each item that /// relates with a device pointer, a pair of the relevant declaration and /// index where it occurs is appended to the device pointers info array. void generateAllInfo(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { - BasePointers.clear(); - Pointers.clear(); - Sizes.clear(); - Types.clear(); - - struct MapInfo { - /// Kind that defines how a device pointer has to be returned. - enum ReturnPointerKind { - // Don't have to return any pointer. - RPK_None, - // Pointer is the base of the declaration. 
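emitCombinedEntry above sizes the combined struct entry as the byte distance from the lowest mapped member to one past the highest mapped member, and demotes the individual member entries to MEMBER_OF of that new entry. A plain-C++ sketch of the size arithmetic on a hypothetical struct (field names invented for illustration, not taken from the patch):

#include <cstddef>
#include <cstdio>

struct S1 { float f[50]; };
struct S2 { double d; int i[100]; float *p; S1 s; S2 *ps; };

int main() {
  S2 s;
  // Suppose map clauses touch s.i (lowest mapped field) and s.s (highest).
  char *LB = reinterpret_cast<char *>(&s.i);      // lowest mapped element
  char *HB = reinterpret_cast<char *>(&s.s + 1);  // one past the highest element
  std::size_t CombinedSize = static_cast<std::size_t>(HB - LB);
  // The combined TARGET_PARAM entry spans everything in between, even fields
  // that are not mapped themselves (here s.p).
  std::printf("combined entry covers %zu bytes\n", CombinedSize);
  return 0;
}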
- RPK_Base, - // Pointer is a member of the base declaration - 'this' - RPK_Member, - // Pointer is a reference and a member of the base declaration - 'this' - RPK_MemberReference, - }; - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType = OMPC_MAP_unknown; - OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; - ReturnPointerKind ReturnDevicePointer = RPK_None; - bool IsImplicit = false; - - MapInfo() = default; - MapInfo( - OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, - ReturnPointerKind ReturnDevicePointer, bool IsImplicit) - : Components(Components), MapType(MapType), - MapTypeModifier(MapTypeModifier), - ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} - }; - // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. @@ -6636,7 +7212,7 @@ public: const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, - MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) { + bool ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, @@ -6644,33 +7220,39 @@ public: }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), - MapInfo::RPK_None, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) + for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, - MapInfo::RPK_None, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) + for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, - MapInfo::RPK_None, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit()); } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the // use_device_ptr list, we create one with map type 'alloc' and zero size - // section. It is the user fault if that was not mapped before. + // section. It is the user fault if that was not mapped before. If there is + // no map information and the pointer is a struct member, then we defer the + // emission of that entry until the whole struct has been processed. + llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> + DeferredInfo; + // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
- for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : + this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { + for (const auto &L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); - auto *IE = L.second.back().getAssociatedExpression(); + const Expr *IE = L.second.back().getAssociatedExpression(); // If the first component is a member expression, we have to look into // 'this', which maps to null in the map of map information. Otherwise // look directly for the information. @@ -6686,113 +7268,135 @@ public: // If we found a map entry, signal that the pointer has to be returned // and move on to the next declaration. if (CI != It->second.end()) { - CI->ReturnDevicePointer = isa<MemberExpr>(IE) - ? (VD->getType()->isReferenceType() - ? MapInfo::RPK_MemberReference - : MapInfo::RPK_Member) - : MapInfo::RPK_Base; + CI->ReturnDevicePointer = true; continue; } } // We didn't find any match in our map information - generate a zero - // size array section. + // size array section - if the pointer is a struct member we defer this + // action until the whole struct has been processed. // FIXME: MSVC 2013 seems to require this-> to find member CGF. - llvm::Value *Ptr = - this->CGF - .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) - .getScalarVal(); - BasePointers.push_back({Ptr, VD}); - Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); - Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + if (isa<MemberExpr>(IE)) { + // Insert the pointer into Info to be processed by + // generateInfoForComponentList. Because it is a member pointer + // without a pointee, no entry will be generated for it, therefore + // we need to generate one after the whole struct has been processed. + // Nonetheless, generateInfoForComponentList must be called to take + // the pointer into account for the calculation of the range of the + // partial struct. + InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown, + /*ReturnDevicePointer=*/false, C->isImplicit()); + DeferredInfo[nullptr].emplace_back(IE, VD); + } else { + llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( + this->CGF.EmitLValue(IE), IE->getExprLoc()); + BasePointers.emplace_back(Ptr, VD); + Pointers.push_back(Ptr); + Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); + Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + } } + } - for (auto &M : Info) { + for (const auto &M : Info) { // We need to know when we generate information for the first component // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; - for (MapInfo &L : M.second) { + + // Temporary versions of arrays + MapBaseValuesArrayTy CurBasePointers; + MapValuesArrayTy CurPointers; + MapValuesArrayTy CurSizes; + MapFlagsArrayTy CurTypes; + StructRangeInfoTy PartialStruct; + + for (const MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); // Remember the current base pointer index. - unsigned CurrentBasePointersIdx = BasePointers.size(); + unsigned CurrentBasePointersIdx = CurBasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. 
this->generateInfoForComponentList( - L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, - Sizes, Types, IsFirstComponentList, L.IsImplicit); + L.MapType, L.MapTypeModifier, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. - if (IsFirstComponentList && - L.ReturnDevicePointer != MapInfo::RPK_None) { - // If the pointer is not the base of the map, we need to skip the - // base. If it is a reference in a member field, we also need to skip - // the map of the reference. - if (L.ReturnDevicePointer != MapInfo::RPK_Base) { - ++CurrentBasePointersIdx; - if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) - ++CurrentBasePointersIdx; - } - assert(BasePointers.size() > CurrentBasePointersIdx && + if (L.ReturnDevicePointer) { + assert(CurBasePointers.size() > CurrentBasePointersIdx && "Unexpected number of mapped base pointers."); - auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); + const ValueDecl *RelevantVD = + L.Components.back().getAssociatedDeclaration(); assert(RelevantVD && "No relevant declaration related with device pointer??"); - BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); - Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; + CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); + CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; } IsFirstComponentList = false; } + + // Append any pending zero-length pointers which are struct members and + // used with use_device_ptr. + auto CI = DeferredInfo.find(M.first); + if (CI != DeferredInfo.end()) { + for (const DeferredDevicePtrEntryTy &L : CI->second) { + llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); + llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( + this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); + CurBasePointers.emplace_back(BasePtr, L.VD); + CurPointers.push_back(Ptr); + CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); + // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder + // value MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | + OMP_MAP_MEMBER_OF); + } + } + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, + PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + Types.append(CurTypes.begin(), CurTypes.end()); } } - /// \brief Generate the base pointers, section pointers, sizes and map types + /// Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. 
void generateInfoForCapture(const CapturedStmt::Capture *Cap, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + StructRangeInfoTy &PartialStruct) const { assert(!Cap->capturesVariableArrayType() && "Not expecting to generate map info for a variable array type!"); - BasePointers.clear(); - Pointers.clear(); - Sizes.clear(); - Types.clear(); - // We need to know when we generating information for the first component // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; - const ValueDecl *VD = - Cap->capturesThis() - ? nullptr - : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); + const ValueDecl *VD = Cap->capturesThis() + ? nullptr + : Cap->getCapturedVar()->getCanonicalDecl(); // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just - // pass its value, otherwise, if it is a member expression, we need to map - // 'to' the field. - if (!VD) { - auto It = DevPointersMap.find(VD); - if (It != DevPointersMap.end()) { - for (auto L : It->second) { - generateInfoForComponentList( - /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, - BasePointers, Pointers, Sizes, Types, IsFirstComponentList, - /*IsImplicit=*/false); - IsFirstComponentList = false; - } - return; - } - } else if (DevPointersMap.count(VD)) { - BasePointers.push_back({Arg, VD}); + // pass its value. + if (DevPointersMap.count(VD)) { + BasePointers.emplace_back(Arg, VD); Pointers.push_back(Arg); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); @@ -6800,35 +7404,63 @@ public: } // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->decl_component_lists(VD)) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + for (const auto &L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList( - C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, - Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); + generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), + L.second, BasePointers, Pointers, Sizes, + Types, PartialStruct, IsFirstComponentList, + C->isImplicit()); IsFirstComponentList = false; } + } - return; + /// Generate the base pointers, section pointers, sizes and map types + /// associated with the declare target link variables. 
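In source form, the situation that helper covers looks roughly like the following (illustrative example, not from the patch): a global placed under "declare target link" is not allocated on the device at load time and only becomes usable there once a map clause on a target construct names it.

// A "declare target link" global: unlike "declare target to", it is not
// statically allocated on the device; it must be mapped explicitly before use.
enum { N = 1 << 20 };
static double lookup[N];
#pragma omp declare target link(lookup)

double sample(int i) {
  double r = 0.0;
#pragma omp target map(to: lookup[0:N]) map(tofrom: r)
  r = lookup[i];
  return r;
}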
+ void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + // Map other list items in the map clause which are not captured variables + // but "declare target link" global variables., + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { + for (const auto &L : C->component_lists()) { + if (!L.first) + continue; + const auto *VD = dyn_cast<VarDecl>(L.first); + if (!VD) + continue; + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD); + if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) + continue; + StructRangeInfoTy PartialStruct; + generateInfoForComponentList( + C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, + Pointers, Sizes, Types, PartialStruct, + /*IsFirstComponentList=*/true, C->isImplicit()); + assert(!PartialStruct.Base.isValid() && + "No partial structs for declare target link expected."); + } + } } - /// \brief Generate the default map information for a given capture \a CI, + /// Generate the default map information for a given capture \a CI, /// record field declaration \a RI and captured value \a CV. void generateDefaultMapInfo(const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, MapBaseValuesArrayTy &CurBasePointers, MapValuesArrayTy &CurPointers, MapValuesArrayTy &CurSizes, - MapFlagsArrayTy &CurMapTypes) { - + MapFlagsArrayTy &CurMapTypes) const { // Do the default mapping. if (CI.capturesThis()) { CurBasePointers.push_back(CV); CurPointers.push_back(CV); - const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); + const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); // Default map type. CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); @@ -6843,7 +7475,7 @@ public: } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). - CurMapTypes.push_back(0u); + CurMapTypes.push_back(OMP_MAP_NONE); CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); } } else { @@ -6851,30 +7483,30 @@ public: CurBasePointers.push_back(CV); CurPointers.push_back(CV); - const ReferenceType *PtrTy = - cast<ReferenceType>(RI.getType().getTypePtr()); + const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); CurSizes.push_back(CGF.getTypeSize(ElementType)); // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. - CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( - CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) - : OMP_MAP_TO)); + CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); } // Every default map produces a single argument which is a target parameter. CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; + + // Add flag stating this is an implicit map. + CurMapTypes.back() |= OMP_MAP_IMPLICIT; } }; enum OpenMPOffloadingReservedDeviceIDs { - /// \brief Device ID if the device was not defined, runtime should get it + /// Device ID if the device was not defined, runtime should get it /// from environment variables in the spec. 
OMP_DEVICEID_UNDEF = -1, }; } // anonymous namespace -/// \brief Emit the arrays used to pass the captures and map information to the +/// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. static void @@ -6884,8 +7516,8 @@ emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info) { - auto &CGM = CGF.CGM; - auto &Ctx = CGF.getContext(); + CodeGenModule &CGM = CGF.CGM; + ASTContext &Ctx = CGF.getContext(); // Reset the array information. Info.clearArrayInfo(); @@ -6895,7 +7527,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, // Detect if we have any capture size requiring runtime evaluation of the // size so that a constant array could be eventually used. bool hasRuntimeEvaluationCaptureSize = false; - for (auto *S : Sizes) + for (llvm::Value *S : Sizes) if (!isa<llvm::Constant>(S)) { hasRuntimeEvaluationCaptureSize = true; break; @@ -6924,48 +7556,53 @@ emitOffloadingArrays(CodeGenFunction &CGF, // We expect all the sizes to be constant, so we collect them to create // a constant array. SmallVector<llvm::Constant *, 16> ConstSizes; - for (auto S : Sizes) + for (llvm::Value *S : Sizes) ConstSizes.push_back(cast<llvm::Constant>(S)); auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); auto *SizesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - SizesArrayInit, ".offload_sizes"); + SizesArrayInit, Name); SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.SizesArray = SizesArrayGbl; } // The map types are always constant so we don't need to generate code to // fill arrays. Instead, we create an array constant. 
+ SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); + llvm::copy(MapTypes, Mapping.begin()); llvm::Constant *MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); + llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); + std::string MaptypesName = + CGM.getOpenMPRuntime().getName({"offload_maptypes"}); auto *MapTypesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), MapTypesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, ".offload_maptypes"); + MapTypesArrayInit, MaptypesName); MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.MapTypesArray = MapTypesArrayGbl; - for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { - llvm::Value *BPVal = *BasePointers[i]; + for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { + llvm::Value *BPVal = *BasePointers[I]; llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.BasePointersArray, 0, i); + Info.BasePointersArray, 0, I); BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); if (Info.requiresDevicePointerInfo()) - if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) - Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); + if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) + Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); - llvm::Value *PVal = Pointers[i]; + llvm::Value *PVal = Pointers[I]; llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, 0, i); + Info.PointersArray, 0, I); P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); @@ -6976,22 +7613,22 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, - /*Idx1=*/i); + /*Idx1=*/I); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); CGF.Builder.CreateStore( - CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), + CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), SAddr); } } } } -/// \brief Emit the arguments to be passed to the runtime library based on the +/// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. static void emitOffloadingArraysArgument( CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { - auto &CGM = CGF.CGM; + CodeGenModule &CGM = CGF.CGM; if (Info.NumberOfPtrs) { BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), @@ -7023,86 +7660,27 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef<llvm::Value *> CapturedVars) { + const Expr *IfCond, const Expr *Device) { if (!CGF.HaveInsertPoint()) return; assert(OutlinedFn && "Invalid outlined function!"); - // Fill up the arrays with all the captured variables. 
- MappableExprsHandler::MapValuesArrayTy KernelArgs; - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; - - MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; - MappableExprsHandler::MapValuesArrayTy CurPointers; - MappableExprsHandler::MapValuesArrayTy CurSizes; - MappableExprsHandler::MapFlagsArrayTy CurMapTypes; - - // Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - CurBasePointers.clear(); - CurPointers.clear(); - CurSizes.clear(); - CurMapTypes.clear(); - - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. - if (CI->capturesVariableArrayType()) { - CurBasePointers.push_back(*CV); - CurPointers.push_back(*CV); - CurSizes.push_back(CGF.getTypeSize(RI->getType())); - // Copy to the device as an argument. No need to retrieve it. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, - CurSizes, CurMapTypes); - if (CurBasePointers.empty()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, - CurPointers, CurSizes, CurMapTypes); - } - // We expect to have at least an element of information for this capture. - assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); - assert(CurBasePointers.size() == CurPointers.size() && - CurBasePointers.size() == CurSizes.size() && - CurBasePointers.size() == CurMapTypes.size() && - "Inconsistent map information sizes!"); - - // The kernel args are always the first elements of the base pointers - // associated with a capture. - KernelArgs.push_back(*CurBasePointers.front()); - // We need to append the results of this capture to what we already have. - BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); - Pointers.append(CurPointers.begin(), CurPointers.end()); - Sizes.append(CurSizes.begin(), CurSizes.end()); - MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); - } + const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + }; + emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFn, OutlinedFnID, &D, - &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); - // Emit the offloading arrays. 
- TargetDataInfo Info; - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); - + auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, + &MapTypesArray, &CS, RequiresOuterTask, + &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -7125,13 +7703,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, } // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + llvm::Value *PointerNum = + CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); // Return value of the runtime offloading call. llvm::Value *Return; - auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); - auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); + llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); + llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime @@ -7169,25 +7748,30 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // passed to the runtime library - a 32-bit integer with the value zero. assert(NumThreads && "Thread limit expression should be available along " "with number of teams."); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, - PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, NumTeams, - NumThreads}; + llvm::Value *OffloadingArgs[] = {DeviceID, + OutlinedFnID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray, + NumTeams, + NumThreads}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait - : OMPRTL__tgt_target_teams), + createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait + : OMPRTL__tgt_target_teams), OffloadingArgs); } else { - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, - PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray}; + llvm::Value *OffloadingArgs[] = {DeviceID, + OutlinedFnID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait - : OMPRTL__tgt_target), + createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait + : OMPRTL__tgt_target), OffloadingArgs); } @@ -7200,17 +7784,120 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. 
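The ThenGen body above emits the runtime call, tests its return value, and branches to a failure block that runs the outlined host version before control rejoins the continuation block. A rough host-side model of that generated control flow (try_offload and outlined_host_version are illustrative names, not the emitted symbols) is:

#include <iostream>

// Hypothetical shape of what the emitted IR does at run time: try the device,
// and if the runtime reports failure, run the outlined host version instead.
static int try_offload() { return 1; }        // nonzero = offload failed
static void outlined_host_version() { std::cout << "host fallback\n"; }

int main() {
  int ret = try_offload();
  if (ret != 0)                 // corresponds to the OffloadFailedBlock branch
    outlined_host_version();
  // execution continues here in either case (the OffloadContBlock)
  return 0;
}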
- auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, - PrePostActionTy &) { - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, - KernelArgs); + auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, + RequiresOuterTask](CodeGenFunction &CGF, + PrePostActionTy &) { + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); + }; + + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, + &CapturedVars, RequiresOuterTask, + &CS](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; + MappableExprsHandler::MapValuesArrayTy CurPointers; + MappableExprsHandler::MapValuesArrayTy CurSizes; + MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + MappableExprsHandler::StructRangeInfoTy PartialStruct; + + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurBasePointers.push_back(*CV); + CurPointers.push_back(*CV); + CurSizes.push_back(CGF.getTypeSize(RI->getType())); + // Copy to the device as an argument. No need to retrieve it. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | + MappableExprsHandler::OMP_MAP_TARGET_PARAM); + } else { + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, + CurSizes, CurMapTypes, PartialStruct); + if (CurBasePointers.empty()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, + CurPointers, CurSizes, CurMapTypes); + } + // We expect to have at least an element of information for this capture. + assert(!CurBasePointers.empty() && + "Non-existing map pointer for capture!"); + assert(CurBasePointers.size() == CurPointers.size() && + CurBasePointers.size() == CurSizes.size() && + CurBasePointers.size() == CurMapTypes.size() && + "Inconsistent map information sizes!"); + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, + CurMapTypes, PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); + } + // Map other list items in the map clause which are not captured variables + // but "declare target link" global variables. 
+ MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, + MapTypes); + + TargetDataInfo Info; + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = + Address(Info.BasePointersArray, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); + MapTypesArray = Info.MapTypesArray; + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); + }; + + auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( + CodeGenFunction &CGF, PrePostActionTy &) { + if (RequiresOuterTask) { + CodeGenFunction::OMPTargetDataInfo InputInfo; + CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); + } else { + emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); + } }; // If we have a target function ID it means that we need to support @@ -7218,14 +7905,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. if (OutlinedFnID) { - if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - else { - RegionCodeGenTy ThenRCG(ThenGen); + if (IfCond) { + emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); + } else { + RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } } else { - RegionCodeGenTy ElseRCG(ElseGen); + RegionCodeGenTy ElseRCG(TargetElseGen); ElseRCG(CGF); } } @@ -7236,13 +7923,13 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, return; // Codegen OMP target directives that offload compute to the device. 
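The TargetThenGen/TargetElseGen lambdas above package the offloading and host code paths as callables; depending on whether an outer task is required (depend clauses) and whether an if() clause is present, they are routed through EmitOMPTargetTaskBasedDirective, emitOMPIfClause, or emitted directly. A minimal stand-alone model of that callback structure (RegionGen, emitIfClause and the flags below are illustrative, not clang's real RegionCodeGenTy machinery) is:

#include <functional>
#include <iostream>

// Miniature model: branch bodies are packaged as callables and a helper
// decides whether to guard them with the if() clause or emit one directly.
using RegionGen = std::function<void()>;

static void emitIfClause(bool CondValue, const RegionGen &ThenGen,
                         const RegionGen &ElseGen) {
  if (CondValue)
    ThenGen();
  else
    ElseGen();
}

int main() {
  bool HaveDeviceVersion = true; // models OutlinedFnID != nullptr
  bool HaveIfClause = true;
  RegionGen TargetThenGen = [] { std::cout << "emit arrays + runtime call\n"; };
  RegionGen TargetElseGen = [] { std::cout << "emit host-only call\n"; };

  if (HaveDeviceVersion) {
    if (HaveIfClause)
      emitIfClause(/*CondValue=*/false, TargetThenGen, TargetElseGen);
    else
      TargetThenGen();
  } else {
    TargetElseGen();
  }
  return 0;
}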
- bool requiresDeviceCodegen = + bool RequiresDeviceCodegen = isa<OMPExecutableDirective>(S) && isOpenMPTargetExecutionDirective( cast<OMPExecutableDirective>(S)->getDirectiveKind()); - if (requiresDeviceCodegen) { - auto &E = *cast<OMPExecutableDirective>(S); + if (RequiresDeviceCodegen) { + const auto &E = *cast<OMPExecutableDirective>(S); unsigned DeviceID; unsigned FileID; unsigned Line; @@ -7255,66 +7942,118 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, ParentName, Line)) return; - switch (S->getStmtClass()) { - case Stmt::OMPTargetDirectiveClass: - CodeGenFunction::EmitOMPTargetDeviceFunction( - CGM, ParentName, cast<OMPTargetDirective>(*S)); + switch (E.getDirectiveKind()) { + case OMPD_target: + CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, + cast<OMPTargetDirective>(E)); break; - case Stmt::OMPTargetParallelDirectiveClass: + case OMPD_target_parallel: CodeGenFunction::EmitOMPTargetParallelDeviceFunction( - CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); + CGM, ParentName, cast<OMPTargetParallelDirective>(E)); break; - case Stmt::OMPTargetTeamsDirectiveClass: + case OMPD_target_teams: CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( - CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); + CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); break; - case Stmt::OMPTargetTeamsDistributeDirectiveClass: + case OMPD_target_teams_distribute: CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( - CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); + CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); break; - case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + case OMPD_target_teams_distribute_simd: CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( - CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); + CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); break; - case Stmt::OMPTargetParallelForDirectiveClass: + case OMPD_target_parallel_for: CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( - CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); + CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); break; - case Stmt::OMPTargetParallelForSimdDirectiveClass: + case OMPD_target_parallel_for_simd: CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( - CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S)); + CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); break; - case Stmt::OMPTargetSimdDirectiveClass: + case OMPD_target_simd: CodeGenFunction::EmitOMPTargetSimdDeviceFunction( - CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); + CGM, ParentName, cast<OMPTargetSimdDirective>(E)); break; - default: + case OMPD_target_teams_distribute_parallel_for: + CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( + CGM, ParentName, + cast<OMPTargetTeamsDistributeParallelForDirective>(E)); + break; + case OMPD_target_teams_distribute_parallel_for_simd: + CodeGenFunction:: + EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( + CGM, ParentName, + cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); + break; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case 
OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } return; } - if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { - if (!E->hasAssociatedStmt()) + if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { + if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) return; scanForTargetRegionsFunctions( - cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), - ParentName); + E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); return; } // If this is a lambda function, look into its body. - if (auto *L = dyn_cast<LambdaExpr>(S)) + if (const auto *L = dyn_cast<LambdaExpr>(S)) S = L->getBody(); // Keep looking for target regions recursively. - for (auto *II : S->children()) + for (const Stmt *II : S->children()) scanForTargetRegionsFunctions(II, ParentName); } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { - auto &FD = *cast<FunctionDecl>(GD.getDecl()); + const auto *FD = cast<FunctionDecl>(GD.getDecl()); // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. @@ -7322,12 +8061,11 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { return false; // Try to detect target regions in the function. - scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); + scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); - // We should not emit any function other that the ones created during the - // scanning. Therefore, we signal that this function is completely dealt - // with. - return true; + // Do not to emit function if it is not marked as declare target. + return !isDeclareTargetDeclaration(FD) && + AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -7338,33 +8076,101 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { // regions in it. We use the complete variant to produce the kernel name // mangling. QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); - if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { - for (auto *Ctor : RD->ctors()) { + if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { + for (const CXXConstructorDecl *Ctor : RD->ctors()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); } - auto *Dtor = RD->getDestructor(); - if (Dtor) { + if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); } } - // If we are in target mode, we do not emit any global (declare target is not - // implemented yet). 
Therefore we signal that GD was processed in this case. - return true; + // Do not to emit variable if it is not marked as declare target. + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl())); + return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link; +} + +void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, + llvm::Constant *Addr) { + if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD)) { + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; + StringRef VarName; + CharUnits VarSize; + llvm::GlobalValue::LinkageTypes Linkage; + switch (*Res) { + case OMPDeclareTargetDeclAttr::MT_To: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + VarName = CGM.getMangledName(VD); + VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); + Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + break; + case OMPDeclareTargetDeclAttr::MT_Link: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + if (CGM.getLangOpts().OpenMPIsDevice) { + VarName = Addr->getName(); + Addr = nullptr; + } else { + VarName = getAddrOfDeclareTargetLink(VD).getName(); + Addr = + cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + } + VarSize = CGM.getPointerSize(); + Linkage = llvm::GlobalValue::WeakAnyLinkage; + break; + } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, Flags, Linkage); + } } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { - auto *VD = GD.getDecl(); - if (isa<FunctionDecl>(VD)) + if (isa<FunctionDecl>(GD.getDecl())) return emitTargetFunctions(GD); return emitTargetGlobalVariable(GD); } +CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( + CodeGenModule &CGM) + : CGM(CGM) { + if (CGM.getLangOpts().OpenMPIsDevice) { + SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; + CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; + } +} + +CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { + if (CGM.getLangOpts().OpenMPIsDevice) + CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; +} + +bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { + if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) + return true; + + const auto *D = cast<FunctionDecl>(GD.getDecl()); + const FunctionDecl *FD = D->getCanonicalDecl(); + // Do not to emit function if it is marked as declare target as it was already + // emitted. + if (isDeclareTargetDeclaration(D)) { + if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>( + CGM.GetGlobalValue(CGM.getMangledName(GD)))) + return !F->isDeclaration(); + return false; + } + return true; + } + + return !AlreadyEmittedTargetFunctions.insert(FD).second; +} + llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor. 
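DisableAutoDeclareTargetRAII above saves the ShouldMarkAsGlobal flag, clears it for the lifetime of the guard (only in device compilation), and restores the previous value on destruction. A minimal stand-alone sketch of that save/restore RAII pattern, with hypothetical names, is:

#include <cassert>

// Miniature model of the save/restore guard added above: flip a module-wide
// flag for a scope and restore the previous value on scope exit, even on
// early returns or exceptions.
struct Runtime {
  bool ShouldMarkAsGlobal = true;
};

class DisableAutoMarkRAII {
  Runtime &RT;
  bool Saved;

public:
  explicit DisableAutoMarkRAII(Runtime &R)
      : RT(R), Saved(R.ShouldMarkAsGlobal) {
    RT.ShouldMarkAsGlobal = false;
  }
  ~DisableAutoMarkRAII() { RT.ShouldMarkAsGlobal = Saved; }
};

int main() {
  Runtime RT;
  {
    DisableAutoMarkRAII Guard(RT);   // marking suppressed in this scope
    assert(!RT.ShouldMarkAsGlobal);
  }
  assert(RT.ShouldMarkAsGlobal);     // restored automatically
  return 0;
}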
@@ -7384,7 +8190,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); CodeGenFunction::RunCleanupsScope Scope(CGF); // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); @@ -7396,7 +8202,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); CGF.EmitRuntimeCall(RTLFn, RealArgs); } @@ -7407,16 +8213,16 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *NumTeamsVal = - (NumTeams) + NumTeams ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), CGF.CGM.Int32Ty, /* isSigned = */ true) : CGF.Builder.getInt32(0); llvm::Value *ThreadLimitVal = - (ThreadLimit) + ThreadLimit ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), CGF.CGM.Int32Ty, /* isSigned = */ true) : CGF.Builder.getInt32(0); @@ -7473,7 +8279,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); + llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, @@ -7509,7 +8315,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); + llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, @@ -7596,9 +8402,6 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); OpenMPRTLFunction RTLFn; switch (D.getDirectiveKind()) { - default: - llvm_unreachable("Unexpected standalone target data directive."); - break; case OMPD_target_enter_data: RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait : OMPRTL__tgt_target_data_begin; @@ -7611,6 +8414,58 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_update_nowait : OMPRTL__tgt_target_data_update; break; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_target: + case OMPD_target_simd: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_unknown: + llvm_unreachable("Unexpected standalone target data directive."); + break; } CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); }; @@ -7644,13 +8499,13 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( if (D.hasClausesOfKind<OMPDependClause>()) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else - emitInlinedDirective(CGF, OMPD_target_update, ThenGen); + emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, TargetThenGen, [](CodeGenFunction &CGF, PrePostActionTy &) {}); - else { + } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } @@ -7693,11 +8548,11 @@ static unsigned evaluateCDTSize(const FunctionDecl *FD, return 0; ASTContext &C = FD->getASTContext(); QualType CDT; - if (!RetType.isNull() && !RetType->isVoidType()) + if (!RetType.isNull() && !RetType->isVoidType()) { CDT = RetType; - else { + } else { unsigned Offset = 0; - if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { if (ParamAttrs[Offset].Kind == Vector) CDT = C.getPointerType(C.getRecordType(MD->getParent())); ++Offset; @@ -7755,17 +8610,18 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, Masked.push_back('M'); break; } - for (auto Mask : Masked) { - for (auto &Data : ISAData) { + for (char Mask : Masked) { + for (const ISADataTy &Data : ISAData) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << "_ZGV" << Data.ISA << Mask; if (!VLENVal) { Out << llvm::APSInt::getUnsigned(Data.VecRegSize / evaluateCDTSize(FD, ParamAttrs)); - } else + } else { Out << VLENVal; - for (auto &ParamAttr : ParamAttrs) { + } + for (const ParamAttrTy &ParamAttr : ParamAttrs) { switch (ParamAttr.Kind){ case LinearWithVarStride: Out << 's' << ParamAttr.StrideOrArg; @@ -7794,90 +8650,95 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); - FD = 
FD->getCanonicalDecl(); + FD = FD->getMostRecentDecl(); // Map params to their positions in function decl. llvm::DenseMap<const Decl *, unsigned> ParamPositions; if (isa<CXXMethodDecl>(FD)) - ParamPositions.insert({FD, 0}); + ParamPositions.try_emplace(FD, 0); unsigned ParamPos = ParamPositions.size(); - for (auto *P : FD->parameters()) { - ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); + for (const ParmVarDecl *P : FD->parameters()) { + ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); ++ParamPos; } - for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { - llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); - // Mark uniform parameters. - for (auto *E : Attr->uniforms()) { - E = E->IgnoreParenImpCasts(); - unsigned Pos; - if (isa<CXXThisExpr>(E)) - Pos = ParamPositions[FD]; - else { - auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) - ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; - } - ParamAttrs[Pos].Kind = Uniform; - } - // Get alignment info. - auto NI = Attr->alignments_begin(); - for (auto *E : Attr->aligneds()) { - E = E->IgnoreParenImpCasts(); - unsigned Pos; - QualType ParmTy; - if (isa<CXXThisExpr>(E)) { - Pos = ParamPositions[FD]; - ParmTy = E->getType(); - } else { - auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) - ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; - ParmTy = PVD->getType(); + while (FD) { + for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { + llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); + // Mark uniform parameters. + for (const Expr *E : Attr->uniforms()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + } else { + const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + ParamAttrs[Pos].Kind = Uniform; } - ParamAttrs[Pos].Alignment = - (*NI) ? (*NI)->EvaluateKnownConstInt(C) + // Get alignment info. + auto NI = Attr->alignments_begin(); + for (const Expr *E : Attr->aligneds()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + QualType ParmTy; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + ParmTy = E->getType(); + } else { + const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + ParmTy = PVD->getType(); + } + ParamAttrs[Pos].Alignment = + (*NI) + ? (*NI)->EvaluateKnownConstInt(C) : llvm::APSInt::getUnsigned( C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) .getQuantity()); - ++NI; - } - // Mark linear parameters. - auto SI = Attr->steps_begin(); - auto MI = Attr->modifiers_begin(); - for (auto *E : Attr->linears()) { - E = E->IgnoreParenImpCasts(); - unsigned Pos; - if (isa<CXXThisExpr>(E)) - Pos = ParamPositions[FD]; - else { - auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) - ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; + ++NI; } - auto &ParamAttr = ParamAttrs[Pos]; - ParamAttr.Kind = Linear; - if (*SI) { - if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, - Expr::SE_AllowSideEffects)) { - if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { - if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { - ParamAttr.Kind = LinearWithVarStride; - ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( - ParamPositions[StridePVD->getCanonicalDecl()]); + // Mark linear parameters. 
+ auto SI = Attr->steps_begin(); + auto MI = Attr->modifiers_begin(); + for (const Expr *E : Attr->linears()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + } else { + const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + ParamAttrTy &ParamAttr = ParamAttrs[Pos]; + ParamAttr.Kind = Linear; + if (*SI) { + if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, + Expr::SE_AllowSideEffects)) { + if (const auto *DRE = + cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { + if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { + ParamAttr.Kind = LinearWithVarStride; + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( + ParamPositions[StridePVD->getCanonicalDecl()]); + } } } } + ++SI; + ++MI; } - ++SI; - ++MI; + llvm::APSInt VLENVal; + if (const Expr *VLEN = Attr->getSimdlen()) + VLENVal = VLEN->EvaluateKnownConstInt(C); + OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); + if (CGM.getTriple().getArch() == llvm::Triple::x86 || + CGM.getTriple().getArch() == llvm::Triple::x86_64) + emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } - llvm::APSInt VLENVal; - if (const Expr *VLEN = Attr->getSimdlen()) - VLENVal = VLEN->EvaluateKnownConstInt(C); - OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) - emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + FD = FD->getPreviousDecl(); } } @@ -7926,8 +8787,9 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, addFieldToRecordDecl(C, RD, Int64Ty); RD->completeDefinition(); KmpDimTy = C.getRecordType(RD); - } else + } else { RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); + } Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); CGF.EmitNullInitialization(DimsAddr, KmpDimTy); @@ -7979,18 +8841,19 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, getThreadID(CGF, C->getLocStart()), CntAddr.getPointer()}; llvm::Value *RTLFn; - if (C->getDependencyKind() == OMPC_DEPEND_source) + if (C->getDependencyKind() == OMPC_DEPEND_source) { RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); - else { + } else { assert(C->getDependencyKind() == OMPC_DEPEND_sink); RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); } CGF.EmitRuntimeCall(RTLFn, Args); } -void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - SourceLocation Loc) const { +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *Callee, + ArrayRef<llvm::Value *> Args) const { + assert(Loc.isValid() && "Outlined function call location must be valid."); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { @@ -8005,8 +8868,7 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, void CGOpenMPRuntime::emitOutlinedFunctionCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> Args) const { - assert(Loc.isValid() && "Outlined function call location must be valid."); - emitCall(CGF, OutlinedFn, Args, Loc); + emitCall(CGF, Loc, OutlinedFn, Args); } Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, @@ -8014,3 +8876,303 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *TargetParam) const { return 
CGF.GetAddrOfLocalVar(NativeParam); } + +Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, + const VarDecl *VD) { + return Address::invalid(); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitCriticalRegion( + CodeGenFunction &CGF, StringRef CriticalName, + const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, + const Expr *Hint) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &MasterOpGen, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskgroupRegion( + CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitSingleRegion( + CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, + SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, + ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, + ArrayRef<const Expr *> AssignmentOps) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &OrderedOpGen, + SourceLocation Loc, + bool IsThreads) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind Kind, + bool EmitChecks, + bool ForceSimpleCall) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForDispatchInit( + CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, + bool Ordered, const DispatchRTInput &DispatchValues) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, + const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitDistributeStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { + llvm_unreachable("Not supported in SIMD-only 
mode"); +} + +void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, + SourceLocation Loc, + unsigned IVSize, + bool IVSigned) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind DKind) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, + SourceLocation Loc, + unsigned IVSize, bool IVSigned, + Address IL, Address LB, + Address UB, Address ST) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, + OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, + const VarDecl *VD, + Address VDAddr, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( + const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, + CodeGenFunction *CGF) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( + CodeGenFunction &CGF, QualType VarType, StringRef Name) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, + ArrayRef<const Expr *> Vars, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, + const Expr *IfCond, const OMPTaskDataTy &Data) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { + assert(Options.SimpleReduction && "Only simple reduction is expected."); + CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, + ReductionOps, Options); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, + 
SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitCancellationPointCall( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind CancelRegion) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, + SourceLocation Loc, const Expr *IfCond, + OpenMPDirectiveKind CancelRegion) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, + llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { + return false; +} + +llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { + return nullptr; +} + +void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetDataCalls( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +const VarDecl * +CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address +CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + llvm_unreachable("Not supported in SIMD-only mode"); +} + diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 94a143841373..01ff0c20fd66 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -133,7 +133,7 @@ private: /// Base declarations for the reduction items. SmallVector<const VarDecl *, 4> BaseDecls; - /// Emits lvalue for shared expresion. + /// Emits lvalue for shared expression. 
LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); /// Emits upper bound for shared expression (if array section). LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E); @@ -191,21 +191,41 @@ public: } /// Returns the base declaration of the reduction item. const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } + /// Returns the base declaration of the reduction item. + const Expr *getRefExpr(unsigned N) const { return ClausesData[N].Ref; } /// Returns true if the initialization of the reduction item uses initializer /// from declare reduction construct. bool usesReductionInitializer(unsigned N) const; }; class CGOpenMPRuntime { +public: + /// Allows to disable automatic handling of functions used in target regions + /// as those marked as `omp declare target`. + class DisableAutoDeclareTargetRAII { + CodeGenModule &CGM; + bool SavedShouldMarkAsGlobal; + + public: + DisableAutoDeclareTargetRAII(CodeGenModule &CGM); + ~DisableAutoDeclareTargetRAII(); + }; + protected: CodeGenModule &CGM; + StringRef FirstSeparator, Separator; - /// \brief Creates offloading entry for the provided entry ID \a ID, + /// Constructor allowing to redefine the name separator for the variables. + explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, + StringRef Separator); + + /// Creates offloading entry for the provided entry ID \a ID, /// address \a Addr, size \a Size, and flags \a Flags. virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags = 0); + uint64_t Size, int32_t Flags, + llvm::GlobalValue::LinkageTypes Linkage); - /// \brief Helper to emit outlined function for 'target' directive. + /// Helper to emit outlined function for 'target' directive. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. /// \param OutlinedFn Outlined function value to be defined by this call. @@ -221,7 +241,7 @@ protected: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen + /// Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { /// ThenGen(); @@ -232,52 +252,56 @@ protected: const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen); - /// \brief Emits object of ident_t type with info for source location. + /// Emits object of ident_t type with info for source location. /// \param Flags Flags for OpenMP location. /// llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags = 0); - /// \brief Returns pointer to ident_t type. + /// Returns pointer to ident_t type. llvm::Type *getIdentTyPointerTy(); - /// \brief Gets thread id value for the current thread. + /// Gets thread id value for the current thread. /// llvm::Value *getThreadID(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Get the function name of an outlined region. + /// Get the function name of an outlined region. // The name can be customized depending on the target. // virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } /// Emits \p Callee function call with arguments \p Args with location \p Loc. 
- void emitCall(CodeGenFunction &CGF, llvm::Value *Callee, - ArrayRef<llvm::Value *> Args = llvm::None, - SourceLocation Loc = SourceLocation()) const; + void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee, + ArrayRef<llvm::Value *> Args = llvm::None) const; + + /// Emits address of the word in a memory where current thread id is + /// stored. + virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); private: - /// \brief Default const ident_t object used for initialization of all other + /// Default const ident_t object used for initialization of all other /// ident_t objects. llvm::Constant *DefaultOpenMPPSource = nullptr; - /// \brief Map of flags and corresponding default locations. + /// Map of flags and corresponding default locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy; OpenMPDefaultLocMapTy OpenMPDefaultLocMap; Address getOrCreateDefaultLocation(unsigned Flags); + QualType IdentQTy; llvm::StructType *IdentTy = nullptr; - /// \brief Map for SourceLocation and OpenMP runtime library debug locations. + /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; - /// \brief The type for a microtask which gets passed to __kmpc_fork_call(). + /// The type for a microtask which gets passed to __kmpc_fork_call(). /// Original representation is: /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...); llvm::FunctionType *Kmpc_MicroTy = nullptr; - /// \brief Stores debug location and ThreadID for the function. + /// Stores debug location and ThreadID for the function. struct DebugLocThreadIdTy { llvm::Value *DebugLoc; llvm::Value *ThreadID; }; - /// \brief Map of local debug location, ThreadId and functions. + /// Map of local debug location, ThreadId and functions. typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy> OpenMPLocThreadIDMapTy; OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap; @@ -295,20 +319,20 @@ private: IdentifierInfo *Out = nullptr; IdentifierInfo *Priv = nullptr; IdentifierInfo *Orig = nullptr; - /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32 + /// Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; - /// \brief An ordered map of auto-generated variables to their unique names. + /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// <critical_section_name> + ".var" for "omp critical" directives; 2) /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate /// variables. llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator> InternalVars; - /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); + /// Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); llvm::Type *KmpRoutineEntryPtrTy = nullptr; QualType KmpRoutineEntryPtrQTy; - /// \brief Type typedef struct kmp_task { + /// Type typedef struct kmp_task { /// void * shareds; /**< pointer to block of pointers to /// shared vars */ /// kmp_routine_entry_t routine; /**< pointer to routine to call for @@ -322,7 +346,7 @@ private: QualType SavedKmpTaskTQTy; /// Saved kmp_task_t for taskloop-based directive. 
QualType SavedKmpTaskloopTQTy; - /// \brief Type typedef struct kmp_depend_info { + /// Type typedef struct kmp_depend_info { /// kmp_intptr_t base_addr; /// size_t len; /// struct { @@ -337,7 +361,7 @@ private: /// kmp_int64 st; // stride /// }; QualType KmpDimTy; - /// \brief Type struct __tgt_offload_entry{ + /// Type struct __tgt_offload_entry{ /// void *addr; // Pointer to the offload entry info. /// // (function or global) /// char *name; // Name of the function or global. @@ -365,112 +389,195 @@ private: /// // entries (non inclusive). /// }; QualType TgtBinaryDescriptorQTy; - /// \brief Entity that registers the offloading constants that were emitted so + /// Entity that registers the offloading constants that were emitted so /// far. class OffloadEntriesInfoManagerTy { CodeGenModule &CGM; - /// \brief Number of entries registered so far. - unsigned OffloadingEntriesNum; + /// Number of entries registered so far. + unsigned OffloadingEntriesNum = 0; public: /// Base class of the entries info. class OffloadEntryInfo { public: - /// Kind of a given entry. Currently, only target regions are - /// supported. + /// Kind of a given entry. enum OffloadingEntryInfoKinds : unsigned { - // Entry is a target region. - OFFLOAD_ENTRY_INFO_TARGET_REGION = 0, - // Invalid entry info. - OFFLOAD_ENTRY_INFO_INVALID = ~0u + /// Entry is a target region. + OffloadingEntryInfoTargetRegion = 0, + /// Entry is a declare target variable. + OffloadingEntryInfoDeviceGlobalVar = 1, + /// Invalid entry info. + OffloadingEntryInfoInvalid = ~0u }; - OffloadEntryInfo() - : Flags(0), Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {} + protected: + OffloadEntryInfo() = delete; + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, - int32_t Flags) + uint32_t Flags) : Flags(Flags), Order(Order), Kind(Kind) {} + ~OffloadEntryInfo() = default; + public: bool isValid() const { return Order != ~0u; } unsigned getOrder() const { return Order; } OffloadingEntryInfoKinds getKind() const { return Kind; } - int32_t getFlags() const { return Flags; } - void setFlags(int32_t NewFlags) { Flags = NewFlags; } + uint32_t getFlags() const { return Flags; } + void setFlags(uint32_t NewFlags) { Flags = NewFlags; } + llvm::Constant *getAddress() const { + return cast_or_null<llvm::Constant>(Addr); + } + void setAddress(llvm::Constant *V) { + assert(!Addr.pointsToAliveValue() && "Address has been set before!"); + Addr = V; + } static bool classof(const OffloadEntryInfo *Info) { return true; } private: + /// Address of the entity that has to be mapped for offloading. + llvm::WeakTrackingVH Addr; + /// Flags associated with the device global. - int32_t Flags; + uint32_t Flags = 0u; /// Order this entry was emitted. - unsigned Order; + unsigned Order = ~0u; - OffloadingEntryInfoKinds Kind; + OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; }; - /// \brief Return true if a there are no entries defined. + /// Return true if a there are no entries defined. bool empty() const; - /// \brief Return number of entries defined so far. + /// Return number of entries defined so far. unsigned size() const { return OffloadingEntriesNum; } - OffloadEntriesInfoManagerTy(CodeGenModule &CGM) - : CGM(CGM), OffloadingEntriesNum(0) {} - - /// - /// Target region entries related. - /// - /// \brief Target region entries info. 
- class OffloadEntryInfoTargetRegion : public OffloadEntryInfo { - // \brief Address of the entity that has to be mapped for offloading. - llvm::Constant *Addr; - // \brief Address that can be used as the ID of the entry. - llvm::Constant *ID; + OffloadEntriesInfoManagerTy(CodeGenModule &CGM) : CGM(CGM) {} + + // + // Target region entries related. + // + + /// Kind of the target registry entry. + enum OMPTargetRegionEntryKind : uint32_t { + /// Mark the entry as target region. + OMPTargetRegionEntryTargetRegion = 0x0, + /// Mark the entry as a global constructor. + OMPTargetRegionEntryCtor = 0x02, + /// Mark the entry as a global destructor. + OMPTargetRegionEntryDtor = 0x04, + }; + + /// Target region entries info. + class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { + /// Address that can be used as the ID of the entry. + llvm::Constant *ID = nullptr; public: OffloadEntryInfoTargetRegion() - : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u, - /*Flags=*/0), - Addr(nullptr), ID(nullptr) {} + : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} explicit OffloadEntryInfoTargetRegion(unsigned Order, llvm::Constant *Addr, - llvm::Constant *ID, int32_t Flags) - : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order, Flags), - Addr(Addr), ID(ID) {} + llvm::Constant *ID, + OMPTargetRegionEntryKind Flags) + : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), + ID(ID) { + setAddress(Addr); + } - llvm::Constant *getAddress() const { return Addr; } llvm::Constant *getID() const { return ID; } - void setAddress(llvm::Constant *V) { - assert(!Addr && "Address as been set before!"); - Addr = V; - } void setID(llvm::Constant *V) { - assert(!ID && "ID as been set before!"); + assert(!ID && "ID has been set before!"); ID = V; } static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OFFLOAD_ENTRY_INFO_TARGET_REGION; + return Info->getKind() == OffloadingEntryInfoTargetRegion; } }; - /// \brief Initialize target region entry. + + /// Initialize target region entry. void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order); - /// \brief Register target region entry. + /// Register target region entry. void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, - int32_t Flags); - /// \brief Return true if a target region entry with the provided - /// information exists. + OMPTargetRegionEntryKind Flags); + /// Return true if a target region entry with the provided information + /// exists. bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const; /// brief Applies action \a Action on all registered entries. typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned, - OffloadEntryInfoTargetRegion &)> + const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy; void actOnTargetRegionEntriesInfo( const OffloadTargetRegionEntryInfoActTy &Action); + // + // Device global variable entries related. + // + + /// Kind of the global variable entry.. + enum OMPTargetGlobalVarEntryKind : uint32_t { + /// Mark the entry as a to declare target. + OMPTargetGlobalVarEntryTo = 0x0, + /// Mark the entry as a to declare target link. + OMPTargetGlobalVarEntryLink = 0x1, + }; + + /// Device global variable entries info. 
+ class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { + /// Type of the global variable. + CharUnits VarSize; + llvm::GlobalValue::LinkageTypes Linkage; + + public: + OffloadEntryInfoDeviceGlobalVar() + : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} + explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, + OMPTargetGlobalVarEntryKind Flags) + : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} + explicit OffloadEntryInfoDeviceGlobalVar( + unsigned Order, llvm::Constant *Addr, CharUnits VarSize, + OMPTargetGlobalVarEntryKind Flags, + llvm::GlobalValue::LinkageTypes Linkage) + : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), + VarSize(VarSize), Linkage(Linkage) { + setAddress(Addr); + } + + CharUnits getVarSize() const { return VarSize; } + void setVarSize(CharUnits Size) { VarSize = Size; } + llvm::GlobalValue::LinkageTypes getLinkage() const { return Linkage; } + void setLinkage(llvm::GlobalValue::LinkageTypes LT) { Linkage = LT; } + static bool classof(const OffloadEntryInfo *Info) { + return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; + } + }; + + /// Initialize device global variable entry. + void initializeDeviceGlobalVarEntryInfo(StringRef Name, + OMPTargetGlobalVarEntryKind Flags, + unsigned Order); + + /// Register device global variable entry. + void + registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, + CharUnits VarSize, + OMPTargetGlobalVarEntryKind Flags, + llvm::GlobalValue::LinkageTypes Linkage); + /// Checks if the variable with the given name has been registered already. + bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { + return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; + } + /// Applies action \a Action on all registered entries. + typedef llvm::function_ref<void(StringRef, + const OffloadEntryInfoDeviceGlobalVar &)> + OffloadDeviceGlobalVarEntryInfoActTy; + void actOnDeviceGlobalVarEntriesInfo( + const OffloadDeviceGlobalVarEntryInfoActTy &Action); + private: // Storage for target region entries kind. The storage is to be indexed by // file ID, device ID, parent function name and line number. @@ -484,75 +591,79 @@ private: OffloadEntriesTargetRegionPerDevice; typedef OffloadEntriesTargetRegionPerDevice OffloadEntriesTargetRegionTy; OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; + /// Storage for device global variable entries kind. The storage is to be + /// indexed by mangled name. + typedef llvm::StringMap<OffloadEntryInfoDeviceGlobalVar> + OffloadEntriesDeviceGlobalVarTy; + OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; }; OffloadEntriesInfoManagerTy OffloadEntriesInfoManager; - /// \brief Creates and registers offloading binary descriptor for the current + bool ShouldMarkAsGlobal = true; + llvm::SmallDenseSet<const FunctionDecl *> AlreadyEmittedTargetFunctions; + + /// Creates and registers offloading binary descriptor for the current /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); - /// \brief Creates all the offload entries in the current compilation unit + /// Creates all the offload entries in the current compilation unit /// along with the associated metadata. void createOffloadEntriesAndInfoMetadata(); - /// \brief Loads all the offload entries information from the host IR + /// Loads all the offload entries information from the host IR /// metadata. 
void loadOffloadInfoMetadata(); - /// \brief Returns __tgt_offload_entry type. + /// Returns __tgt_offload_entry type. QualType getTgtOffloadEntryQTy(); - /// \brief Returns __tgt_device_image type. + /// Returns __tgt_device_image type. QualType getTgtDeviceImageQTy(); - /// \brief Returns __tgt_bin_desc type. + /// Returns __tgt_bin_desc type. QualType getTgtBinaryDescriptorQTy(); - /// \brief Start scanning from statement \a S and and emit all target regions + /// Start scanning from statement \a S and and emit all target regions /// found along the way. /// \param S Starting statement. /// \param ParentName Name of the function declaration that is being scanned. void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName); - /// \brief Build type kmp_routine_entry_t (if not built yet). + /// Build type kmp_routine_entry_t (if not built yet). void emitKmpRoutineEntryT(QualType KmpInt32Ty); - /// \brief Returns pointer to kmpc_micro type. + /// Returns pointer to kmpc_micro type. llvm::Type *getKmpc_MicroPointerTy(); - /// \brief Returns specified OpenMP runtime function. + /// Returns specified OpenMP runtime function. /// \param Function OpenMP runtime function. /// \return Specified function. llvm::Constant *createRuntimeFunction(unsigned Function); - /// \brief Returns __kmpc_for_static_init_* runtime function for the specified + /// Returns __kmpc_for_static_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. llvm::Constant *createForStaticInitFunction(unsigned IVSize, bool IVSigned); - /// \brief Returns __kmpc_dispatch_init_* runtime function for the specified + /// Returns __kmpc_dispatch_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned); - /// \brief Returns __kmpc_dispatch_next_* runtime function for the specified + /// Returns __kmpc_dispatch_next_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned); - /// \brief Returns __kmpc_dispatch_fini_* runtime function for the specified + /// Returns __kmpc_dispatch_fini_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. llvm::Constant *createDispatchFiniFunction(unsigned IVSize, bool IVSigned); - /// \brief If the specified mangled name is not in the module, create and + /// If the specified mangled name is not in the module, create and /// return threadprivate cache object. This object is a pointer's worth of /// storage that's reserved for use by the OpenMP runtime. /// \param VD Threadprivate variable. /// \return Cache variable for the specified threadprivate. llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD); - /// \brief Emits address of the word in a memory where current thread id is - /// stored. - virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); - - /// \brief Gets (if variable with the given name already exist) or creates + /// Gets (if variable with the given name already exist) or creates /// internal global variable with the specified Name. The created variable has /// linkage CommonLinkage by default and is initialized by null value. /// \param Ty Type of the global variable. If it is exist already the type @@ -561,10 +672,13 @@ private: llvm::Constant *getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name); - /// \brief Set of threadprivate variables with the generated initializer. 
+ /// Set of threadprivate variables with the generated initializer. llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition; - /// \brief Emits initialization code for the threadprivate variables. + /// Set of declare target variables with the generated initializer. + llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition; + + /// Emits initialization code for the threadprivate variables. /// \param VDAddr Address of the global variable \a VD. /// \param Ctor Pointer to a global init function for \a VD. /// \param CopyCtor Pointer to a global copy function for \a VD. @@ -574,7 +688,7 @@ private: llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc); - /// \brief Returns corresponding lock object for the specified critical region + /// Returns corresponding lock object for the specified critical region /// name. If the lock object does not exist it is created, otherwise the /// reference to the existing copy is returned. /// \param CriticalName Name of the critical region. @@ -586,7 +700,7 @@ private: llvm::Value *TaskEntry = nullptr; llvm::Value *NewTaskNewTaskTTy = nullptr; LValue TDBase; - RecordDecl *KmpTaskTQTyRD = nullptr; + const RecordDecl *KmpTaskTQTyRD = nullptr; llvm::Value *TaskDupFn = nullptr; }; /// Emit task region for the task directive. The task region is emitted in @@ -617,10 +731,14 @@ private: Address Shareds, const OMPTaskDataTy &Data); public: - explicit CGOpenMPRuntime(CodeGenModule &CGM); + explicit CGOpenMPRuntime(CodeGenModule &CGM) + : CGOpenMPRuntime(CGM, ".", ".") {} virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Get the platform-specific name separator. + std::string getName(ArrayRef<StringRef> Parts) const; + /// Emit code for the specified user defined reduction construct. virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D); @@ -628,7 +746,7 @@ public: virtual std::pair<llvm::Function *, llvm::Function *> getUserDefinedReduction(const OMPDeclareReductionDecl *D); - /// \brief Emits outlined function for the specified OpenMP parallel directive + /// Emits outlined function for the specified OpenMP parallel directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). /// \param D OpenMP directive. @@ -640,7 +758,7 @@ public: const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); - /// \brief Emits outlined function for the specified OpenMP teams directive + /// Emits outlined function for the specified OpenMP teams directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). /// \param D OpenMP directive. @@ -652,7 +770,7 @@ public: const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); - /// \brief Emits outlined function for the OpenMP task directive \a D. This + /// Emits outlined function for the OpenMP task directive \a D. This /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* /// TaskT). /// \param D OpenMP directive. @@ -673,11 +791,11 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts); - /// \brief Cleans up references to the objects in finished function. + /// Cleans up references to the objects in finished function. 
/// - void functionFinished(CodeGenFunction &CGF); + virtual void functionFinished(CodeGenFunction &CGF); - /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. /// \param OutlinedFn Outlined function to be run in parallel threads. Type of @@ -692,7 +810,7 @@ public: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); - /// \brief Emits a critical region. + /// Emits a critical region. /// \param CriticalName Name of the critical region. /// \param CriticalOpGen Generator for the statement associated with the given /// critical region. @@ -702,24 +820,24 @@ public: SourceLocation Loc, const Expr *Hint = nullptr); - /// \brief Emits a master region. + /// Emits a master region. /// \param MasterOpGen Generator for the statement associated with the given /// master region. virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc); - /// \brief Emits code for a taskyield directive. + /// Emits code for a taskyield directive. virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Emit a taskgroup region. + /// Emit a taskgroup region. /// \param TaskgroupOpGen Generator for the statement associated with the /// given taskgroup region. virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc); - /// \brief Emits a single region. + /// Emits a single region. /// \param SingleOpGen Generator for the statement associated with the given /// single region. virtual void emitSingleRegion(CodeGenFunction &CGF, @@ -730,14 +848,14 @@ public: ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps); - /// \brief Emit an ordered region. + /// Emit an ordered region. /// \param OrderedOpGen Generator for the statement associated with the given /// ordered region. virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads); - /// \brief Emit an implicit/explicit barrier for OpenMP threads. + /// Emit an implicit/explicit barrier for OpenMP threads. /// \param Kind Directive for which this implicit barrier call must be /// generated. Must be OMPD_barrier for explicit barrier generation. /// \param EmitChecks true if need to emit checks for cancellation barriers. @@ -750,7 +868,7 @@ public: bool EmitChecks = true, bool ForceSimpleCall = false); - /// \brief Check if the specified \a ScheduleKind is static non-chunked. + /// Check if the specified \a ScheduleKind is static non-chunked. /// This kind of worksharing directive is emitted without outer loop. /// \param ScheduleKind Schedule kind specified in the 'schedule' clause. /// \param Chunked True if chunk is specified in the clause. @@ -758,7 +876,7 @@ public: virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const; - /// \brief Check if the specified \a ScheduleKind is static non-chunked. + /// Check if the specified \a ScheduleKind is static non-chunked. /// This kind of distribute directive is emitted without outer loop. /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause. /// \param Chunked True if chunk is specified in the clause. 
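// Illustrative sketch (not from the patch; my reading of the comments in this
// header, which name the libomp entry points used below). The
// isStaticNonchunked() queries documented here decide whether loop codegen can
// take the single static-init path or needs a dispatch loop:

void saxpy(int n, float a, const float *x, float *y) {
  // schedule(static) with no chunk: isStaticNonchunked() is true, so the loop
  // is lowered with one __kmpc_for_static_init_4 / __kmpc_for_static_fini pair
  // (the 32-bit variants, for an int induction variable) and no outer loop.
#pragma omp parallel for schedule(static)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];

  // schedule(static, 4): still the static-init runtime path, but chunked, so
  // an outer loop over chunks is emitted (isStaticNonchunked() is false).
#pragma omp parallel for schedule(static, 4)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];

  // schedule(dynamic): handled through the dispatch interface declared
  // elsewhere in this header, i.e. __kmpc_dispatch_init_4 followed by a loop
  // driven by __kmpc_dispatch_next_4.
#pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}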
@@ -766,7 +884,7 @@ public: virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const; - /// \brief Check if the specified \a ScheduleKind is dynamic. + /// Check if the specified \a ScheduleKind is dynamic. /// This kind of worksharing directive is emitted without outer loop. /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause. /// @@ -839,7 +957,7 @@ public: : IVSize(IVSize), IVSigned(IVSigned), Ordered(Ordered), IL(IL), LB(LB), UB(UB), ST(ST), Chunk(Chunk) {} }; - /// \brief Call the appropriate runtime routine to initialize it before start + /// Call the appropriate runtime routine to initialize it before start /// of loop. /// /// This is used only in case of static schedule, when the user did not @@ -870,7 +988,7 @@ public: OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values); - /// \brief Call the appropriate runtime routine to notify that we finished + /// Call the appropriate runtime routine to notify that we finished /// iteration of the ordered loop with the dynamic scheduling. /// /// \param CGF Reference to current CodeGenFunction. @@ -882,7 +1000,7 @@ public: SourceLocation Loc, unsigned IVSize, bool IVSigned); - /// \brief Call the appropriate runtime routine to notify that we finished + /// Call the appropriate runtime routine to notify that we finished /// all the work with current loop. /// /// \param CGF Reference to current CodeGenFunction. @@ -911,7 +1029,7 @@ public: Address IL, Address LB, Address UB, Address ST); - /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 + /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. /// \param NumThreads An integer value of threads. @@ -919,13 +1037,13 @@ public: llvm::Value *NumThreads, SourceLocation Loc); - /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 + /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc); - /// \brief Returns address of the threadprivate variable for the current + /// Returns address of the threadprivate variable for the current /// thread. /// \param VD Threadprivate variable. /// \param VDAddr Address of the global variable \a VD. @@ -936,7 +1054,11 @@ public: Address VDAddr, SourceLocation Loc); - /// \brief Emit a code for initialization of threadprivate variable. It emits + /// Returns the address of the variable marked as declare target with link + /// clause. + virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD); + + /// Emit a code for initialization of threadprivate variable. It emits /// a call to runtime library which adds initial value to the newly created /// threadprivate variable (if it is not constant) and registers destructor /// for the variable (if any). @@ -949,6 +1071,14 @@ public: SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); + /// Emit a code for initialization of declare target variable. + /// \param VD Declare target variable. + /// \param Addr Address of the global variable \a VD. + /// \param PerformInit true if initialization expression is not constant. 
+ virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD, + llvm::GlobalVariable *Addr, + bool PerformInit); + /// Creates artificial threadprivate variable with name \p Name and type \p /// VarType. /// \param VarType Type of the artificial threadprivate variable. @@ -957,12 +1087,12 @@ public: QualType VarType, StringRef Name); - /// \brief Emit flush of the variables specified in 'omp flush' directive. + /// Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, SourceLocation Loc); - /// \brief Emit task region for the task directive. The task region is + /// Emit task region for the task directive. The task region is /// emitted in several steps: /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -1029,7 +1159,7 @@ public: llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data); - /// \brief Emit code for the directive that does not require outlining. + /// Emit code for the directive that does not require outlining. /// /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). @@ -1048,7 +1178,8 @@ public: /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. - llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, + llvm::Value *emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc, + llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, @@ -1066,7 +1197,7 @@ public: bool SimpleReduction; OpenMPDirectiveKind ReductionKind; }; - /// \brief Emit a code for reduction clause. Next code should be emitted for + /// Emit a code for reduction clause. Next code should be emitted for /// reduction: /// \code /// @@ -1160,10 +1291,10 @@ public: llvm::Value *ReductionsPtr, LValue SharedLVal); - /// \brief Emit code for 'taskwait' directive. + /// Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Emit code for 'cancellation point' construct. + /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be /// emitted. /// @@ -1171,7 +1302,7 @@ public: SourceLocation Loc, OpenMPDirectiveKind CancelRegion); - /// \brief Emit code for 'cancel' construct. + /// Emit code for 'cancel' construct. /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. /// \param CancelRegion Region kind for which the cancel must be emitted. @@ -1180,7 +1311,7 @@ public: const Expr *IfCond, OpenMPDirectiveKind CancelRegion); - /// \brief Emit outilined function for 'target' directive. + /// Emit outilined function for 'target' directive. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. /// \param OutlinedFn Outlined function value to be defined by this call. @@ -1196,7 +1327,7 @@ public: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// \brief Emit the target offloading code associated with \a D. The emitted + /// Emit the target offloading code associated with \a D. 
The emitted /// code attempts offloading the execution to the device, an the event of /// a failure it executes the host version outlined in \a OutlinedFn. /// \param D Directive to emit. @@ -1206,36 +1337,39 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - /// \param CapturedVars Values captured in the current region. virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device, - ArrayRef<llvm::Value *> CapturedVars); + const Expr *Device); - /// \brief Emit the target regions enclosed in \a GD function definition or + /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if /// \a GD was dealt with successfully. /// \param GD Function to scan. virtual bool emitTargetFunctions(GlobalDecl GD); - /// \brief Emit the global variable if it is a valid device global variable. + /// Emit the global variable if it is a valid device global variable. /// Returns true if \a GD was dealt with successfully. /// \param GD Variable declaration to emit. virtual bool emitTargetGlobalVariable(GlobalDecl GD); - /// \brief Emit the global \a GD if it is meaningful for the target. Returns + /// Checks if the provided global decl \a GD is a declare target variable and + /// registers it when emitting code for the host. + virtual void registerTargetGlobalVariable(const VarDecl *VD, + llvm::Constant *Addr); + + /// Emit the global \a GD if it is meaningful for the target. Returns /// if it was emitted successfully. /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); - /// \brief Creates the offloading descriptor in the event any target region + /// Creates the offloading descriptor in the event any target region /// was emitted in the current module and return the function that registers /// it. virtual llvm::Function *emitRegistrationFunction(); - /// \brief Emits code for teams call of the \a OutlinedFn with + /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. /// \param OutlinedFn Outlined function to be run by team masters. Type of @@ -1248,7 +1382,7 @@ public: SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars); - /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 + /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code /// for num_teams clause. /// \param NumTeams An integer expression of teams. @@ -1296,7 +1430,7 @@ public: bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } }; - /// \brief Emit the target data mapping code associated with \a D. + /// Emit the target data mapping code associated with \a D. /// \param D Directive to emit. /// \param IfCond Expression evaluated in if clause associated with the /// target directive, or null if no device clause is used. @@ -1310,7 +1444,7 @@ public: const RegionCodeGenTy &CodeGen, TargetDataInfo &Info); - /// \brief Emit the data mapping/movement code associated with the directive + /// Emit the data mapping/movement code associated with the directive /// \a D that should be of the form 'target [{enter|exit} data | update]'. 
/// \param D Directive to emit.
/// \param IfCond Expression evaluated in if clause associated with the target
@@ -1341,7 +1475,7 @@ public:
/// Translates the native parameter of outlined function if this is required
/// for target.
- /// \param FD Field decl from captured record for the paramater.
+ /// \param FD Field decl from captured record for the parameter.
/// \param NativeParam Parameter itself.
virtual const VarDecl *translateParameter(const FieldDecl *FD,
const VarDecl *NativeParam) const {
@@ -1362,6 +1496,582 @@ public:
emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args = llvm::None) const;
+
+ /// Emits OpenMP-specific function prolog.
+ /// Required for device constructs.
+ virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {}
+
+ /// Gets the OpenMP-specific address of the local variable.
+ virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD);
+
+ /// Marks the declaration as already emitted for the device code and returns
+ /// true if it was marked already, and false otherwise.
+ bool markAsGlobalTarget(GlobalDecl GD);
+
+};
+
+/// Class that supports the emission of SIMD-only code.
+class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
+public:
+ explicit CGOpenMPSIMDRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) {}
+ ~CGOpenMPSIMDRuntime() override {}
+
+ /// Emits outlined function for the specified OpenMP parallel directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Emits outlined function for the specified OpenMP teams directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Emits outlined function for the OpenMP task directive \a D. This
+ /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
+ /// TaskT).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param PartIDVar Variable for partition id in the current OpenMP untied
+ /// task region.
+ /// \param TaskTVar Variable for task_t argument.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ /// \param Tied true if task is generated for tied task, false otherwise. + /// \param NumberOfParts Number of parts in untied task. Ignored for tied + /// tasks. + /// + llvm::Value *emitTaskOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) override; + + /// Emits code for parallel or serial call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run in parallel threads. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// \param IfCond Condition in the associated 'if' clause, if it was + /// specified, nullptr otherwise. + /// + void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond) override; + + /// Emits a critical region. + /// \param CriticalName Name of the critical region. + /// \param CriticalOpGen Generator for the statement associated with the given + /// critical region. + /// \param Hint Value of the 'hint' clause (optional). + void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, + const RegionCodeGenTy &CriticalOpGen, + SourceLocation Loc, + const Expr *Hint = nullptr) override; + + /// Emits a master region. + /// \param MasterOpGen Generator for the statement associated with the given + /// master region. + void emitMasterRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &MasterOpGen, + SourceLocation Loc) override; + + /// Emits code for a taskyield directive. + void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override; + + /// Emit a taskgroup region. + /// \param TaskgroupOpGen Generator for the statement associated with the + /// given taskgroup region. + void emitTaskgroupRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &TaskgroupOpGen, + SourceLocation Loc) override; + + /// Emits a single region. + /// \param SingleOpGen Generator for the statement associated with the given + /// single region. + void emitSingleRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, + ArrayRef<const Expr *> CopyprivateVars, + ArrayRef<const Expr *> DestExprs, + ArrayRef<const Expr *> SrcExprs, + ArrayRef<const Expr *> AssignmentOps) override; + + /// Emit an ordered region. + /// \param OrderedOpGen Generator for the statement associated with the given + /// ordered region. + void emitOrderedRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &OrderedOpGen, + SourceLocation Loc, bool IsThreads) override; + + /// Emit an implicit/explicit barrier for OpenMP threads. + /// \param Kind Directive for which this implicit barrier call must be + /// generated. Must be OMPD_barrier for explicit barrier generation. + /// \param EmitChecks true if need to emit checks for cancellation barriers. + /// \param ForceSimpleCall true simple barrier call must be emitted, false if + /// runtime class decides which one to emit (simple or with cancellation + /// checks). 
+ /// + void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks = true, + bool ForceSimpleCall = false) override; + + /// This is used for non static scheduled types and when the ordered + /// clause is present on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime + /// routine before start of the OpenMP loop to get the loop upper / lower + /// bounds \a LB and \a UB and stride \a ST. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the iteration variable. + /// \param Ordered true if loop is ordered, false otherwise. + /// \param DispatchValues struct containing llvm values for lower bound, upper + /// bound, and chunk expression. + /// For the default (nullptr) value, the chunk 1 will be used. + /// + void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, + unsigned IVSize, bool IVSigned, bool Ordered, + const DispatchRTInput &DispatchValues) override; + + /// Call the appropriate runtime routine to initialize it before start + /// of loop. + /// + /// This is used only in case of static schedule, when the user did not + /// specify a ordered clause on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime + /// routine before start of the OpenMP loop to get the loop upper / lower + /// bounds LB and UB and stride ST. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param DKind Kind of the directive. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// \param Values Input arguments for the construct. + /// + void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind, + const OpenMPScheduleTy &ScheduleKind, + const StaticRTInput &Values) override; + + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. + /// \param Values Input arguments for the construct. + /// + void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, + const StaticRTInput &Values) override; + + /// Call the appropriate runtime routine to notify that we finished + /// iteration of the ordered loop with the dynamic scheduling. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the iteration variable. + /// + void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, + unsigned IVSize, bool IVSigned) override; + + /// Call the appropriate runtime routine to notify that we finished + /// all the work with current loop. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param DKind Kind of the directive for which the static finish is emitted. 
+ /// + void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind) override; + + /// Call __kmpc_dispatch_next( + /// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, + /// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, + /// kmp_int[32|64] *p_stride); + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the iteration variable. + /// \param IL Address of the output variable in which the flag of the + /// last iteration is returned. + /// \param LB Address of the output variable in which the lower iteration + /// number is returned. + /// \param UB Address of the output variable in which the upper iteration + /// number is returned. + /// \param ST Address of the output variable in which the stride value is + /// returned. + llvm::Value *emitForNext(CodeGenFunction &CGF, SourceLocation Loc, + unsigned IVSize, bool IVSigned, Address IL, + Address LB, Address UB, Address ST) override; + + /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' + /// clause. + /// \param NumThreads An integer value of threads. + void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, + SourceLocation Loc) override; + + /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 + /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. + void emitProcBindClause(CodeGenFunction &CGF, + OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) override; + + /// Returns address of the threadprivate variable for the current + /// thread. + /// \param VD Threadprivate variable. + /// \param VDAddr Address of the global variable \a VD. + /// \param Loc Location of the reference to threadprivate var. + /// \return Address of the threadprivate variable for the current thread. + Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, + Address VDAddr, SourceLocation Loc) override; + + /// Emit a code for initialization of threadprivate variable. It emits + /// a call to runtime library which adds initial value to the newly created + /// threadprivate variable (if it is not constant) and registers destructor + /// for the variable (if any). + /// \param VD Threadprivate variable. + /// \param VDAddr Address of the global variable \a VD. + /// \param Loc Location of threadprivate declaration. + /// \param PerformInit true if initialization expression is not constant. + llvm::Function * + emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, + SourceLocation Loc, bool PerformInit, + CodeGenFunction *CGF = nullptr) override; + + /// Creates artificial threadprivate variable with name \p Name and type \p + /// VarType. + /// \param VarType Type of the artificial threadprivate variable. + /// \param Name Name of the artificial threadprivate variable. + Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name) override; + + /// Emit flush of the variables specified in 'omp flush' directive. + /// \param Vars List of variables to flush. + void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, + SourceLocation Loc) override; + + /// Emit task region for the task directive. The task region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). 
Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, + /// kmp_task_t *new_task), where new_task is a resulting structure from + /// previous items. + /// \param D Current task directive. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, const Expr *IfCond, + const OMPTaskDataTy &Data) override; + + /// Emit task region for the taskloop directive. The taskloop region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t + /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int + /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task + /// is a resulting structure from + /// previous items. + /// \param D Current task directive. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPLoopDirective &D, llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, const Expr *IfCond, + const OMPTaskDataTy &Data) override; + + /// Emit a code for reduction clause. Next code should be emitted for + /// reduction: + /// \code + /// + /// static kmp_critical_name lock = { 0 }; + /// + /// void reduce_func(void *lhs[<n>], void *rhs[<n>]) { + /// ... + /// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); + /// ... + /// } + /// + /// ... 
+ /// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; + /// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), + /// RedList, reduce_func, &<lock>)) { + /// case 1: + /// ... + /// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); + /// ... + /// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + /// break; + /// case 2: + /// ... + /// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); + /// ... + /// break; + /// default:; + /// } + /// \endcode + /// + /// \param Privates List of private copies for original reduction arguments. + /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. + /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' + /// or 'operator binop(LHS, RHS)'. + /// \param Options List of options for reduction codegen: + /// WithNowait true if parent directive has also nowait clause, false + /// otherwise. + /// SimpleReduction Emit reduction operation only. Used for omp simd + /// directive on the host. + /// ReductionKind The kind of reduction to perform. + void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, + ReductionOptionsTy Options) override; + + /// Emit a code for initialization of task reduction clause. Next code + /// should be emitted for reduction: + /// \code + /// + /// _task_red_item_t red_data[n]; + /// ... + /// red_data[i].shar = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// \endcode + /// + /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. + /// \param Data Additional data for task generation like tiedness, final + /// state, list of privates, reductions etc. + llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + const OMPTaskDataTy &Data) override; + + /// Required to resolve existing problems in the runtime. Emits threadprivate + /// variables to store the size of the VLAs/array sections for + /// initializer/combiner/finalizer functions + emits threadprivate variable to + /// store the pointer to the original reduction item for the custom + /// initializer defined by declare reduction construct. + /// \param RCG Allows to reuse an existing data for the reductions. + /// \param N Reduction item for which fixups must be emitted. + void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) override; + + /// Get the address of `void *` type of the privatue copy of the reduction + /// item specified by the \p SharedLVal. + /// \param ReductionsPtr Pointer to the reduction data returned by the + /// emitTaskReductionInit function. + /// \param SharedLVal Address of the original reduction item. + Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) override; + + /// Emit code for 'taskwait' directive. 
+ void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override; + + /// Emit code for 'cancellation point' construct. + /// \param CancelRegion Region kind for which the cancellation point must be + /// emitted. + /// + void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind CancelRegion) override; + + /// Emit code for 'cancel' construct. + /// \param IfCond Condition in the associated 'if' clause, if it was + /// specified, nullptr otherwise. + /// \param CancelRegion Region kind for which the cancel must be emitted. + /// + void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, + const Expr *IfCond, + OpenMPDirectiveKind CancelRegion) override; + + /// Emit outilined function for 'target' directive. + /// \param D Directive to emit. + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Code generation sequence for the \a D directive. + /// An outlined function may not be an entry if, e.g. the if clause always + /// evaluates to false. + void emitTargetOutlinedFunction(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen) override; + + /// Emit the target offloading code associated with \a D. The emitted + /// code attempts offloading the execution to the device, an the event of + /// a failure it executes the host version outlined in \a OutlinedFn. + /// \param D Directive to emit. + /// \param OutlinedFn Host version of the code to be offloaded. + /// \param OutlinedFnID ID of host version of the code to be offloaded. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device) override; + + /// Emit the target regions enclosed in \a GD function definition or + /// the function itself in case it is a valid device function. Returns true if + /// \a GD was dealt with successfully. + /// \param GD Function to scan. + bool emitTargetFunctions(GlobalDecl GD) override; + + /// Emit the global variable if it is a valid device global variable. + /// Returns true if \a GD was dealt with successfully. + /// \param GD Variable declaration to emit. + bool emitTargetGlobalVariable(GlobalDecl GD) override; + + /// Emit the global \a GD if it is meaningful for the target. Returns + /// if it was emitted successfully. + /// \param GD Global to scan. + bool emitTargetGlobal(GlobalDecl GD) override; + + /// Creates the offloading descriptor in the event any target region + /// was emitted in the current module and return the function that registers + /// it. + llvm::Function *emitRegistrationFunction() override; + + /// Emits code for teams call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run by team masters. 
Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// + void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) override; + + /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code + /// for num_teams clause. + /// \param NumTeams An integer expression of teams. + /// \param ThreadLimit An integer expression of threads. + void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, + const Expr *ThreadLimit, SourceLocation Loc) override; + + /// Emit the target data mapping code associated with \a D. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the + /// target directive, or null if no device clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + /// \param Info A record used to store information that needs to be preserved + /// until the region is closed. + void emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device, const RegionCodeGenTy &CodeGen, + TargetDataInfo &Info) override; + + /// Emit the data mapping/movement code associated with the directive + /// \a D that should be of the form 'target [{enter|exit} data | update]'. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + void emitTargetDataStandAloneCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, + const Expr *Device) override; + + /// Emit initialization for doacross loop nesting support. + /// \param D Loop-based construct used in doacross nesting construct. + void emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D) override; + + /// Emit code for doacross ordered directive with 'depend' clause. + /// \param C 'depend' clause with 'sink|source' dependency kind. + void emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) override; + + /// Translates the native parameter of outlined function if this is required + /// for target. + /// \param FD Field decl from captured record for the parameter. + /// \param NativeParam Parameter itself. + const VarDecl *translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const override; + + /// Gets the address of the native argument basing on the address of the + /// target-specific parameter. + /// \param NativeParam Parameter itself. + /// \param TargetParam Corresponding target-specific parameter. 
+ Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, + const VarDecl *TargetParam) const override; }; } // namespace CodeGen diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 7b2993cfd38d..036b5371fe0b 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -13,33 +13,35 @@ //===----------------------------------------------------------------------===// #include "CGOpenMPRuntimeNVPTX.h" -#include "clang/AST/DeclOpenMP.h" #include "CodeGenFunction.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/SmallPtrSet.h" using namespace clang; using namespace CodeGen; namespace { enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit, + /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit, /// int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_kernel_init, - /// \brief Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); + /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_deinit, - /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// \brief Call to void __kmpc_spmd_kernel_deinit(); + /// Call to void __kmpc_spmd_kernel_deinit(); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, - /// \brief Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// Call to void __kmpc_kernel_prepare_parallel(void + /// *outlined_function, int16_t /// IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void - /// ***args, int16_t IsOMPRuntimeInitialized); + /// Call to bool __kmpc_kernel_parallel(void **outlined_function, + /// int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_parallel, - /// \brief Call to void __kmpc_kernel_end_parallel(); + /// Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 /// global_tid); @@ -47,19 +49,25 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 /// global_tid); OMPRTL_NVPTX__kmpc_end_serialized_parallel, - /// \brief Call to int32_t __kmpc_shuffle_int32(int32_t element, + /// Call to int32_t __kmpc_shuffle_int32(int32_t element, /// int16_t lane_offset, int16_t warp_size); OMPRTL_NVPTX__kmpc_shuffle_int32, - /// \brief Call to int64_t __kmpc_shuffle_int64(int64_t element, + /// Call to int64_t __kmpc_shuffle_int64(int64_t element, /// int16_t lane_offset, int16_t warp_size); OMPRTL_NVPTX__kmpc_shuffle_int64, - /// \brief Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32 + /// Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32 /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, - /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, + /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32 + /// global_tid, kmp_int32 num_vars, size_t 
reduce_size, void* reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); + OMPRTL_NVPTX__kmpc_simd_reduce_nowait, + /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, /// int32_t num_vars, size_t reduce_size, void *reduce_data, /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t /// lane_offset, int16_t shortCircuit), @@ -69,17 +77,38 @@ enum OpenMPRTLFunctionNVPTX { /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t /// index, int32_t width, int32_t reduce)) OMPRTL_NVPTX__kmpc_teams_reduce_nowait, - /// \brief Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); - OMPRTL_NVPTX__kmpc_end_reduce_nowait + /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); + OMPRTL_NVPTX__kmpc_end_reduce_nowait, + /// Call to void __kmpc_data_sharing_init_stack(); + OMPRTL_NVPTX__kmpc_data_sharing_init_stack, + /// Call to void __kmpc_data_sharing_init_stack_spmd(); + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, + /// Call to void* __kmpc_data_sharing_push_stack(size_t size, + /// int16_t UseSharedMemory); + OMPRTL_NVPTX__kmpc_data_sharing_push_stack, + /// Call to void __kmpc_data_sharing_pop_stack(void *a); + OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, + /// Call to void __kmpc_begin_sharing_variables(void ***args, + /// size_t n_args); + OMPRTL_NVPTX__kmpc_begin_sharing_variables, + /// Call to void __kmpc_end_sharing_variables(); + OMPRTL_NVPTX__kmpc_end_sharing_variables, + /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs) + OMPRTL_NVPTX__kmpc_get_shared_variables, + /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL_NVPTX__kmpc_parallel_level, + /// Call to int8_t __kmpc_is_spmd_exec_mode(); + OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { - llvm::Value *EnterCallee; + llvm::Value *EnterCallee = nullptr; ArrayRef<llvm::Value *> EnterArgs; - llvm::Value *ExitCallee; + llvm::Value *ExitCallee = nullptr; ArrayRef<llvm::Value *> ExitArgs; - bool Conditional; + bool Conditional = false; llvm::BasicBlock *ContBlock = nullptr; public: @@ -109,21 +138,21 @@ public: } }; -// A class to track the execution mode when codegening directives within -// a target region. The appropriate mode (generic/spmd) is set on entry -// to the target region and used by containing directives such as 'parallel' -// to emit optimized code. +/// A class to track the execution mode when codegening directives within +/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry +/// to the target region and used by containing directives such as 'parallel' +/// to emit optimized code. class ExecutionModeRAII { private: CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode; CGOpenMPRuntimeNVPTX::ExecutionMode &Mode; public: - ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, - CGOpenMPRuntimeNVPTX::ExecutionMode NewMode) + ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD) : Mode(Mode) { SavedMode = Mode; - Mode = NewMode; + Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD + : CGOpenMPRuntimeNVPTX::EM_NonSPMD; } ~ExecutionModeRAII() { Mode = SavedMode; } }; @@ -149,6 +178,353 @@ enum NamedBarrier : unsigned { /// barrier. 
NB_Parallel = 1, }; + +/// Get the list of variables that can escape their declaration context. +class CheckVarsEscapingDeclContext final + : public ConstStmtVisitor<CheckVarsEscapingDeclContext> { + CodeGenFunction &CGF; + llvm::SetVector<const ValueDecl *> EscapedDecls; + llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls; + llvm::SmallPtrSet<const Decl *, 4> EscapedParameters; + RecordDecl *GlobalizedRD = nullptr; + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; + bool AllEscaped = false; + bool IsForCombinedParallelRegion = false; + + static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> + isDeclareTargetDeclaration(const ValueDecl *VD) { + for (const Decl *D : VD->redecls()) { + if (!D->hasAttrs()) + continue; + if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) + return Attr->getMapType(); + } + return llvm::None; + } + + void markAsEscaped(const ValueDecl *VD) { + // Do not globalize declare target variables. + if (!isa<VarDecl>(VD) || isDeclareTargetDeclaration(VD)) + return; + VD = cast<ValueDecl>(VD->getCanonicalDecl()); + // Variables captured by value must be globalized. + if (auto *CSI = CGF.CapturedStmtInfo) { + if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { + // Check if need to capture the variable that was already captured by + // value in the outer region. + if (!IsForCombinedParallelRegion) { + if (!FD->hasAttrs()) + return; + const auto *Attr = FD->getAttr<OMPCaptureKindAttr>(); + if (!Attr) + return; + if (!isOpenMPPrivate( + static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) || + Attr->getCaptureKind() == OMPC_map) + return; + } + if (!FD->getType()->isReferenceType()) { + assert(!VD->getType()->isVariablyModifiedType() && + "Parameter captured by value with variably modified type"); + EscapedParameters.insert(VD); + } else if (!IsForCombinedParallelRegion) { + return; + } + } + } + if ((!CGF.CapturedStmtInfo || + (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) && + VD->getType()->isReferenceType()) + // Do not globalize variables with reference type. + return; + if (VD->getType()->isVariablyModifiedType()) + EscapedVariableLengthDecls.insert(VD); + else + EscapedDecls.insert(VD); + } + + void VisitValueDecl(const ValueDecl *VD) { + if (VD->getType()->isLValueReferenceType()) + markAsEscaped(VD); + if (const auto *VarD = dyn_cast<VarDecl>(VD)) { + if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) { + const bool SavedAllEscaped = AllEscaped; + AllEscaped = VD->getType()->isLValueReferenceType(); + Visit(VarD->getInit()); + AllEscaped = SavedAllEscaped; + } + } + } + void VisitOpenMPCapturedStmt(const CapturedStmt *S, + ArrayRef<OMPClause *> Clauses, + bool IsCombinedParallelRegion) { + if (!S) + return; + for (const CapturedStmt::Capture &C : S->captures()) { + if (C.capturesVariable() && !C.capturesVariableByCopy()) { + const ValueDecl *VD = C.getCapturedVar(); + bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion; + if (IsCombinedParallelRegion) { + // Check if the variable is privatized in the combined construct and + // those private copies must be shared in the inner parallel + // directive. 
+ IsForCombinedParallelRegion = false; + for (const OMPClause *C : Clauses) { + if (!isOpenMPPrivate(C->getClauseKind()) || + C->getClauseKind() == OMPC_reduction || + C->getClauseKind() == OMPC_linear || + C->getClauseKind() == OMPC_private) + continue; + ArrayRef<const Expr *> Vars; + if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C)) + Vars = PC->getVarRefs(); + else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C)) + Vars = PC->getVarRefs(); + else + llvm_unreachable("Unexpected clause."); + for (const auto *E : Vars) { + const Decl *D = + cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl(); + if (D == VD->getCanonicalDecl()) { + IsForCombinedParallelRegion = true; + break; + } + } + if (IsForCombinedParallelRegion) + break; + } + } + markAsEscaped(VD); + if (isa<OMPCapturedExprDecl>(VD)) + VisitValueDecl(VD); + IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion; + } + } + } + + typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy; + static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { + return P1.first > P2.first; + } + + void buildRecordForGlobalizedVars() { + assert(!GlobalizedRD && + "Record for globalized variables is built already."); + if (EscapedDecls.empty()) + return; + ASTContext &C = CGF.getContext(); + SmallVector<VarsDataTy, 4> GlobalizedVars; + for (const ValueDecl *D : EscapedDecls) + GlobalizedVars.emplace_back(C.getDeclAlign(D), D); + std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), + stable_sort_comparator); + // Build struct _globalized_locals_ty { + // /* globalized vars */ + // }; + GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); + GlobalizedRD->startDefinition(); + for (const auto &Pair : GlobalizedVars) { + const ValueDecl *VD = Pair.second; + QualType Type = VD->getType(); + if (Type->isLValueReferenceType()) + Type = C.getPointerType(Type.getNonReferenceType()); + else + Type = Type.getNonReferenceType(); + SourceLocation Loc = VD->getLocation(); + auto *Field = FieldDecl::Create( + C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, + C.getTrivialTypeSourceInfo(Type, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + GlobalizedRD->addDecl(Field); + if (VD->hasAttrs()) { + for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), + E(VD->getAttrs().end()); + I != E; ++I) + Field->addAttr(*I); + } + MappedDeclsFields.try_emplace(VD, Field); + } + GlobalizedRD->completeDefinition(); + } + +public: + CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {} + virtual ~CheckVarsEscapingDeclContext() = default; + void VisitDeclStmt(const DeclStmt *S) { + if (!S) + return; + for (const Decl *D : S->decls()) + if (const auto *VD = dyn_cast_or_null<ValueDecl>(D)) + VisitValueDecl(VD); + } + void VisitOMPExecutableDirective(const OMPExecutableDirective *D) { + if (!D) + return; + if (!D->hasAssociatedStmt()) + return; + if (const auto *S = + dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) { + // Do not analyze directives that do not actually require capturing, + // like `omp for` or `omp simd` directives. 
+ llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind()); + if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) { + VisitStmt(S->getCapturedStmt()); + return; + } + VisitOpenMPCapturedStmt( + S, D->clauses(), + CaptureRegions.back() == OMPD_parallel && + isOpenMPDistributeDirective(D->getDirectiveKind())); + } + } + void VisitCapturedStmt(const CapturedStmt *S) { + if (!S) + return; + for (const CapturedStmt::Capture &C : S->captures()) { + if (C.capturesVariable() && !C.capturesVariableByCopy()) { + const ValueDecl *VD = C.getCapturedVar(); + markAsEscaped(VD); + if (isa<OMPCapturedExprDecl>(VD)) + VisitValueDecl(VD); + } + } + } + void VisitLambdaExpr(const LambdaExpr *E) { + if (!E) + return; + for (const LambdaCapture &C : E->captures()) { + if (C.capturesVariable()) { + if (C.getCaptureKind() == LCK_ByRef) { + const ValueDecl *VD = C.getCapturedVar(); + markAsEscaped(VD); + if (E->isInitCapture(&C) || isa<OMPCapturedExprDecl>(VD)) + VisitValueDecl(VD); + } + } + } + } + void VisitBlockExpr(const BlockExpr *E) { + if (!E) + return; + for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) { + if (C.isByRef()) { + const VarDecl *VD = C.getVariable(); + markAsEscaped(VD); + if (isa<OMPCapturedExprDecl>(VD) || VD->isInitCapture()) + VisitValueDecl(VD); + } + } + } + void VisitCallExpr(const CallExpr *E) { + if (!E) + return; + for (const Expr *Arg : E->arguments()) { + if (!Arg) + continue; + if (Arg->isLValue()) { + const bool SavedAllEscaped = AllEscaped; + AllEscaped = true; + Visit(Arg); + AllEscaped = SavedAllEscaped; + } else { + Visit(Arg); + } + } + Visit(E->getCallee()); + } + void VisitDeclRefExpr(const DeclRefExpr *E) { + if (!E) + return; + const ValueDecl *VD = E->getDecl(); + if (AllEscaped) + markAsEscaped(VD); + if (isa<OMPCapturedExprDecl>(VD)) + VisitValueDecl(VD); + else if (const auto *VarD = dyn_cast<VarDecl>(VD)) + if (VarD->isInitCapture()) + VisitValueDecl(VD); + } + void VisitUnaryOperator(const UnaryOperator *E) { + if (!E) + return; + if (E->getOpcode() == UO_AddrOf) { + const bool SavedAllEscaped = AllEscaped; + AllEscaped = true; + Visit(E->getSubExpr()); + AllEscaped = SavedAllEscaped; + } else { + Visit(E->getSubExpr()); + } + } + void VisitImplicitCastExpr(const ImplicitCastExpr *E) { + if (!E) + return; + if (E->getCastKind() == CK_ArrayToPointerDecay) { + const bool SavedAllEscaped = AllEscaped; + AllEscaped = true; + Visit(E->getSubExpr()); + AllEscaped = SavedAllEscaped; + } else { + Visit(E->getSubExpr()); + } + } + void VisitExpr(const Expr *E) { + if (!E) + return; + bool SavedAllEscaped = AllEscaped; + if (!E->isLValue()) + AllEscaped = false; + for (const Stmt *Child : E->children()) + if (Child) + Visit(Child); + AllEscaped = SavedAllEscaped; + } + void VisitStmt(const Stmt *S) { + if (!S) + return; + for (const Stmt *Child : S->children()) + if (Child) + Visit(Child); + } + + /// Returns the record that handles all the escaped local variables and used + /// instead of their original storage. + const RecordDecl *getGlobalizedRecord() { + if (!GlobalizedRD) + buildRecordForGlobalizedVars(); + return GlobalizedRD; + } + + /// Returns the field in the globalized record for the escaped variable. 
+ const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const { + assert(GlobalizedRD && + "Record for globalized variables must be generated already."); + auto I = MappedDeclsFields.find(VD); + if (I == MappedDeclsFields.end()) + return nullptr; + return I->getSecond(); + } + + /// Returns the list of the escaped local variables/parameters. + ArrayRef<const ValueDecl *> getEscapedDecls() const { + return EscapedDecls.getArrayRef(); + } + + /// Checks if the escaped local variable is actually a parameter passed by + /// value. + const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const { + return EscapedParameters; + } + + /// Returns the list of the escaped variables with the variably modified + /// types. + ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const { + return EscapedVariableLengthDecls.getArrayRef(); + } +}; } // anonymous namespace /// Get the GPU warp size. @@ -223,12 +599,12 @@ static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) { /// CTA. The threads in the last warp are reserved for master execution. /// For the 'spmd' execution mode, all threads in a CTA are part of the team. static llvm::Value *getThreadLimit(CodeGenFunction &CGF, - bool IsInSpmdExecutionMode = false) { + bool IsInSPMDExecutionMode = false) { CGBuilderTy &Bld = CGF.Builder; - return IsInSpmdExecutionMode + return IsInSPMDExecutionMode ? getNVPTXNumThreads(CGF) - : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), - "thread_limit"); + : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), + "thread_limit"); } /// Get the thread id of the OMP master thread. @@ -243,96 +619,295 @@ static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { llvm::Value *NumThreads = getNVPTXNumThreads(CGF); // We assume that the warp size is a power of 2. - llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); + llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); - return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)), + return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)), Bld.CreateNot(Mask), "master_tid"); } CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( - CodeGenModule &CGM) - : WorkerFn(nullptr), CGFI(nullptr) { + CodeGenModule &CGM, SourceLocation Loc) + : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()), + Loc(Loc) { createWorkerFunction(CGM); } void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( CodeGenModule &CGM) { // Create an worker function with no arguments. - CGFI = &CGM.getTypes().arrangeNullaryFunction(); WorkerFn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage, - /* placeholder */ "_worker", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + /*placeholder=*/"_worker", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI); + WorkerFn->setDoesNotRecurse(); } -bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const { - return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; +CGOpenMPRuntimeNVPTX::ExecutionMode +CGOpenMPRuntimeNVPTX::getExecutionMode() const { + return CurrentExecutionMode; +} + +static CGOpenMPRuntimeNVPTX::DataSharingMode +getDataSharingMode(CodeGenModule &CGM) { + return CGM.getLangOpts().OpenMPCUDAMode ? 
CGOpenMPRuntimeNVPTX::CUDA + : CGOpenMPRuntimeNVPTX::Generic; +} + +/// Checks if the \p Body is the \a CompoundStmt and returns its child statement +/// iff there is only one. +static const Stmt *getSingleCompoundChild(const Stmt *Body) { + if (const auto *C = dyn_cast<CompoundStmt>(Body)) + if (C->size() == 1) + return C->body_front(); + return Body; +} + +/// Check if the parallel directive has an 'if' clause with non-constant or +/// false condition. Also, check if the number of threads is strictly specified +/// and run those directives in non-SPMD mode. +static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, + const OMPExecutableDirective &D) { + if (D.hasClausesOfKind<OMPNumThreadsClause>()) + return true; + for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { + OpenMPDirectiveKind NameModifier = C->getNameModifier(); + if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown) + continue; + const Expr *Cond = C->getCondition(); + bool Result; + if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result) + return true; + } + return false; } -static CGOpenMPRuntimeNVPTX::ExecutionMode -getExecutionModeForDirective(CodeGenModule &CGM, - const OMPExecutableDirective &D) { +/// Check for inner (nested) SPMD construct, if any +static bool hasNestedSPMDDirective(ASTContext &Ctx, + const OMPExecutableDirective &D) { + const auto *CS = D.getInnermostCapturedStmt(); + const auto *Body = CS->getCapturedStmt()->IgnoreContainers(); + const Stmt *ChildStmt = getSingleCompoundChild(Body); + + if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); + switch (D.getDirectiveKind()) { + case OMPD_target: + if (isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) + return true; + if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); + if (!Body) + return false; + ChildStmt = getSingleCompoundChild(Body); + if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + DKind = NND->getDirectiveKind(); + if (isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NND)) + return true; + if (DKind == OMPD_distribute) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); + if (!Body) + return false; + ChildStmt = getSingleCompoundChild(Body); + if (!ChildStmt) + return false; + if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + DKind = NND->getDirectiveKind(); + return isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NND); + } + } + } + } + return false; + case OMPD_target_teams: + if (isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) + return true; + if (DKind == OMPD_distribute) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); + if (!Body) + return false; + ChildStmt = getSingleCompoundChild(Body); + if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + DKind = NND->getDirectiveKind(); + return isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NND); + } + } + return false; + case OMPD_target_teams_distribute: + return isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NestedDir); + case OMPD_target_simd: + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute_simd: + case 
OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_unknown: + llvm_unreachable("Unexpected directive."); + } + } + + return false; +} + +static bool supportsSPMDExecutionMode(ASTContext &Ctx, + const OMPExecutableDirective &D) { OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); switch (DirectiveKind) { case OMPD_target: case OMPD_target_teams: - return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; + case OMPD_target_teams_distribute: + return hasNestedSPMDDirective(Ctx, D); case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: - return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; - default: - llvm_unreachable("Unsupported directive on NVPTX device."); + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + return !hasParallelIfNumThreadsClause(Ctx, D); + case OMPD_target_simd: + case OMPD_target_teams_distribute_simd: + return false; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_unknown: + break; } - llvm_unreachable("Unsupported directive on NVPTX device."); + llvm_unreachable( + "Unknown programming model for OpenMP directive on NVPTX target."); } -void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, +void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, 
StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, - CGOpenMPRuntimeNVPTX::ExecutionMode::Generic); + ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false); EntryFunctionState EST; - WorkerFunctionState WST(CGM); + WorkerFunctionState WST(CGM, D.getLocStart()); Work.clear(); WrapperFunctionsMap.clear(); // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { - CGOpenMPRuntimeNVPTX &RT; CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; public: - NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, - CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) - : RT(RT), EST(EST), WST(WST) {} + : EST(EST), WST(WST) {} void Enter(CodeGenFunction &CGF) override { - RT.emitGenericEntryHeader(CGF, EST, WST); + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) + .emitNonSPMDEntryHeader(CGF, EST, WST); } void Exit(CodeGenFunction &CGF) override { - RT.emitGenericEntryFooter(CGF, EST); + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) + .emitNonSPMDEntryFooter(CGF, EST); } - } Action(*this, EST, WST); + } Action(EST, WST); CodeGen.setAction(Action); emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - // Create the worker function - emitWorkerFunction(WST); - // Now change the name of the worker function to correspond to this target // region's entry function. - WST.WorkerFn->setName(OutlinedFn->getName() + "_worker"); + WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker")); + + // Create the worker function + emitWorkerFunction(WST); } // Setup NVPTX threads for master-worker OpenMP scheme. -void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, +void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, WorkerFunctionState &WST) { CGBuilderTy &Bld = CGF.Builder; @@ -342,20 +917,22 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); EST.ExitBB = CGF.createBasicBlock(".exit"); - auto *IsWorker = + llvm::Value *IsWorker = Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF)); Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); - emitCall(CGF, WST.WorkerFn); + emitCall(CGF, WST.Loc, WST.WorkerFn); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); - auto *IsMaster = + llvm::Value *IsMaster = Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB); CGF.EmitBlock(MasterBB); + IsInTargetMasterThreadRegion = true; + // SEQUENTIAL (MASTER) REGION START // First action in sequential region: // Initialize the state of the OpenMP runtime library on the GPU. // TODO: Optimize runtime initialization and pass in correct value. @@ -363,10 +940,23 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, Bld.getInt16(/*RequiresOMPRuntime=*/1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); + + // For data sharing, we need to initialize the stack. 
+ CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + + emitGenericVarsProlog(CGF, WST.Loc); } -void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF, +void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST) { + IsInTargetMasterThreadRegion = false; + if (!CGF.HaveInsertPoint()) + return; + + emitGenericVarsEpilog(CGF); + if (!EST.ExitBB) EST.ExitBB = CGF.createBasicBlock(".exit"); @@ -388,14 +978,13 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF, EST.ExitBB = nullptr; } -void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, +void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, - CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd); + ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true); EntryFunctionState EST; // Emit target region as a standalone region. @@ -410,10 +999,10 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, const OMPExecutableDirective &D) : RT(RT), EST(EST), D(D) {} void Enter(CodeGenFunction &CGF) override { - RT.emitSpmdEntryHeader(CGF, EST, D); + RT.emitSPMDEntryHeader(CGF, EST, D); } void Exit(CodeGenFunction &CGF) override { - RT.emitSpmdEntryFooter(CGF, EST); + RT.emitSPMDEntryFooter(CGF, EST); } } Action(*this, EST, D); CodeGen.setAction(Action); @@ -421,10 +1010,10 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, IsOffloadEntry, CodeGen); } -void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( +void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( CodeGenFunction &CGF, EntryFunctionState &EST, const OMPExecutableDirective &D) { - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // Setup BBs in entry function. llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); @@ -433,18 +1022,30 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( // Initialize the OMP state in the runtime; called by all active threads. // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters // based on code analysis of the target region. - llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true), + llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), /*RequiresOMPRuntime=*/Bld.getInt16(1), /*RequiresDataSharing=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + + // For data sharing, we need to initialize the stack. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + CGF.EmitBranch(ExecuteBB); CGF.EmitBlock(ExecuteBB); + + IsInTargetMasterThreadRegion = true; } -void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF, +void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST) { + IsInTargetMasterThreadRegion = false; + if (!CGF.HaveInsertPoint()) + return; + if (!EST.ExitBB) EST.ExitBB = CGF.createBasicBlock(".exit"); @@ -468,19 +1069,21 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF, // 'generic', the runtime reserves one warp for the master, otherwise, all // warps participate in parallel work. 
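// Illustrative sketch (not part of this change): how the SPMD/non-SPMD
// selection above plays out for typical directives. Variable and function
// names here are hypothetical.
//
//   // Compiled in SPMD mode: the parallel region covers the whole target
//   // region and carries no if/num_threads clause, so every thread does
//   // useful work and the <kernel>_exec_mode global below is emitted as 0.
//   #pragma omp target teams distribute parallel for
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];
//
//   // Compiled in non-SPMD (generic) mode: there is serial code around the
//   // parallel region, and the num_threads clause would force generic mode
//   // on its own; one warp is reserved for the master and <kernel>_exec_mode
//   // is emitted as 1.
//   #pragma omp target
//   {
//     setup();
//   #pragma omp parallel num_threads(64)
//     work();
//   }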
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, - CGOpenMPRuntimeNVPTX::ExecutionMode Mode) { - (void)new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode")); + bool Mode) { + auto *GVMode = + new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1), + Twine(Name, "_exec_mode")); + CGM.addCompilerUsedGlobal(GVMode); } void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { ASTContext &Ctx = CGM.getContext(); CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {}); + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {}, + WST.Loc, WST.Loc); emitWorkerLoop(CGF, WST); CGF.FinishFunction(); } @@ -519,19 +1122,16 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0)); CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); - // Set up shared arguments - Address SharedArgs = - CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args"); // TODO: Optimize runtime initialization and pass in correct value. - llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(), + llvm::Value *Args[] = {WorkFn.getPointer(), /*RequiresOMPRuntime=*/Bld.getInt16(1)}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. - llvm::Value *ShouldTerminate = - Bld.CreateIsNull(Bld.CreateLoad(WorkFn), "should_terminate"); + llvm::Value *WorkID = Bld.CreateLoad(WorkFn); + llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID, "should_terminate"); Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB); // Activate requested workers. @@ -543,13 +1143,10 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Signal start of parallel region. CGF.EmitBlock(ExecuteBB); - // Current context - ASTContext &Ctx = CGF.getContext(); - // Process work items: outlined parallel functions. - for (auto *W : Work) { + for (llvm::Function *W : Work) { // Try to match this outlined function. - auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy); + llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy); llvm::Value *WorkFnMatch = Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match"); @@ -562,23 +1159,33 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.EmitBlock(ExecuteFNBB); // Insert call to work function via shared wrapper. The shared - // wrapper takes exactly three arguments: + // wrapper takes two arguments: // - the parallelism level; - // - the master thread ID; - // - the list of references to shared arguments. - // - // TODO: Assert that the function is a wrapper function.s - Address Capture = CGF.EmitLoadOfPointer(SharedArgs, - Ctx.getPointerType( - Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>()); - emitCall(CGF, W, {Bld.getInt16(/*ParallelLevel=*/0), - getMasterThreadID(CGF), Capture.getPointer()}); + // - the thread ID; + emitCall(CGF, WST.Loc, W, + {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)}); // Go to end of parallel region. 
CGF.EmitBranch(TerminateBB); CGF.EmitBlock(CheckNextBB); } + // Default case: call to outlined function through pointer if the target + // region makes a declare target call that may contain an orphaned parallel + // directive. + auto *ParallelFnTy = + llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty}, + /*isVarArg=*/false) + ->getPointerTo(); + llvm::Value *WorkFnCast = Bld.CreateBitCast(WorkID, ParallelFnTy); + // Insert call to work function via shared wrapper. The shared + // wrapper takes two arguments: + // - the parallelism level; + // - the thread ID; + emitCall(CGF, WST.Loc, WorkFnCast, + {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)}); + // Go to end of parallel region. + CGF.EmitBranch(TerminateBB); // Signal end of parallel region. CGF.EmitBlock(TerminateBB); @@ -597,7 +1204,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.EmitBlock(ExitBB); } -/// \brief Returns specified OpenMP runtime function for the current OpenMP +/// Returns specified OpenMP runtime function for the current OpenMP /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. /// \return Specified function. @@ -609,7 +1216,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t // RequiresOMPRuntime); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); break; @@ -617,7 +1224,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { case OMPRTL_NVPTX__kmpc_kernel_deinit: { // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; @@ -626,44 +1233,40 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); break; } case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: { // Build void __kmpc_spmd_kernel_deinit(); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit"); break; } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t - /// IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty, - CGM.Int16Ty}; - llvm::FunctionType *FnTy = + /// void *outlined_function, int16_t IsOMPRuntimeInitialized); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, void - /// 
***args, int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty}; + /// Build bool __kmpc_kernel_parallel(void **outlined_function, + /// int16_t IsOMPRuntimeInitialized); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { /// Build void __kmpc_kernel_end_parallel(); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); break; @@ -672,7 +1275,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); break; @@ -681,7 +1284,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; @@ -690,7 +1293,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build int32_t __kmpc_shuffle_int32(int32_t element, // int16_t lane_offset, int16_t warp_size); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); break; @@ -699,7 +1302,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build int64_t __kmpc_shuffle_int64(int64_t element, // int16_t lane_offset, int16_t warp_size); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); break; @@ -725,12 +1328,39 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { CGM.VoidPtrTy, ShuffleReduceFnTy->getPointerTo(), InterWarpCopyFnTy->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); break; } + case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: { + // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t Algorithm Version), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, 
+ /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait"); + break; + } case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, // int32_t num_vars, size_t reduce_size, void *reduce_data, @@ -768,7 +1398,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { InterWarpCopyFnTy->getPointerTo(), CopyToScratchpadFnTy->getPointerTo(), LoadReduceFnTy->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait"); @@ -777,32 +1407,103 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); llvm::Type *TypeParams[] = {CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); break; } + case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { + /// Build void __kmpc_data_sharing_init_stack(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { + /// Build void __kmpc_data_sharing_init_stack_spmd(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { + // Build void *__kmpc_data_sharing_push_stack(size_t size, + // int16_t UseSharedMemory); + llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { + // Build void __kmpc_data_sharing_pop_stack(void *a); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, + /*Name=*/"__kmpc_data_sharing_pop_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { + /// Build void __kmpc_begin_sharing_variables(void ***args, + /// size_t n_args); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); + break; + } + case OMPRTL_NVPTX__kmpc_end_sharing_variables: { + /// Build void __kmpc_end_sharing_variables(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); + break; + } 
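// Illustrative sketch (not part of this change): the device-side calling
// convention implied by the sharing entry points whose types are built in the
// surrounding cases. The declarations are copied from the doc comments in
// this file; the helper functions around them are hypothetical.

#include <stddef.h>
#include <stdint.h>

extern void __kmpc_begin_sharing_variables(void ***args, size_t n_args);
extern void __kmpc_end_sharing_variables(void);
extern void __kmpc_get_shared_variables(void ***global_args);

// Master side: publish the addresses of the captured variables, let the
// workers run, then tear the list down once they are done.
static void master_publish_captures(int *x, double *y) {
  void **args;
  __kmpc_begin_sharing_variables(&args, /*n_args=*/2);
  args[0] = x;
  args[1] = y;
  /* barriers release the workers and wait for them; see the parallel
     call emission later in this patch */
  __kmpc_end_sharing_variables();
}

// Worker side: how an outlined wrapper would re-read the same list.
static void worker_read_captures(void) {
  void **global_args;
  __kmpc_get_shared_variables(&global_args);
  int *x = (int *)global_args[0];
  double *y = (double *)global_args[1];
  (void)x;
  (void)y;
}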
+ case OMPRTL_NVPTX__kmpc_get_shared_variables: { + /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); + break; + } + case OMPRTL_NVPTX__kmpc_parallel_level: { + // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); + break; + } + case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { + // Build int8_t __kmpc_is_spmd_exec_mode(); + auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); + break; + } } return RTLFn; } void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t) { - auto *F = dyn_cast<llvm::Function>(Addr); + uint64_t Size, int32_t, + llvm::GlobalValue::LinkageTypes) { // TODO: Add support for global variables on the device after declare target // support. - if (!F) + if (!isa<llvm::Function>(Addr)) return; - llvm::Module *M = F->getParent(); - llvm::LLVMContext &Ctx = M->getContext(); + llvm::Module &M = CGM.getModule(); + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); // Get "nvvm.annotations" metadata node - llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"), + llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; // Append metadata to nvvm.annotations @@ -818,27 +1519,19 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( assert(!ParentName.empty() && "Invalid target region parent name!"); - CGOpenMPRuntimeNVPTX::ExecutionMode Mode = - getExecutionModeForDirective(CGM, D); - switch (Mode) { - case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic: - emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); - break; - case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd: - emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D); + if (Mode) + emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - break; - case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown: - llvm_unreachable( - "Unknown programming model for OpenMP directive on NVPTX target."); - } + else + emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + CodeGen); setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) { + : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP NVPTX can only handle device code."); } @@ -846,9 +1539,8 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) { - // Do nothing in case of Spmd mode and L0 parallel. 
- // TODO: If in Spmd mode and L1 parallel emit the clause. - if (isInSpmdExecutionMode()) + // Do nothing in case of SPMD mode and L0 parallel. + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) return; CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc); @@ -857,9 +1549,8 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) { - // Do nothing in case of Spmd mode and L0 parallel. - // TODO: If in Spmd mode and L1 parallel emit the clause. - if (isInSpmdExecutionMode()) + // Do nothing in case of SPMD mode and L0 parallel. + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) return; CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc); @@ -873,13 +1564,33 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + // Emit target region as a standalone region. + class NVPTXPrePostActionTy : public PrePostActionTy { + bool &IsInParallelRegion; + bool PrevIsInParallelRegion; - auto *OutlinedFun = cast<llvm::Function>( - CGOpenMPRuntime::emitParallelOutlinedFunction( + public: + NVPTXPrePostActionTy(bool &IsInParallelRegion) + : IsInParallelRegion(IsInParallelRegion) {} + void Enter(CodeGenFunction &CGF) override { + PrevIsInParallelRegion = IsInParallelRegion; + IsInParallelRegion = true; + } + void Exit(CodeGenFunction &CGF) override { + IsInParallelRegion = PrevIsInParallelRegion; + } + } Action(IsInParallelRegion); + CodeGen.setAction(Action); + bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; + IsInTargetMasterThreadRegion = false; + auto *OutlinedFun = + cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); - if (!isInSpmdExecutionMode()) { + IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; + if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD && + !IsInParallelRegion) { llvm::Function *WrapperFun = - createDataSharingWrapper(OutlinedFun, D); + createParallelDataSharingWrapper(OutlinedFun, D); WrapperFunctionsMap[OutlinedFun] = WrapperFun; } @@ -889,7 +1600,24 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + SourceLocation Loc = D.getLocStart(); + // Emit target region as a standalone region. 
+ class NVPTXPrePostActionTy : public PrePostActionTy { + SourceLocation &Loc; + + public: + NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {} + void Enter(CodeGenFunction &CGF) override { + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) + .emitGenericVarsProlog(CGF, Loc); + } + void Exit(CodeGenFunction &CGF) override { + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) + .emitGenericVarsEpilog(CGF); + } + } Action(Loc); + CodeGen.setAction(Action); llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); @@ -900,6 +1628,119 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( return OutlinedFun; } +void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, + SourceLocation Loc) { + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + return; + + CGBuilderTy &Bld = CGF.Builder; + + const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); + if (I == FunctionGlobalizedDecls.end()) + return; + if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) { + QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); + + // Recover pointer to this function's global record. The runtime will + // handle the specifics of the allocation of the memory. + // Use actual memory size of the record including the padding + // for alignment purposes. + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(RecTy).getQuantity(); + unsigned GlobalRecordSize = + CGM.getContext().getTypeSizeInChars(RecTy).getQuantity(); + GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); + // TODO: allow the usage of shared memory to be controlled by + // the user, for now, default to global. + llvm::Value *GlobalRecordSizeArg[] = { + llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), + CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), + GlobalRecordSizeArg); + llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); + LValue Base = + CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy); + I->getSecond().GlobalRecordAddr = GlobalRecValue; + + // Emit the "global alloca" which is a GEP from the global declaration + // record using the pointer returned by the runtime. + for (auto &Rec : I->getSecond().LocalVarData) { + bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); + llvm::Value *ParValue; + if (EscapedParam) { + const auto *VD = cast<VarDecl>(Rec.first); + LValue ParLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); + ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); + } + const FieldDecl *FD = Rec.second.first; + LValue VarAddr = CGF.EmitLValueForField(Base, FD); + Rec.second.second = VarAddr.getAddress(); + if (EscapedParam) { + const auto *VD = cast<VarDecl>(Rec.first); + CGF.EmitStoreOfScalar(ParValue, VarAddr); + I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); + } + } + } + for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { + // Recover pointer to this function's global record. The runtime will + // handle the specifics of the allocation of the memory. + // Use actual memory size of the record including the padding + // for alignment purposes. 
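// Illustrative sketch (not part of this change): for a fixed-size escaped
// local such as
//
//   int x;   // captured by reference in an inner parallel region
//
// the prolog above effectively emits
//
//   struct _globalized_locals_ty { int x; };
//   void *rec = __kmpc_data_sharing_push_stack(
//       sizeof(struct _globalized_locals_ty), /*UseSharedMemory=*/0);
//   /* uses of x are redirected to &((struct _globalized_locals_ty *)rec)->x */
//
// and the matching epilog releases the record with
// __kmpc_data_sharing_pop_stack(rec). For the variably modified declarations
// handled in this loop the size is only known at run time, so the next few
// lines round it up to the declaration alignment:
//
//   size = ((size + align - 1) / align) * align;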
+ CGBuilderTy &Bld = CGF.Builder; + llvm::Value *Size = CGF.getTypeSize(VD->getType()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + Size = Bld.CreateNUWAdd( + Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1)); + llvm::Value *AlignVal = + llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity()); + Size = Bld.CreateUDiv(Size, AlignVal); + Size = Bld.CreateNUWMul(Size, AlignVal); + // TODO: allow the usage of shared memory to be controlled by + // the user, for now, default to global. + llvm::Value *GlobalRecordSizeArg[] = { + Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), + GlobalRecordSizeArg); + llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); + LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(), + CGM.getContext().getDeclAlign(VD), + AlignmentSource::Decl); + I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD), + Base.getAddress()); + I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue); + } + I->getSecond().MappedParams->apply(CGF); +} + +void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) { + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + return; + + const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); + if (I != FunctionGlobalizedDecls.end()) { + I->getSecond().MappedParams->restore(CGF); + if (!CGF.HaveInsertPoint()) + return; + for (llvm::Value *Addr : + llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + Addr); + } + if (I->getSecond().GlobalRecordAddr) { + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); + } + } +} + void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, @@ -908,12 +1749,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -925,66 +1766,102 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall( if (!CGF.HaveInsertPoint()) return; - if (isInSpmdExecutionMode()) - emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) + emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); else - emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); } -void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( +void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, 
ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { llvm::Function *Fn = cast<llvm::Function>(OutlinedFn); - llvm::Function *WFn = WrapperFunctionsMap[Fn]; - assert(WFn && "Wrapper function does not exist!"); // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &) { - CGBuilderTy &Bld = CGF.Builder; + Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( + /*DestWidth=*/32, /*Signed=*/1), + ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + // ThreadId for serialized parallels is 0. + Address ThreadIDAddr = ZeroAddr; + auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); + }; + auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + + RegionCodeGenTy RCG(CodeGen); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *Args[] = {RTLoc, ThreadID}; + NVPTXActionTy Action( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + Args, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + Args); + RCG.setAction(Action); + RCG(CGF); + }; + + auto &&L0ParallelGen = [this, CapturedVars, Fn](CodeGenFunction &CGF, + PrePostActionTy &Action) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *WFn = WrapperFunctionsMap[Fn]; + assert(WFn && "Wrapper function does not exist!"); llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); + // Prepare for parallel region. Indicate the outlined function. + llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), + Args); + + // Create a private scope that will globalize the arguments + // passed from the outside of the target region. + CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF); + + // There's somehting to share. if (!CapturedVars.empty()) { - // There's somehting to share, add the attribute - CGF.CurFn->addFnAttr("has-nvptx-shared-depot"); // Prepare for parallel region. Indicate the outlined function. Address SharedArgs = - CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, - "shared_args"); + CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_arg_refs"); llvm::Value *SharedArgsPtr = SharedArgs.getPointer(); - // TODO: Optimize runtime initialization and pass in correct value. - llvm::Value *Args[] = {ID, SharedArgsPtr, - Bld.getInt32(CapturedVars.size()), - /*RequiresOMPRuntime=*/Bld.getInt16(1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), - Args); + llvm::Value *DataSharingArgs[] = { + SharedArgsPtr, + llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_begin_sharing_variables), + DataSharingArgs); + // Store variable address in a list of references to pass to workers. 
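// Illustrative sketch (not part of this change): the master-side sequence for
// a level-0 parallel region with captures in non-SPMD mode, as emitted here:
//
//   __kmpc_kernel_prepare_parallel(wrapper_id, /*RequiresOMPRuntime=*/1);
//   __kmpc_begin_sharing_variables(&args, n_captures);
//   /* fill args[0 .. n_captures-1] with the captured addresses */
//   barrier;   // release the workers
//   barrier;   // wait until the workers are done
//   __kmpc_end_sharing_variables();
//
// The loop below fills the list one pointer-sized slot at a time: integer
// captures are widened with inttoptr, everything else is cast to void*.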
unsigned Idx = 0; ASTContext &Ctx = CGF.getContext(); + Address SharedArgListAddress = CGF.EmitLoadOfPointer( + SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy)) + .castAs<PointerType>()); for (llvm::Value *V : CapturedVars) { - Address Dst = Bld.CreateConstInBoundsGEP( - CGF.EmitLoadOfPointer(SharedArgs, - Ctx.getPointerType( - Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>()), - Idx, CGF.getPointerSize()); - llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy); + Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, + CGF.getPointerSize()); + llvm::Value *PtrV; + if (V->getType()->isIntegerTy()) + PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy); + else + PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy); CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false, - Ctx.getPointerType(Ctx.VoidPtrTy)); - Idx++; + Ctx.getPointerType(Ctx.VoidPtrTy)); + ++Idx; } - } else { - // TODO: Optimize runtime initialization and pass in correct value. - llvm::Value *Args[] = { - ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), - /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), - Args); } // Activate workers. This barrier is used by the master to signal @@ -999,96 +1876,332 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( // The master waits at this barrier until all workers are done. syncCTAThreads(CGF); + if (!CapturedVars.empty()) + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); + // Remember for post-processing in worker loop. Work.emplace_back(WFn); }; - auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto *ThreadID = getThreadID(CGF, Loc); - llvm::Value *Args[] = {RTLoc, ThreadID}; - - auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF, - PrePostActionTy &Action) { - Action.Enter(CGF); - - llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); - }; - + auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen, + &ThreadIDAddr](CodeGenFunction &CGF, + PrePostActionTy &Action) { RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), - Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), - Args); - RCG.setAction(Action); - RCG(CGF); + if (IsInParallelRegion) { + SeqGen(CGF, Action); + } else if (IsInTargetMasterThreadRegion) { + L0ParallelGen(CGF, Action); + } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) { + RCG(CGF); + } else { + // Check for master and then parallelism: + // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) { + // Serialized execution. + // } else if (master) { + // Worker call. + // } else { + // Outlined function call. 
+ // } + CGBuilderTy &Bld = CGF.Builder; + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); + llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); + llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); + llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); + llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ParallelCheckBB); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *PL = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + {RTLoc, ThreadID}); + llvm::Value *Res = Bld.CreateIsNotNull(PL); + Bld.CreateCondBr(Res, SeqBB, MasterCheckBB); + CGF.EmitBlock(SeqBB); + SeqGen(CGF, Action); + CGF.EmitBranch(ExitBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(MasterCheckBB); + llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then"); + llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); + llvm::Value *IsMaster = + Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); + Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock); + CGF.EmitBlock(MasterThenBB); + L0ParallelGen(CGF, Action); + CGF.EmitBranch(ExitBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ElseBlock); + // In the worker need to use the real thread id. + ThreadIDAddr = emitThreadIDAddress(CGF, Loc); + RCG(CGF); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + // Emit the continuation block for code after the if. + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + } }; - if (IfCond) - emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen); - else { + if (IfCond) { + emitOMPIfClause(CGF, IfCond, LNParallelGen, SeqGen); + } else { CodeGenFunction::RunCleanupsScope Scope(CGF); - RegionCodeGenTy ThenRCG(L0ParallelGen); + RegionCodeGenTy ThenRCG(LNParallelGen); ThenRCG(CGF); } } -void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( +void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { // Just call the outlined function to execute the parallel region. // OutlinedFn(>id, &zero, CapturedStruct); // - // TODO: Do something with IfCond when support for the 'if' clause - // is added on Spmd target directives. llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); + + Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( + /*DestWidth=*/32, /*Signed=*/1), + ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + // ThreadId for serialized parallels is 0. 
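A minimal plain-C++ sketch of the dispatch that LNParallelGen emits in emitNonSPMDParallelCall above: serialized execution when the code is already in a parallel region or in SPMD mode, the master-to-worker hand-off at the outermost level, and a direct call to the outlined function otherwise. The predicate and action helpers below are hypothetical stand-ins for the __kmpc_is_spmd_exec_mode and __kmpc_parallel_level runtime calls and for the SeqGen/L0ParallelGen/CodeGen lambdas; only the branching structure is taken from the hunk above.

#include <cstdio>

// Hypothetical stand-ins for the runtime queries and the three code paths.
static bool isSPMDExecMode() { return false; }   // __kmpc_is_spmd_exec_mode
static int  parallelLevel()  { return 0; }       // __kmpc_parallel_level(loc, gtid)
static bool isMasterThread() { return true; }    // thread id == master thread id

static void serializedParallel()      { std::puts("serialized execution"); }
static void masterHandsOffToWorkers() { std::puts("master wakes workers"); }
static void callOutlinedDirectly()    { std::puts("outlined function call"); }

int main() {
  if (isSPMDExecMode() || parallelLevel() != 0)
    serializedParallel();            // already parallel: run the region serialized
  else if (isMasterThread())
    masterHandsOffToWorkers();       // L0 parallel: prepare_parallel + shared args + barrier
  else
    callOutlinedDirectly();          // orphaned worker thread: call the outlined function
}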
+ Address ThreadIDAddr = ZeroAddr; + auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr, + &ThreadIDAddr](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); + }; + auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + + RegionCodeGenTy RCG(CodeGen); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *Args[] = {RTLoc, ThreadID}; + + NVPTXActionTy Action( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + Args, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + Args); + RCG.setAction(Action); + RCG(CGF); + }; + + if (IsInTargetMasterThreadRegion) { + // In the worker need to use the real thread id. + ThreadIDAddr = emitThreadIDAddress(CGF, Loc); + RegionCodeGenTy RCG(CodeGen); + RCG(CGF); + } else { + // If we are not in the target region, it is definitely L2 parallelism or + // more, because for SPMD mode we always has L1 parallel level, sowe don't + // need to check for orphaned directives. + RegionCodeGenTy RCG(SeqGen); + RCG(CGF); + } +} + +void CGOpenMPRuntimeNVPTX::emitCriticalRegion( + CodeGenFunction &CGF, StringRef CriticalName, + const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, + const Expr *Hint) { + llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop"); + llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test"); + llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync"); + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); + + // Fetch team-local id of the thread. + llvm::Value *ThreadID = getNVPTXThreadID(CGF); + + // Get the width of the team. + llvm::Value *TeamWidth = getNVPTXNumThreads(CGF); + + // Initialize the counter variable for the loop. + QualType Int32Ty = + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0); + Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter"); + LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty); + CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal, + /*isInit=*/true); + + // Block checks if loop counter exceeds upper bound. + CGF.EmitBlock(LoopBB); + llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc); + llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth); + CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB); + + // Block tests which single thread should execute region, and which threads + // should go straight to synchronisation point. + CGF.EmitBlock(TestBB); + CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc); + llvm::Value *CmpThreadToCounter = + CGF.Builder.CreateICmpEQ(ThreadID, CounterVal); + CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB); + + // Block emits the body of the critical region. + CGF.EmitBlock(BodyBB); + + // Output the critical statement. + CriticalOpGen(CGF); + + // After the body surrounded by the critical region, the single executing + // thread will jump to the synchronisation point. 
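The critical-region lowering begun here, and completed by the barrier and counter increment just below, makes the threads of a team take turns: each thread keeps a private counter, only the thread whose id equals the counter runs the body, and every thread meets at a team barrier before the next turn. A host-side analogue, assuming C++20 std::barrier and std::thread in place of the CTA barrier and GPU threads:

#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int TeamWidth = 4;                 // getNVPTXNumThreads analogue
  std::barrier<> sync(TeamWidth);          // stands in for the CTA barrier
  std::vector<std::thread> team;
  for (int tid = 0; tid < TeamWidth; ++tid) {
    team.emplace_back([&sync, tid, TeamWidth] {
      // Each thread owns a private counter, like the critical_counter temp.
      for (int counter = 0; counter < TeamWidth; ++counter) {
        if (tid == counter)                // only this thread's turn
          std::printf("thread %d runs the critical body\n", tid);
        sync.arrive_and_wait();            // all threads meet before the next turn
      }
    });
  }
  for (std::thread &t : team) t.join();
}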
+ // Block waits for all threads in current team to finish then increments the + // counter variable and returns to the loop. + CGF.EmitBlock(SyncBB); + getNVPTXCTABarrier(CGF); + + llvm::Value *IncCounterVal = + CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); + CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal); + CGF.EmitBranch(LoopBB); + + // Block that is reached when all threads in the team complete the region. + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); +} + +/// Cast value to the specified type. +static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val, + QualType ValTy, QualType CastTy, + SourceLocation Loc) { + assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() && + "Cast type must sized."); + assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() && + "Val type must sized."); + llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy); + if (ValTy == CastTy) + return Val; + if (CGF.getContext().getTypeSizeInChars(ValTy) == + CGF.getContext().getTypeSizeInChars(CastTy)) + return CGF.Builder.CreateBitCast(Val, LLVMCastTy); + if (CastTy->isIntegerType() && ValTy->isIntegerType()) + return CGF.Builder.CreateIntCast(Val, LLVMCastTy, + CastTy->hasSignedIntegerRepresentation()); + Address CastItem = CGF.CreateMemTemp(CastTy); + Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace())); + CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy); + return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc); } /// This function creates calls to one of two shuffle functions to copy /// variables between lanes in a warp. static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, - QualType ElemTy, llvm::Value *Elem, - llvm::Value *Offset) { - auto &CGM = CGF.CGM; - auto &C = CGM.getContext(); - auto &Bld = CGF.Builder; + QualType ElemType, + llvm::Value *Offset, + SourceLocation Loc) { + CodeGenModule &CGM = CGF.CGM; + CGBuilderTy &Bld = CGF.Builder; CGOpenMPRuntimeNVPTX &RT = *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime())); - unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity(); - assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction."); + CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); + assert(Size.getQuantity() <= 8 && + "Unsupported bitwidth in shuffle instruction."); - OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4 + OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 ? OMPRTL_NVPTX__kmpc_shuffle_int32 : OMPRTL_NVPTX__kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. - auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty; - auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy); - auto *WarpSize = CGF.EmitScalarConversion( - getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true), - C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation()); + QualType CastTy = CGF.getContext().getIntTypeForBitwidth( + Size.getQuantity() <= 4 ? 
32 : 64, /*Signed=*/1); + llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc); + llvm::Value *WarpSize = + Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); - auto *ShuffledVal = - CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn), - {ElemCast, Offset, WarpSize}); + llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( + RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); - return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy)); + return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); +} + +static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, + Address DestAddr, QualType ElemType, + llvm::Value *Offset, SourceLocation Loc) { + CGBuilderTy &Bld = CGF.Builder; + + CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); + // Create the loop over the big sized data. + // ptr = (void*)Elem; + // ptrEnd = (void*) Elem + 1; + // Step = 8; + // while (ptr + Step < ptrEnd) + // shuffle((int64_t)*ptr); + // Step = 4; + // while (ptr + Step < ptrEnd) + // shuffle((int32_t)*ptr); + // ... + Address ElemPtr = DestAddr; + Address Ptr = SrcAddr; + Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast( + Bld.CreateConstGEP(SrcAddr, 1, Size), CGF.VoidPtrTy); + for (int IntSize = 8; IntSize >= 1; IntSize /= 2) { + if (Size < CharUnits::fromQuantity(IntSize)) + continue; + QualType IntType = CGF.getContext().getIntTypeForBitwidth( + CGF.getContext().toBits(CharUnits::fromQuantity(IntSize)), + /*Signed=*/1); + llvm::Type *IntTy = CGF.ConvertTypeForMem(IntType); + Ptr = Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr, IntTy->getPointerTo()); + ElemPtr = + Bld.CreatePointerBitCastOrAddrSpaceCast(ElemPtr, IntTy->getPointerTo()); + if (Size.getQuantity() / IntSize > 1) { + llvm::BasicBlock *PreCondBB = CGF.createBasicBlock(".shuffle.pre_cond"); + llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".shuffle.then"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".shuffle.exit"); + llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock(); + CGF.EmitBlock(PreCondBB); + llvm::PHINode *PhiSrc = + Bld.CreatePHI(Ptr.getType(), /*NumReservedValues=*/2); + PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB); + llvm::PHINode *PhiDest = + Bld.CreatePHI(ElemPtr.getType(), /*NumReservedValues=*/2); + PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB); + Ptr = Address(PhiSrc, Ptr.getAlignment()); + ElemPtr = Address(PhiDest, ElemPtr.getAlignment()); + llvm::Value *PtrDiff = Bld.CreatePtrDiff( + PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast( + Ptr.getPointer(), CGF.VoidPtrTy)); + Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)), + ThenBB, ExitBB); + CGF.EmitBlock(ThenBB); + llvm::Value *Res = createRuntimeShuffleFunction( + CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), + IntType, Offset, Loc); + CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); + Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); + ElemPtr = + Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize)); + PhiSrc->addIncoming(Ptr.getPointer(), ThenBB); + PhiDest->addIncoming(ElemPtr.getPointer(), ThenBB); + CGF.EmitBranch(PreCondBB); + CGF.EmitBlock(ExitBB); + } else { + llvm::Value *Res = createRuntimeShuffleFunction( + CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), + IntType, Offset, Loc); + CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); + Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); + 
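The chunking loop above copies a reduce-list element of arbitrary size between lanes by pushing 8-, 4-, 2- and finally 1-byte pieces through the shuffle intrinsic. A sketch of the same strategy in plain C++, with an identity function standing in for __kmpc_shuffle_int32/int64 and memcpy standing in for the pointer-cast loads and stores; the function and variable names are illustrative only.

#include <cstdio>
#include <cstring>

// Stand-in for the warp shuffle; identity here, so the copy is easy to check.
static unsigned long long shuffle(unsigned long long v, int /*offset*/) { return v; }

// Copy `size` bytes from src to dst in 8/4/2/1-byte chunks, pushing every chunk
// through the shuffle, mirroring the chunking loop of shuffleAndStore.
static void shuffleAndStore(const unsigned char *src, unsigned char *dst,
                            size_t size, int offset) {
  for (size_t intSize = 8; intSize >= 1; intSize /= 2) {
    if (size < intSize)
      continue;                       // element smaller than this chunk size
    while (size >= intSize) {         // the generated IR uses a PHI-based loop here
      unsigned long long chunk = 0;
      std::memcpy(&chunk, src, intSize);
      chunk = shuffle(chunk, offset);
      std::memcpy(dst, &chunk, intSize);
      src += intSize;
      dst += intSize;
      size -= intSize;                // the remainder falls through to smaller chunks
    }
  }
}

int main() {
  unsigned char src[7] = {1, 2, 3, 4, 5, 6, 7}, dst[7] = {};
  shuffleAndStore(src, dst, sizeof(src), /*offset=*/1);
  for (unsigned char b : dst) std::printf("%d ", b);   // 1 2 3 4 5 6 7
  std::printf("\n");
}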
ElemPtr = + Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize)); + } + Size = Size % IntSize; + } } namespace { @@ -1119,19 +2232,19 @@ static void emitReductionListCopy( ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { - auto &CGM = CGF.CGM; - auto &C = CGM.getContext(); - auto &Bld = CGF.Builder; + CodeGenModule &CGM = CGF.CGM; + ASTContext &C = CGM.getContext(); + CGBuilderTy &Bld = CGF.Builder; - auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; - auto *ScratchpadIndex = CopyOptions.ScratchpadIndex; - auto *ScratchpadWidth = CopyOptions.ScratchpadWidth; + llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; + llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex; + llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth; // Iterates, element-by-element, through the source Reduce list and // make a copy. unsigned Idx = 0; unsigned Size = Privates.size(); - for (auto &Private : Privates) { + for (const Expr *Private : Privates) { Address SrcElementAddr = Address::invalid(); Address DestElementAddr = Address::invalid(); Address DestElementPtrAddr = Address::invalid(); @@ -1150,10 +2263,9 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Create a temporary to store the element in the destination // Reduce list. @@ -1169,62 +2281,49 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Get the address for dest element. The destination // element has already been created on the thread's stack. DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); - llvm::Value *DestElementPtr = - CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()); - Address DestElemAddr = - Address(DestElementPtr, C.getTypeAlignInChars(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast( - DestElemAddr, CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = CGF.EmitLoadOfPointer( + DestElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); break; } case ThreadToScratchpad: { // Step 1.1: Get the address for the src element in the Reduce list. 
Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. - unsigned ElementSizeInChars = - C.getTypeSizeInChars(Private->getType()).getQuantity(); - auto *CurrentOffset = - Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), - ScratchpadIndex); - auto *ScratchPadElemAbsolutePtrVal = - Bld.CreateAdd(DestBase.getPointer(), CurrentOffset); + llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); + llvm::Value *CurrentOffset = + Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); + llvm::Value *ScratchPadElemAbsolutePtrVal = + Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); - Address ScratchpadPtr = - Address(ScratchPadElemAbsolutePtrVal, - C.getTypeAlignInChars(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast( - ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); IncrScratchpadDest = true; break; } case ScratchpadToThread: { // Step 1.1: Get the address for the src element in the scratchpad. // address = base + index * ElementSizeInChars. - unsigned ElementSizeInChars = - C.getTypeSizeInChars(Private->getType()).getQuantity(); - auto *CurrentOffset = - Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), - ScratchpadIndex); - auto *ScratchPadElemAbsolutePtrVal = - Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset); + llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); + llvm::Value *CurrentOffset = + Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); + llvm::Value *ScratchPadElemAbsolutePtrVal = + Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, @@ -1246,21 +2345,30 @@ static void emitReductionListCopy( // element as this is required in all directions SrcElementAddr = Bld.CreateElementBitCast( SrcElementAddr, CGF.ConvertTypeForMem(Private->getType())); - llvm::Value *Elem = - CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, - Private->getType(), SourceLocation()); + DestElementAddr = Bld.CreateElementBitCast(DestElementAddr, + SrcElementAddr.getElementType()); // Now that all active lanes have read the element in the // Reduce list, shuffle over the value from the remote lane. if (ShuffleInElement) { - Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem, - RemoteLaneOffset); + shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(), + RemoteLaneOffset, Private->getExprLoc()); + } else { + if (Private->getType()->isScalarType()) { + llvm::Value *Elem = + CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, + Private->getType(), Private->getExprLoc()); + // Store the source element value to the dest element address. 
+ CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, + Private->getType()); + } else { + CGF.EmitAggregateCopy( + CGF.MakeAddrLValue(DestElementAddr, Private->getType()), + CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), + Private->getType(), AggValueSlot::DoesNotOverlap); + } } - // Store the source element value to the dest element address. - CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, - Private->getType()); - // Step 3.1: Modify reference in dest Reduce list as needed. // Modifying the reference in Reduce list to point to the newly // created element. The element is live in the current function @@ -1279,22 +2387,20 @@ static void emitReductionListCopy( if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) { llvm::Value *ScratchpadBasePtr = IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer(); - unsigned ElementSizeInChars = - C.getTypeSizeInChars(Private->getType()).getQuantity(); - ScratchpadBasePtr = Bld.CreateAdd( + llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); + ScratchpadBasePtr = Bld.CreateNUWAdd( ScratchpadBasePtr, - Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get( - CGM.SizeTy, ElementSizeInChars))); + Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars)); // Take care of global memory alignment for performance - ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr, - llvm::ConstantInt::get(CGM.SizeTy, 1)); - ScratchpadBasePtr = Bld.CreateSDiv( + ScratchpadBasePtr = Bld.CreateNUWSub( + ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateUDiv( ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); - ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr, - llvm::ConstantInt::get(CGM.SizeTy, 1)); - ScratchpadBasePtr = Bld.CreateMul( + ScratchpadBasePtr = Bld.CreateNUWAdd( + ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateNUWMul( ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); @@ -1304,7 +2410,7 @@ static void emitReductionListCopy( SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); } - Idx++; + ++Idx; } } @@ -1319,27 +2425,31 @@ static void emitReductionListCopy( /// local = local @ remote /// else /// local = remote -static llvm::Value * -emitReduceScratchpadFunction(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, llvm::Value *ReduceFn) { - auto &C = CGM.getContext(); - auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); +static llvm::Value *emitReduceScratchpadFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { + ASTContext &C = CGM.getContext(); + QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); // Destination of the copy. - ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); // Base address of the scratchpad array, with each element storing a // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); // A source index into the scratchpad array. 
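The scratchpad addressing used above reduces to two formulas: the element for team `index` lives at base + index * elemSize, and after a full row (one slot per team) the base is rounded up to the global-memory alignment, which is what the NUW add/sub/udiv/mul sequence computes. A small sketch, with 256 assumed for GlobalMemoryAlignment; the real constant is defined elsewhere in this file, and the function names are illustrative.

#include <cstdio>

static unsigned long long elementAddr(unsigned long long base,
                                      unsigned long long index,
                                      unsigned long long elemSize) {
  return base + index * elemSize;            // address = base + index * ElementSizeInChars
}

static unsigned long long nextRowBase(unsigned long long base,
                                      unsigned long long width,
                                      unsigned long long elemSize,
                                      unsigned long long align = 256) {
  base += width * elemSize;                  // skip the row just written
  return ((base - 1) / align + 1) * align;   // round up, as the sub/udiv/add/mul sequence does
}

int main() {
  // 100 teams, 8-byte element: team 3's slot is at offset 24, the next row starts at 1024.
  std::printf("%llu %llu\n", elementAddr(0, 3, 8), nextRowBase(0, 100, 8));
}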
- ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other); + ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, + ImplicitParamDecl::Other); // Row width of an element in the scratchpad array, typically // the number of teams. - ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other); + ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, + ImplicitParamDecl::Other); // If should_reduce == 1, then it's load AND reduce, // If should_reduce == 0 (or otherwise), then it only loads (+ copy). // The latter case is used for initialization. - ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other); + ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + Int32Ty, ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); @@ -1348,47 +2458,44 @@ emitReduceScratchpadFunction(CodeGenModule &CGM, Args.push_back(&WidthArg); Args.push_back(&ShouldReduceArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_load_and_reduce", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - // We don't need debug information in this function as nothing here refers to - // user code. - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // Get local Reduce list pointer. Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address ReduceListAddr( Bld.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()), + C.VoidPtrTy, Loc), CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), CGF.getPointerAlign()); Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( - AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc); Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); - llvm::Value *IndexVal = - Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, - Int32Ty, SourceLocation()), - CGM.SizeTy, /*isSigned=*/true); + llvm::Value *IndexVal = Bld.CreateIntCast( + CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc), + CGM.SizeTy, /*isSigned=*/true); Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); - llvm::Value *WidthVal = - Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, - Int32Ty, SourceLocation()), - CGM.SizeTy, /*isSigned=*/true); + llvm::Value *WidthVal = Bld.CreateIntCast( + CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc), + CGM.SizeTy, /*isSigned=*/true); Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg); llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar( - AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation()); + AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc); // The absolute ptr address to the base addr of the next element to copy. 
llvm::Value *CumulativeElemBasePtr = @@ -1411,7 +2518,7 @@ emitReduceScratchpadFunction(CodeGenModule &CGM, llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1)); + llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal); Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); CGF.EmitBlock(ThenBB); @@ -1421,7 +2528,8 @@ emitReduceScratchpadFunction(CodeGenModule &CGM, ReduceListAddr.getPointer(), CGF.VoidPtrTy); llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( RemoteReduceList.getPointer(), CGF.VoidPtrTy); - CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr}); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr}); Bld.CreateBr(MergeBB); CGF.EmitBlock(ElseBB); @@ -1445,22 +2553,27 @@ emitReduceScratchpadFunction(CodeGenModule &CGM, /// static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy) { + QualType ReductionArrayTy, + SourceLocation Loc) { - auto &C = CGM.getContext(); - auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + ASTContext &C = CGM.getContext(); + QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); // Source of the copy. - ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); // Base address of the scratchpad array, with each element storing a // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); // A destination index into the scratchpad array, typically the team // identifier. - ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other); + ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, + ImplicitParamDecl::Other); // Row width of an element in the scratchpad array, typically // the number of teams. - ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other); + ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, + ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); @@ -1468,36 +2581,34 @@ static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, Args.push_back(&IndexArg); Args.push_back(&WidthArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_copy_to_scratchpad", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - // We don't need debug information in this function as nothing here refers to - // user code. 
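The ShouldReduce flag handled above selects between the two behaviours of _omp_reduction_load_and_reduce: a non-zero flag combines the scratchpad row into the local reduce list through the generated reduce_func, a zero flag only loads it, which is how the first value is seeded. A sketch with an int payload and an additive combiner standing in for the generated reduction function; the names and the orchestration in main are illustrative assumptions, not the runtime's actual calling sequence.

#include <cstdio>
#include <cstring>

static void combine(int *lhs, const int *rhs, int n) {
  for (int i = 0; i < n; ++i) lhs[i] += rhs[i];   // stand-in for reduce_func
}

static void loadAndReduce(int *local, const int *remote, int n, int shouldReduce) {
  if (shouldReduce != 0)
    combine(local, remote, n);                    // load AND reduce
  else
    std::memcpy(local, remote, n * sizeof(int));  // load only (initialization)
}

int main() {
  int local[2] = {0, 0};
  int teamRow0[2] = {1, 2}, teamRow1[2] = {3, 4};
  loadAndReduce(local, teamRow0, 2, /*shouldReduce=*/0);   // seed from the first row
  loadAndReduce(local, teamRow1, 2, /*shouldReduce=*/1);   // accumulate the next row
  std::printf("%d %d\n", local[0], local[1]);              // 4 6
}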
- CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address SrcDataAddr( Bld.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()), + C.VoidPtrTy, Loc), CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), CGF.getPointerAlign()); Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( - AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc); Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); - llvm::Value *IndexVal = - Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, - Int32Ty, SourceLocation()), - CGF.SizeTy, /*isSigned=*/true); + llvm::Value *IndexVal = Bld.CreateIntCast( + CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc), + CGF.SizeTy, /*isSigned=*/true); Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); llvm::Value *WidthVal = @@ -1534,35 +2645,36 @@ static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, /// sync static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy) { - auto &C = CGM.getContext(); - auto &M = CGM.getModule(); + QualType ReductionArrayTy, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); + llvm::Module &M = CGM.getModule(); // ReduceList: thread local Reduce list. // At the stage of the computation when this function is called, partially // aggregated values reside in the first lane of every active warp. - ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); // NumWarps: number of warps active in the parallel region. This could // be smaller than 32 (max warps in a CTA) for partial block reduction. - ImplicitParamDecl NumWarpsArg(C, + ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getIntTypeForBitwidth(32, /* Signed */ true), ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); Args.push_back(&NumWarpsArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_inter_warp_copy_func", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - // We don't need debug information in this function as nothing here refers to - // user code. 
- CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // This array is used as a medium to transfer, one reduce element at a time, // the data from the first lane of every warp to lanes in the first warp @@ -1571,7 +2683,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // for reduced latency, as well as to have a distinct copy for concurrently // executing target regions. The array is declared with common linkage so // as to be shared across compilation units. - const char *TransferMediumName = + StringRef TransferMediumName = "__openmp_nvptx_data_transfer_temporary_storage"; llvm::GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); @@ -1584,14 +2696,15 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::Constant::getNullValue(Ty), TransferMediumName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, SharedAddressSpace); + CGM.addCompilerUsedGlobal(TransferMedium); } // Get the CUDA thread id of the current OpenMP thread on the GPU. - auto *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = getNVPTXThreadID(CGF); // nvptx_lane_id = nvptx_id % warpsize - auto *LaneID = getNVPTXLaneID(CGF); + llvm::Value *LaneID = getNVPTXLaneID(CGF); // nvptx_warp_id = nvptx_id / warpsize - auto *WarpID = getNVPTXWarpID(CGF); + llvm::Value *WarpID = getNVPTXWarpID(CGF); Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address LocalReduceList( @@ -1602,7 +2715,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, CGF.getPointerAlign()); unsigned Idx = 0; - for (auto &Private : Privates) { + for (const Expr *Private : Privates) { // // Warp master copies reduce element to transfer medium in __shared__ // memory. @@ -1612,8 +2725,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); // if (lane_id == 0) - auto IsWarpMaster = - Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master"); + llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); CGF.EmitBlock(ThenBB); @@ -1627,9 +2739,6 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); ElemPtr = Bld.CreateElementBitCast( ElemPtr, CGF.ConvertTypeForMem(Private->getType())); - // elem = *elemptr - llvm::Value *Elem = CGF.EmitLoadOfScalar( - ElemPtr, /*Volatile=*/false, Private->getType(), SourceLocation()); // Get pointer to location in transfer medium. // MediumPtr = &medium[warp_id] @@ -1641,8 +2750,19 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, MediumPtr = Bld.CreateElementBitCast( MediumPtr, CGF.ConvertTypeForMem(Private->getType())); + // elem = *elemptr //*MediumPtr = elem - Bld.CreateStore(Elem, MediumPtr); + if (Private->getType()->isScalarType()) { + llvm::Value *Elem = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, + Private->getType(), Loc); + // Store the source element value to the dest element address. 
+ CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/false, + Private->getType()); + } else { + CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()), + CGF.MakeAddrLValue(MediumPtr, Private->getType()), + Private->getType(), AggValueSlot::DoesNotOverlap); + } Bld.CreateBr(MergeBB); @@ -1655,7 +2775,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation()); - auto *NumActiveThreads = Bld.CreateNSWMul( + llvm::Value *NumActiveThreads = Bld.CreateNSWMul( NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads"); // named_barrier_sync(ParallelBarrierID, num_active_threads) syncParallelThreads(CGF, NumActiveThreads); @@ -1668,7 +2788,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); // Up to 32 threads in warp 0 are active. - auto IsActiveThread = + llvm::Value *IsActiveThread = Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); @@ -1682,8 +2802,6 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // SrcMediumVal = *SrcMediumPtr; SrcMediumPtr = Bld.CreateElementBitCast( SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType())); - llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar( - SrcMediumPtr, /*Volatile=*/false, Private->getType(), SourceLocation()); // TargetElemPtr = (type[i]*)(SrcDataAddr[i]) Address TargetElemPtrPtr = @@ -1696,8 +2814,17 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, TargetElemPtr, CGF.ConvertTypeForMem(Private->getType())); // *TargetElemPtr = SrcMediumVal; - CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false, - Private->getType()); + if (Private->getType()->isScalarType()) { + llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar( + SrcMediumPtr, /*Volatile=*/false, Private->getType(), Loc); + CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false, + Private->getType()); + } else { + CGF.EmitAggregateCopy( + CGF.MakeAddrLValue(SrcMediumPtr, Private->getType()), + CGF.MakeAddrLValue(TargetElemPtr, Private->getType()), + Private->getType(), AggValueSlot::DoesNotOverlap); + } Bld.CreateBr(W0MergeBB); CGF.EmitBlock(W0ElseBB); @@ -1708,7 +2835,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // While warp 0 copies values from transfer medium, all other warps must // wait. syncParallelThreads(CGF, NumActiveThreads); - Idx++; + ++Idx; } CGF.FinishFunction(); @@ -1781,39 +2908,40 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, /// (2k+1)th thread is ignored in the value aggregation. Therefore /// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so /// that the contiguity assumption still holds. -static llvm::Value * -emitShuffleAndReduceFunction(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, llvm::Value *ReduceFn) { - auto &C = CGM.getContext(); +static llvm::Value *emitShuffleAndReduceFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { + ASTContext &C = CGM.getContext(); // Thread local Reduce list used to host the values of data to be reduced. 
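The inter-warp copy function built above moves one reduce element at a time through the shared transfer medium in two barrier-separated phases: lane 0 of every warp writes its partial value to medium[warp_id], then the first num_warps threads of warp 0 read the values back into their reduce list. A sequential simulation of the two phases; the real kernel runs them in parallel and separates them with named-barrier syncs, and the array sizes here are assumed example values.

#include <cstdio>

int main() {
  const int WarpSize = 32, NumWarps = 4;   // NumWarps <= 32 (the medium has 32 slots)
  int partial[WarpSize * NumWarps];        // one partial reduction value per thread
  for (int t = 0; t < WarpSize * NumWarps; ++t) partial[t] = t;

  int medium[32] = {};                     // the __shared__ transfer medium, one slot per warp

  // Phase 1: lane 0 of every warp publishes its element to medium[warp_id].
  for (int t = 0; t < WarpSize * NumWarps; ++t)
    if (t % WarpSize == 0)                 // lane_id == 0
      medium[t / WarpSize] = partial[t];
  // named_barrier_sync separates the phases in the real kernel.

  // Phase 2: the first NumWarps threads of warp 0 read the values back.
  int gathered[NumWarps];
  for (int t = 0; t < NumWarps; ++t)       // thread_id < num_warps
    gathered[t] = medium[t];

  for (int t = 0; t < NumWarps; ++t) std::printf("%d ", gathered[t]);
  std::printf("\n");
}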
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); // Current lane id; could be logical. - ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other); + ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy, + ImplicitParamDecl::Other); // Offset of the remote source lane relative to the current lane. - ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy, - ImplicitParamDecl::Other); + ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.ShortTy, ImplicitParamDecl::Other); // Algorithm version. This is expected to be known at compile time. - ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other); + ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.ShortTy, ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); Args.push_back(&LaneIDArg); Args.push_back(&RemoteLaneOffsetArg); Args.push_back(&AlgoVerArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - // We don't need debug information in this function as nothing here refers to - // user code. - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address LocalReduceList( @@ -1870,21 +2998,19 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM, // When AlgoVer==2, the third conjunction has only the second part to be // evaluated during runtime. Other conjunctions evaluates to false // during compile time. 
- auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0)); + llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal); - auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); - auto CondAlgo1 = Bld.CreateAnd( + llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); + llvm::Value *CondAlgo1 = Bld.CreateAnd( Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal)); - auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); - auto CondAlgo2 = Bld.CreateAnd( - Algo2, - Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)), - Bld.getInt16(0))); + llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); + llvm::Value *CondAlgo2 = Bld.CreateAnd( + Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)))); CondAlgo2 = Bld.CreateAnd( CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0))); - auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); + llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); CondReduce = Bld.CreateOr(CondReduce, CondAlgo2); llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); @@ -1898,7 +3024,8 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM, LocalReduceList.getPointer(), CGF.VoidPtrTy); llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( RemoteReduceList.getPointer(), CGF.VoidPtrTy); - CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); Bld.CreateBr(MergeBB); CGF.EmitBlock(ElseBB); @@ -1909,7 +3036,7 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM, // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local // Reduce list. Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); - auto CondCopy = Bld.CreateAnd( + llvm::Value *CondCopy = Bld.CreateAnd( Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal)); llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then"); @@ -2182,16 +3309,22 @@ void CGOpenMPRuntimeNVPTX::emitReduction( bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - // FIXME: Add support for simd reduction. - assert((TeamsReduction || ParallelReduction) && + bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind); + assert((TeamsReduction || ParallelReduction || SimdReduction) && "Invalid reduction selection in emitReduction."); - auto &C = CGM.getContext(); + if (Options.SimpleReduction) { + CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, + ReductionOps, Options); + return; + } + + ASTContext &C = CGM.getContext(); // 1. Build a list of reduction variables. // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); - for (auto *E : Privates) { + for (const Expr *E : Privates) { if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; @@ -2219,7 +3352,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction( llvm::Value *Size = CGF.Builder.CreateIntCast( CGF.getVLASize( CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .first, + .NumElts, CGF.SizeTy, /*isSigned=*/false); CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), Elem); @@ -2227,41 +3360,44 @@ void CGOpenMPRuntimeNVPTX::emitReduction( } // 2. Emit reduce_func(). 
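Written out as plain C++, the lane predicates computed above decide which lanes combine the remote reduce list and which simply copy it, per algorithm version: 0 is the full-warp reduce, 1 the contiguous partial-warp reduce, and 2 the dispersed partial-warp reduce where only even lanes with a positive remote offset combine. Only the boolean structure is taken from the code; the function names below are illustrative.

#include <cstdio>

static bool laneReduces(unsigned algoVer, unsigned laneId, unsigned remoteOffset) {
  bool condAlgo0 = algoVer == 0;
  bool condAlgo1 = algoVer == 1 && laneId < remoteOffset;
  bool condAlgo2 = algoVer == 2 && (laneId & 1) == 0 && remoteOffset > 0;
  return condAlgo0 || condAlgo1 || condAlgo2;
}

// Algorithm 1 only: lanes at or past the offset adopt the remote list wholesale,
// so the reduced values stay contiguous in the lower lanes.
static bool laneCopiesRemote(unsigned algoVer, unsigned laneId, unsigned remoteOffset) {
  return algoVer == 1 && laneId >= remoteOffset;
}

int main() {
  std::printf("%d %d\n", laneReduces(1, 2, 4), laneCopiesRemote(1, 6, 4));  // 1 1
}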
- auto *ReductionFn = emitReductionFunction( - CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, - LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ReductionFn = emitReductionFunction( + CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), + Privates, LHSExprs, RHSExprs, ReductionOps); // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), // RedList, shuffle_reduce_func, interwarp_copy_func); - auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( ReductionList.getPointer(), CGF.VoidPtrTy); - auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction( - CGM, Privates, ReductionArrayTy, ReductionFn); - auto *InterWarpCopyFn = - emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy); + llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( + CGM, Privates, ReductionArrayTy, ReductionFn, Loc); + llvm::Value *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); - llvm::Value *Res = nullptr; - if (ParallelReduction) { - llvm::Value *Args[] = {ThreadId, - CGF.Builder.getInt32(RHSExprs.size()), - ReductionArrayTySize, - RL, - ShuffleAndReduceFn, - InterWarpCopyFn}; + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn}; + llvm::Value *Res = nullptr; + if (ParallelReduction) Res = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), Args); - } + else if (SimdReduction) + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait), + Args); if (TeamsReduction) { - auto *ScratchPadCopyFn = - emitCopyToScratchpad(CGM, Privates, ReductionArrayTy); - auto *LoadAndReduceFn = emitReduceScratchpadFunction( - CGM, Privates, ReductionArrayTy, ReductionFn); + llvm::Value *ScratchPadCopyFn = + emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc); + llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction( + CGM, Privates, ReductionArrayTy, ReductionFn, Loc); llvm::Value *Args[] = {ThreadId, CGF.Builder.getInt32(RHSExprs.size()), @@ -2277,25 +3413,26 @@ void CGOpenMPRuntimeNVPTX::emitReduction( } // 5. Build switch(res) - auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); - auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); + llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + llvm::SwitchInst *SwInst = + CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); // 6. Build case 1: where we have reduced values in the master // thread in each team. 
// __kmpc_end_reduce{_nowait}(<gtid>); // break; - auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); // Add emission of __kmpc_end_reduce{_nowait}(<gtid>); llvm::Value *EndArgs[] = {ThreadId}; - auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps, + auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps, this](CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); ++IPriv; @@ -2334,11 +3471,10 @@ CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, enum { NVPTX_local_addr = 5 }; QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr)); ArgType = QC.apply(CGM.getContext(), ArgType); - if (isa<ImplicitParamDecl>(NativeParam)) { + if (isa<ImplicitParamDecl>(NativeParam)) return ImplicitParamDecl::Create( CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); - } return ParmVarDecl::Create( CGM.getContext(), const_cast<DeclContext *>(NativeParam->getDeclContext()), @@ -2397,8 +3533,8 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( continue; } llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo( - /*AddrSpace=*/0)); + NativeArg, + NativeArg->getType()->getPointerElementType()->getPointerTo()); TargetArgs.emplace_back( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); } @@ -2409,10 +3545,10 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( /// and controls the arguments which are passed to this function. /// The wrapper ensures that the outlined function is called /// with the correct arguments when data is shared. -llvm::Function *CGOpenMPRuntimeNVPTX::createDataSharingWrapper( +llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) { ASTContext &Ctx = CGM.getContext(); - const auto &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const auto &CS = *D.getCapturedStmt(OMPD_parallel); // Create a function that takes as argument the source thread. 
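The switch on the runtime call's result, emitted above, has the shape sketched below: a return value of 1 means this thread holds the gathered values, so it applies the combiner for every reduction variable and then signals completion. The stub functions are hypothetical stand-ins for __kmpc_nvptx_{parallel,simd,teams}_reduce_nowait and __kmpc_nvptx_end_reduce_nowait, whose real signatures also take the reduce list and the shuffle/copy helper functions built earlier.

#include <cstdio>

static int reduceNowaitStub() { return 1; }          // 1 => this thread combines
static void endReduceNowaitStub(int /*gtid*/) {}

int main() {
  int gtid = 0;
  int localSum = 5;                                  // this thread's partial value
  int remotePartial = 7;                             // value gathered from other lanes/teams
  switch (reduceNowaitStub()) {
  case 1:
    localSum += remotePartial;                       // apply the combiner per reduction variable
    endReduceNowaitStub(gtid);                       // then signal completion
    break;
  default:
    break;                                           // everyone else skips to the default block
  }
  std::printf("%d\n", localSum);                     // 12
}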
FunctionArgList WrapperArgs; @@ -2420,76 +3556,200 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createDataSharingWrapper( Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false); QualType Int32QTy = Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false); - QualType Int32PtrQTy = Ctx.getPointerType(Int32QTy); - QualType VoidPtrPtrQTy = Ctx.getPointerType(Ctx.VoidPtrTy); - ImplicitParamDecl ParallelLevelArg(Ctx, Int16QTy, ImplicitParamDecl::Other); - ImplicitParamDecl WrapperArg(Ctx, Int32QTy, ImplicitParamDecl::Other); - ImplicitParamDecl SharedArgsList(Ctx, VoidPtrPtrQTy, - ImplicitParamDecl::Other); + ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getLocStart(), + /*Id=*/nullptr, Int16QTy, + ImplicitParamDecl::Other); + ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getLocStart(), + /*Id=*/nullptr, Int32QTy, + ImplicitParamDecl::Other); WrapperArgs.emplace_back(&ParallelLevelArg); WrapperArgs.emplace_back(&WrapperArg); - WrapperArgs.emplace_back(&SharedArgsList); - auto &CGFI = + const CGFunctionInfo &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setLinkage(llvm::GlobalValue::InternalLinkage); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); - CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs); + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs, + D.getLocStart(), D.getLocStart()); const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // Get the array of arguments. SmallVector<llvm::Value *, 8> Args; - // TODO: suppport SIMD and pass actual values - Args.emplace_back(llvm::ConstantPointerNull::get( - CGM.Int32Ty->getPointerTo())); - Args.emplace_back(llvm::ConstantPointerNull::get( - CGM.Int32Ty->getPointerTo())); + Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer()); + Args.emplace_back(ZeroAddr.getPointer()); CGBuilderTy &Bld = CGF.Builder; auto CI = CS.capture_begin(); - // Load the start of the array - auto SharedArgs = - CGF.EmitLoadOfPointer(CGF.GetAddrOfLocalVar(&SharedArgsList), - VoidPtrPtrQTy->castAs<PointerType>()); - - // For each captured variable - for (unsigned I = 0; I < CS.capture_size(); ++I, ++CI, ++CurField) { - // Name of captured variable - StringRef Name; - if (CI->capturesThis()) - Name = "this"; - else - Name = CI->getCapturedVar()->getName(); - - // We retrieve the CLANG type of the argument. We use it to create - // an alloca which will give us the LLVM type. - QualType ElemTy = CurField->getType(); - // If this is a capture by copy the element type has to be the pointer to - // the data. - if (CI->capturesVariableByCopy()) - ElemTy = Ctx.getPointerType(ElemTy); - - // Get shared address of the captured variable. 
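The wrapper built below reads the captured variables back from the void** list that the master published with __kmpc_begin_sharing_variables in the earlier hunk: by-reference captures travel as plain pointers, while by-copy scalars are smuggled through the pointer slot (an inttoptr on the store, the uintptr cast on the load in the wrapper). A host-side sketch of that packing convention; the variable names are hypothetical and the runtime calls are only named in comments.

#include <cstdint>
#include <cstdio>

int main() {
  int byRefVar = 10;                                  // captured by reference
  int byCopyVal = 42;                                 // captured by copy (scalar)

  // Master side: publish the captures as a list of void* slots
  // (__kmpc_begin_sharing_variables hands out the storage in the real runtime).
  void *sharedArgs[2];
  sharedArgs[0] = &byRefVar;                          // pointer stored as-is
  sharedArgs[1] = reinterpret_cast<void *>(
      static_cast<std::uintptr_t>(byCopyVal));        // integer smuggled through the slot

  // Wrapper side: recover the outlined function's arguments from the list
  // returned by __kmpc_get_shared_variables.
  int *arg0 = static_cast<int *>(sharedArgs[0]);
  int arg1 = static_cast<int>(
      reinterpret_cast<std::uintptr_t>(sharedArgs[1]));
  std::printf("%d %d\n", *arg0, arg1);                // 10 42
}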
- Address ArgAddress = Bld.CreateConstInBoundsGEP( - SharedArgs, I, CGF.getPointerSize()); - Address TypedArgAddress = Bld.CreateBitCast( - ArgAddress, CGF.ConvertTypeForMem(Ctx.getPointerType(ElemTy))); - llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedArgAddress, - /*Volatile=*/false, Int32PtrQTy, SourceLocation()); - Args.emplace_back(Arg); - } - - emitCall(CGF, OutlinedParallelFn, Args); + // Use global memory for data sharing. + // Handle passing of global args to workers. + Address GlobalArgs = + CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); + llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); + llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), + DataSharingArgs); + + // Retrieve the shared variables from the list of references returned + // by the runtime. Pass the variables to the outlined function. + Address SharedArgListAddress = Address::invalid(); + if (CS.capture_size() > 0 || + isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { + SharedArgListAddress = CGF.EmitLoadOfPointer( + GlobalArgs, CGF.getContext() + .getPointerType(CGF.getContext().getPointerType( + CGF.getContext().VoidPtrTy)) + .castAs<PointerType>()); + } + unsigned Idx = 0; + if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { + Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, + CGF.getPointerSize()); + Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( + Src, CGF.SizeTy->getPointerTo()); + llvm::Value *LB = CGF.EmitLoadOfScalar( + TypedAddress, + /*Volatile=*/false, + CGF.getContext().getPointerType(CGF.getContext().getSizeType()), + cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc()); + Args.emplace_back(LB); + ++Idx; + Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, + CGF.getPointerSize()); + TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( + Src, CGF.SizeTy->getPointerTo()); + llvm::Value *UB = CGF.EmitLoadOfScalar( + TypedAddress, + /*Volatile=*/false, + CGF.getContext().getPointerType(CGF.getContext().getSizeType()), + cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc()); + Args.emplace_back(UB); + ++Idx; + } + if (CS.capture_size() > 0) { + ASTContext &CGFContext = CGF.getContext(); + for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) { + QualType ElemTy = CurField->getType(); + Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx, + CGF.getPointerSize()); + Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( + Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy))); + llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress, + /*Volatile=*/false, + CGFContext.getPointerType(ElemTy), + CI->getLocation()); + if (CI->capturesVariableByCopy() && + !CI->getCapturedVar()->getType()->isAnyPointerType()) { + Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(), + CI->getLocation()); + } + Args.emplace_back(Arg); + } + } + + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedParallelFn, Args); CGF.FinishFunction(); return Fn; } + +void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, + const Decl *D) { + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + return; + + assert(D && "Expected function or captured|block decl."); + assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && + "Function is registered already."); + const Stmt *Body = nullptr; + bool NeedToDelayGlobalization = false; + if (const auto *FD = 
dyn_cast<FunctionDecl>(D)) { + Body = FD->getBody(); + } else if (const auto *BD = dyn_cast<BlockDecl>(D)) { + Body = BD->getBody(); + } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) { + Body = CD->getBody(); + NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP; + } + if (!Body) + return; + CheckVarsEscapingDeclContext VarChecker(CGF); + VarChecker.Visit(Body); + const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord(); + ArrayRef<const ValueDecl *> EscapedVariableLengthDecls = + VarChecker.getEscapedVariableLengthDecls(); + if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty()) + return; + auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; + I->getSecond().MappedParams = + llvm::make_unique<CodeGenFunction::OMPMapVars>(); + I->getSecond().GlobalRecord = GlobalizedVarsRecord; + I->getSecond().EscapedParameters.insert( + VarChecker.getEscapedParameters().begin(), + VarChecker.getEscapedParameters().end()); + I->getSecond().EscapedVariableLengthDecls.append( + EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end()); + DeclToAddrMapTy &Data = I->getSecond().LocalVarData; + for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { + assert(VD->isCanonicalDecl() && "Expected canonical declaration"); + const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); + Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid()))); + } + if (!NeedToDelayGlobalization) { + emitGenericVarsProlog(CGF, D->getLocStart()); + struct GlobalizationScope final : EHScopeStack::Cleanup { + GlobalizationScope() = default; + + void Emit(CodeGenFunction &CGF, Flags flags) override { + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) + .emitGenericVarsEpilog(CGF); + } + }; + CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup); + } +} + +Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, + const VarDecl *VD) { + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + return Address::invalid(); + + VD = VD->getCanonicalDecl(); + auto I = FunctionGlobalizedDecls.find(CGF.CurFn); + if (I == FunctionGlobalizedDecls.end()) + return Address::invalid(); + auto VDI = I->getSecond().LocalVarData.find(VD); + if (VDI != I->getSecond().LocalVarData.end()) + return VDI->second.second; + if (VD->hasAttrs()) { + for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()), + E(VD->attr_end()); + IT != E; ++IT) { + auto VDI = I->getSecond().LocalVarData.find( + cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl()) + ->getCanonicalDecl()); + if (VDI != I->getSecond().LocalVarData.end()) + return VDI->second.second; + } + } + return Address::invalid(); +} + +void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) { + FunctionGlobalizedDecls.erase(CGF.CurFn); + CGOpenMPRuntime::functionFinished(CGF); +} diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 5d13408318a5..f83e99f8a3b7 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -24,8 +24,18 @@ namespace clang { namespace CodeGen { class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { +public: + /// Defines the execution mode. + enum ExecutionMode { + /// SPMD execution mode (all threads are worker threads). + EM_SPMD, + /// Non-SPMD execution mode (1 master thread, others are workers). + EM_NonSPMD, + /// Unknown execution mode (orphaned directive). 
+ EM_Unknown, + }; private: - // Parallel outlined function work for workers to execute. + /// Parallel outlined function work for workers to execute. llvm::SmallVector<llvm::Function *, 16> Work; struct EntryFunctionState { @@ -35,48 +45,56 @@ private: class WorkerFunctionState { public: llvm::Function *WorkerFn; - const CGFunctionInfo *CGFI; + const CGFunctionInfo &CGFI; + SourceLocation Loc; - WorkerFunctionState(CodeGenModule &CGM); + WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc); private: void createWorkerFunction(CodeGenModule &CGM); }; - bool isInSpmdExecutionMode() const; + ExecutionMode getExecutionMode() const; - /// \brief Emit the worker function for the current target region. + /// Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); - /// \brief Helper for worker function. Emit body of worker loop. + /// Helper for worker function. Emit body of worker loop. void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); - /// \brief Helper for generic target entry function. Guide the master and + /// Helper for non-SPMD target entry function. Guide the master and /// worker threads to their respective locations. - void emitGenericEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, WorkerFunctionState &WST); - /// \brief Signal termination of OMP execution for generic target entry + /// Signal termination of OMP execution for non-SPMD target entry /// function. - void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + + /// Helper for generic variables globalization prolog. + void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc); + + /// Helper for generic variables globalization epilog. + void emitGenericVarsEpilog(CodeGenFunction &CGF); - /// \brief Helper for Spmd mode target directive's entry function. - void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + /// Helper for SPMD mode target directive's entry function. + void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, const OMPExecutableDirective &D); - /// \brief Signal termination of Spmd mode execution. - void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + /// Signal termination of SPMD mode execution. + void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); // // Base class overrides. // - /// \brief Creates offloading entry for the provided entry ID \a ID, + /// Creates offloading entry for the provided entry ID \a ID, /// address \a Addr, size \a Size, and flags \a Flags. void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags = 0) override; + uint64_t Size, int32_t Flags, + llvm::GlobalValue::LinkageTypes Linkage) override; - /// \brief Emit outlined function specialized for the Fork-Join + /// Emit outlined function specialized for the Fork-Join /// programming model for applicable target directives on the NVPTX device. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. @@ -85,12 +103,12 @@ private: /// \param IsOffloadEntry True if the outlined function is an offload entry. /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. 
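
The split between emitNonSPMDKernel and emitSPMDKernel below reflects the two kernel shapes this file generates: in non-SPMD (generic) mode a single master thread runs the sequential part of the target region while the remaining threads sit in a worker loop and execute whatever parallel region the master publishes; in SPMD mode every thread is a worker from the first instruction. A rough host-side analogue of the non-SPMD worker loop, for intuition only; every name below is invented, and the generated code really uses NVPTX thread ids, barriers and the libomptarget NVPTX runtime rather than std::thread:

// Host-side analogue of the non-SPMD kernel shape (sketch, not Clang code).
#include <atomic>
#include <cstdio>
#include <functional>
#include <thread>
#include <vector>

static std::atomic<int> Generation{0};   // bumped when the master publishes work
static std::atomic<int> Done{0};         // workers that finished the current work
static std::atomic<bool> Finished{false};
static std::function<void(int)> Work;    // the "outlined" parallel region

static void workerLoop(int Tid) {
  int Seen = 0;
  while (true) {
    // Workers wait here until new work is published or termination is signalled.
    while (Generation.load() == Seen && !Finished.load())
      std::this_thread::yield();
    if (Finished.load())
      return;                            // non-SPMD entry footer: workers exit
    Seen = Generation.load();
    Work(Tid);                           // execute the published parallel region
    Done.fetch_add(1);
  }
}

int main() {
  const int NumWorkers = 3;
  std::vector<std::thread> Workers;
  for (int T = 1; T <= NumWorkers; ++T)
    Workers.emplace_back(workerLoop, T);

  // Master thread: run the sequential part, then "fork" a parallel region.
  Work = [](int Tid) { std::printf("worker %d runs the parallel region\n", Tid); };
  Generation.fetch_add(1);               // publish the work to the workers
  while (Done.load() < NumWorkers)       // simplified join barrier
    std::this_thread::yield();

  Finished.store(true);                  // entry footer: signal termination
  for (std::thread &W : Workers)
    W.join();
  return 0;
}
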
- void emitGenericKernel(const OMPExecutableDirective &D, StringRef ParentName, + void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// \brief Emit outlined function specialized for the Single Program + /// Emit outlined function specialized for the Single Program /// Multiple Data programming model for applicable target directives on the /// NVPTX device. /// \param D Directive to emit. @@ -101,12 +119,12 @@ private: /// \param CodeGen Object containing the target statements. /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. - void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName, + void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// \brief Emit outlined function for 'target' directive on the NVPTX + /// Emit outlined function for 'target' directive on the NVPTX /// device. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. @@ -122,22 +140,22 @@ private: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override; - /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. - /// This call is for the Generic Execution Mode. + /// This call is for the Non-SPMD Execution Mode. /// \param OutlinedFn Outlined function to be run in parallel threads. Type of /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). /// \param CapturedVars A pointer to the record with the references to /// variables used in \a OutlinedFn function. /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. - void emitGenericParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); - /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. /// This call is for a parallel directive within an SPMD target directive. @@ -148,13 +166,13 @@ private: /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. /// - void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); protected: - /// \brief Get the function name of an outlined region. + /// Get the function name of an outlined region. // The name can be customized depending on the target. // StringRef getOutlinedHelperName() const override { @@ -164,13 +182,13 @@ protected: public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); - /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 + /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. 
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override; - /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 + /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. /// \param NumThreads An integer value of threads. @@ -178,7 +196,7 @@ public: llvm::Value *NumThreads, SourceLocation Loc) override; - /// \brief This function ought to emit, in the general case, a call to + /// This function ought to emit, in the general case, a call to // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed // as these numbers are obtained through the PTX grid and block configuration. /// \param NumTeams An integer expression of teams. @@ -186,7 +204,7 @@ public: void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override; - /// \brief Emits inlined function for the specified OpenMP parallel + /// Emits inlined function for the specified OpenMP parallel // directive. /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). @@ -201,7 +219,7 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override; - /// \brief Emits inlined function for the specified OpenMP teams + /// Emits inlined function for the specified OpenMP teams // directive. /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). @@ -216,7 +234,7 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override; - /// \brief Emits code for teams call of the \a OutlinedFn with + /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. /// \param OutlinedFn Outlined function to be run by team masters. Type of @@ -228,7 +246,7 @@ public: SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) override; - /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. /// \param OutlinedFn Outlined function to be run in parallel threads. Type of @@ -242,6 +260,16 @@ public: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; + /// Emits a critical region. + /// \param CriticalName Name of the critical region. + /// \param CriticalOpGen Generator for the statement associated with the given + /// critical region. + /// \param Hint Value of the 'hint' clause (optional). + void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, + const RegionCodeGenTy &CriticalOpGen, + SourceLocation Loc, + const Expr *Hint = nullptr) override; + /// Emit a code for reduction clause. /// /// \param Privates List of private copies for original reduction arguments. @@ -270,7 +298,7 @@ public: /// Translates the native parameter of outlined function if this is required /// for target. - /// \param FD Field decl from captured record for the paramater. + /// \param FD Field decl from captured record for the parameter. /// \param NativeParam Parameter itself. 
const VarDecl *translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override; @@ -288,23 +316,41 @@ public: CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> Args = llvm::None) const override; - /// Target codegen is specialized based on two programming models: the - /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd' - /// model for constructs like 'target parallel' that support it. - enum ExecutionMode { - /// Single Program Multiple Data. - Spmd, - /// Generic codegen to support fork-join model. + /// Emits OpenMP-specific function prolog. + /// Required for device constructs. + void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override; + + /// Gets the OpenMP-specific address of the local variable. + Address getAddressOfLocalVariable(CodeGenFunction &CGF, + const VarDecl *VD) override; + + /// Target codegen is specialized based on two data-sharing modes: CUDA, in + /// which the local variables are actually global threadlocal, and Generic, in + /// which the local variables are placed in global memory if they may escape + /// their declaration context. + enum DataSharingMode { + /// CUDA data sharing mode. + CUDA, + /// Generic data-sharing mode. Generic, - Unknown, }; + /// Cleans up references to the objects in finished function. + /// + void functionFinished(CodeGenFunction &CGF) override; + private: - // Track the execution mode when codegening directives within a target - // region. The appropriate mode (generic/spmd) is set on entry to the - // target region and used by containing directives such as 'parallel' - // to emit optimized code. - ExecutionMode CurrentExecutionMode; + /// Track the execution mode when codegening directives within a target + /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the + /// target region and used by containing directives such as 'parallel' + /// to emit optimized code. + ExecutionMode CurrentExecutionMode = EM_Unknown; + + /// true if we're emitting the code for the target region and next parallel + /// region is L0 for sure. + bool IsInTargetMasterThreadRegion = false; + /// true if we're definitely in the parallel region. + bool IsInParallelRegion = false; /// Map between an outlined function and its wrapper. llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap; @@ -313,9 +359,26 @@ private: /// and controls the parameters which are passed to this function. /// The wrapper ensures that the outlined function is called /// with the correct arguments when data is shared. - llvm::Function * - createDataSharingWrapper(llvm::Function *OutlinedParallelFn, - const OMPExecutableDirective &D); + llvm::Function *createParallelDataSharingWrapper( + llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D); + + /// The map of local variables to their addresses in the global memory. + using DeclToAddrMapTy = llvm::MapVector<const Decl *, + std::pair<const FieldDecl *, Address>>; + /// Set of the parameters passed by value escaping OpenMP context. 
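
FunctionGlobalizedDecls, declared just below, is the bookkeeping that makes getAddressOfLocalVariable work: each function maps its escaping locals to a field of a globalized record, and lookups fall back to the ordinary private copy when a variable never escapes. A stand-alone sketch of that lookup discipline; the string keys, the byte buffer and the function names are illustrative only (the real table is keyed by llvm::Function* and stores FieldDecl/Address pairs):

// Illustrative-only sketch of per-function globalization bookkeeping.
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct GlobalizedFrame {
  std::vector<unsigned char> Storage;        // stand-in for the global record
  std::map<std::string, std::size_t> Slot;   // escaping variable -> offset
};

static std::map<std::string, GlobalizedFrame> FunctionGlobalizedVars;

// Returns a pointer into the globalized frame if the variable escapes, or
// nullptr to mean "use the ordinary private copy".
void *getAddressOfLocalVariable(const std::string &Fn, const std::string &Var) {
  auto FI = FunctionGlobalizedVars.find(Fn);
  if (FI == FunctionGlobalizedVars.end())
    return nullptr;
  auto VI = FI->second.Slot.find(Var);
  if (VI == FI->second.Slot.end())
    return nullptr;
  return FI->second.Storage.data() + VI->second;
}

int main() {
  GlobalizedFrame &F = FunctionGlobalizedVars["target_region"];  // prolog
  F.Storage.resize(16);
  F.Slot["escaping_counter"] = 0;
  std::printf("escaping_counter -> %p\n",
              getAddressOfLocalVariable("target_region", "escaping_counter"));
  std::printf("purely_local     -> %p\n",
              getAddressOfLocalVariable("target_region", "purely_local"));
  FunctionGlobalizedVars.erase("target_region");   // functionFinished
  return 0;
}
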
+ using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; + struct FunctionData { + DeclToAddrMapTy LocalVarData; + EscapedParamsTy EscapedParameters; + llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; + llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; + const RecordDecl *GlobalRecord = nullptr; + llvm::Value *GlobalRecordAddr = nullptr; + std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; + }; + /// Maps the function to the list of the globalized variables with their + /// addresses. + llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; }; } // CodeGen namespace. diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h index 7b9c27d1d772..41084294ab9a 100644 --- a/lib/CodeGen/CGRecordLayout.h +++ b/lib/CodeGen/CGRecordLayout.h @@ -23,7 +23,7 @@ namespace llvm { namespace clang { namespace CodeGen { -/// \brief Structure with information about how a bitfield should be accessed. +/// Structure with information about how a bitfield should be accessed. /// /// Often we layout a sequence of bitfields as a contiguous sequence of bits. /// When the AST record layout does this, we represent it in the LLVM IR's type @@ -92,7 +92,7 @@ struct CGBitFieldInfo { void print(raw_ostream &OS) const; void dump() const; - /// \brief Given a bit-field decl, build an appropriate helper object for + /// Given a bit-field decl, build an appropriate helper object for /// accessing that field (which is expected to have the given offset and /// size). static CGBitFieldInfo MakeInfo(class CodeGenTypes &Types, @@ -156,31 +156,31 @@ public: IsZeroInitializable(IsZeroInitializable), IsZeroInitializableAsBase(IsZeroInitializableAsBase) {} - /// \brief Return the "complete object" LLVM type associated with + /// Return the "complete object" LLVM type associated with /// this record. llvm::StructType *getLLVMType() const { return CompleteObjectType; } - /// \brief Return the "base subobject" LLVM type associated with + /// Return the "base subobject" LLVM type associated with /// this record. llvm::StructType *getBaseSubobjectLLVMType() const { return BaseSubobjectType; } - /// \brief Check whether this struct can be C++ zero-initialized + /// Check whether this struct can be C++ zero-initialized /// with a zeroinitializer. bool isZeroInitializable() const { return IsZeroInitializable; } - /// \brief Check whether this struct can be C++ zero-initialized + /// Check whether this struct can be C++ zero-initialized /// with a zeroinitializer when considered as a base subobject. bool isZeroInitializableAsBase() const { return IsZeroInitializableAsBase; } - /// \brief Return llvm::StructType element number that corresponds to the + /// Return llvm::StructType element number that corresponds to the /// field FD. unsigned getLLVMFieldNo(const FieldDecl *FD) const { FD = FD->getCanonicalDecl(); @@ -193,14 +193,14 @@ public: return NonVirtualBases.lookup(RD); } - /// \brief Return the LLVM field index corresponding to the given + /// Return the LLVM field index corresponding to the given /// virtual base. Only valid when operating on the complete object. unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const { assert(CompleteObjectVirtualBases.count(base) && "Invalid virtual base!"); return CompleteObjectVirtualBases.lookup(base); } - /// \brief Return the BitFieldInfo that corresponds to the field FD. + /// Return the BitFieldInfo that corresponds to the field FD. 
const CGBitFieldInfo &getBitFieldInfo(const FieldDecl *FD) const { FD = FD->getCanonicalDecl(); assert(FD->isBitField() && "Invalid call for non-bit-field decl!"); diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index 1644ab4c0725..4ee6c8e71457 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -62,7 +62,7 @@ namespace { /// because LLVM reads from the complete type it can generate incorrect code /// if we do not clip the tail padding off of the bitfield in the complete /// layout. This introduces a somewhat awkward extra unnecessary clip stage. -/// The location of the clip is stored internally as a sentinal of type +/// The location of the clip is stored internally as a sentinel of type /// SCISSOR. If LLVM were updated to read base types (which it probably /// should because locations of things such as VBases are bogus in the llvm /// type anyway) then we could eliminate the SCISSOR. @@ -74,7 +74,7 @@ namespace { struct CGRecordLowering { // MemberInfo is a helper structure that contains information about a record // member. In additional to the standard member types, there exists a - // sentinal member type that ensures correct rounding. + // sentinel member type that ensures correct rounding. struct MemberInfo { CharUnits Offset; enum InfoKind { VFPtr, VBPtr, Field, Base, VBase, Scissor } Kind; @@ -95,7 +95,7 @@ struct CGRecordLowering { // The constructor. CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D, bool Packed); // Short helper routines. - /// \brief Constructs a MemberInfo instance from an offset and llvm::Type *. + /// Constructs a MemberInfo instance from an offset and llvm::Type *. MemberInfo StorageInfo(CharUnits Offset, llvm::Type *Data) { return MemberInfo(Offset, MemberInfo::Field, Data); } @@ -118,19 +118,19 @@ struct CGRecordLowering { return !Context.getTargetInfo().getCXXABI().isMicrosoft(); } - /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments. + /// Wraps llvm::Type::getIntNTy with some implicit arguments. llvm::Type *getIntNType(uint64_t NumBits) { return llvm::Type::getIntNTy(Types.getLLVMContext(), (unsigned)llvm::alignTo(NumBits, 8)); } - /// \brief Gets an llvm type of size NumBytes and alignment 1. + /// Gets an llvm type of size NumBytes and alignment 1. llvm::Type *getByteArrayType(CharUnits NumBytes) { assert(!NumBytes.isZero() && "Empty byte arrays aren't allowed."); llvm::Type *Type = llvm::Type::getInt8Ty(Types.getLLVMContext()); return NumBytes == CharUnits::One() ? Type : (llvm::Type *)llvm::ArrayType::get(Type, NumBytes.getQuantity()); } - /// \brief Gets the storage type for a field decl and handles storage + /// Gets the storage type for a field decl and handles storage /// for itanium bitfields that are smaller than their declared type. llvm::Type *getStorageType(const FieldDecl *FD) { llvm::Type *Type = Types.ConvertTypeForMem(FD->getType()); @@ -139,7 +139,7 @@ struct CGRecordLowering { return getIntNType(std::min(FD->getBitWidthValue(Context), (unsigned)Context.toBits(getSize(Type)))); } - /// \brief Gets the llvm Basesubobject type from a CXXRecordDecl. + /// Gets the llvm Basesubobject type from a CXXRecordDecl. llvm::Type *getStorageType(const CXXRecordDecl *RD) { return Types.getCGRecordLayout(RD).getBaseSubobjectLLVMType(); } @@ -168,7 +168,7 @@ struct CGRecordLowering { // Layout routines. 
void setBitFieldInfo(const FieldDecl *FD, CharUnits StartOffset, llvm::Type *StorageType); - /// \brief Lowers an ASTRecordLayout to a llvm type. + /// Lowers an ASTRecordLayout to a llvm type. void lower(bool NonVirtualBaseType); void lowerUnion(); void accumulateFields(); @@ -177,18 +177,18 @@ struct CGRecordLowering { void accumulateBases(); void accumulateVPtrs(); void accumulateVBases(); - /// \brief Recursively searches all of the bases to find out if a vbase is + /// Recursively searches all of the bases to find out if a vbase is /// not the primary vbase of some base class. bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query); void calculateZeroInit(); - /// \brief Lowers bitfield storage types to I8 arrays for bitfields with tail + /// Lowers bitfield storage types to I8 arrays for bitfields with tail /// padding that is or can potentially be used. void clipTailPadding(); - /// \brief Determines if we need a packed llvm struct. + /// Determines if we need a packed llvm struct. void determinePacked(bool NVBaseType); - /// \brief Inserts padding everwhere it's needed. + /// Inserts padding everywhere it's needed. void insertPadding(); - /// \brief Fills out the structures that are ultimately consumed. + /// Fills out the structures that are ultimately consumed. void fillOutputFields(); // Input memoization fields. CodeGenTypes &Types; @@ -214,12 +214,13 @@ private: }; } // namespace { -CGRecordLowering::CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D, bool Packed) - : Types(Types), Context(Types.getContext()), D(D), - RD(dyn_cast<CXXRecordDecl>(D)), - Layout(Types.getContext().getASTRecordLayout(D)), - DataLayout(Types.getDataLayout()), IsZeroInitializable(true), - IsZeroInitializableAsBase(true), Packed(Packed) {} +CGRecordLowering::CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D, + bool Packed) + : Types(Types), Context(Types.getContext()), D(D), + RD(dyn_cast<CXXRecordDecl>(D)), + Layout(Types.getContext().getASTRecordLayout(D)), + DataLayout(Types.getDataLayout()), IsZeroInitializable(true), + IsZeroInitializableAsBase(true), Packed(Packed) {} void CGRecordLowering::setBitFieldInfo( const FieldDecl *FD, CharUnits StartOffset, llvm::Type *StorageType) { @@ -294,8 +295,7 @@ void CGRecordLowering::lowerUnion() { // been doing and cause lit tests to change. for (const auto *Field : D->fields()) { if (Field->isBitField()) { - // Skip 0 sized bitfields. - if (Field->getBitWidthValue(Context) == 0) + if (Field->isZeroLengthBitField(Context)) continue; llvm::Type *FieldType = getStorageType(Field); if (LayoutSize < getSize(FieldType)) @@ -380,7 +380,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, for (; Field != FieldEnd; ++Field) { uint64_t BitOffset = getFieldBitOffset(*Field); // Zero-width bitfields end runs. - if (Field->getBitWidthValue(Context) == 0) { + if (Field->isZeroLengthBitField(Context)) { Run = FieldEnd; continue; } @@ -404,19 +404,20 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, return; } - // Check if current Field is better as a single field run. When current field + // Check if OffsetInRecord is better as a single field run. When OffsetInRecord // has legal integer width, and its bitfield offset is naturally aligned, it // is better to make the bitfield a separate storage component so as it can be // accessed directly with lower cost. 
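
The rewritten lambda below answers the same question as before, but in terms of the run's total width and start offset rather than a single field: the run becomes its own storage unit when its width is a legal integer width for the target and its start offset is naturally aligned for that width. A simplified model of that test, hard-coding 8/16/32/64-bit integers instead of asking the DataLayout:

// Simplified model of the "single field run" heuristic (sketch only).
#include <cstdint>
#include <cstdio>

static bool isLegalIntegerWidth(uint64_t Bits) {
  return Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64;
}

static bool betterAsSingleFieldRun(uint64_t WidthInBits, uint64_t StartBitOffset,
                                   bool FineGrainedBitfieldAccesses) {
  if (!FineGrainedBitfieldAccesses)
    return false;
  if (!isLegalIntegerWidth(WidthInBits))
    return false;
  return StartBitOffset % WidthInBits == 0;   // naturally aligned start
}

int main() {
  // struct S { unsigned a : 16; unsigned b : 16; unsigned c : 3; };
  std::printf("a: %d\n", betterAsSingleFieldRun(16, 0, true));   // own unit
  std::printf("b: %d\n", betterAsSingleFieldRun(16, 16, true));  // own unit
  std::printf("c: %d\n", betterAsSingleFieldRun(3, 32, true));   // 3 bits: not legal
  return 0;
}
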
- auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) { + auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord, + uint64_t StartBitOffset) { if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses) return false; - unsigned Width = Field->getBitWidthValue(Context); - if (!DataLayout.isLegalInteger(Width)) + if (!DataLayout.isLegalInteger(OffsetInRecord)) return false; - // Make sure Field is natually aligned if it is treated as an IType integer. - if (getFieldBitOffset(*Field) % - Context.toBits(getAlignment(getIntNType(Width))) != + // Make sure StartBitOffset is natually aligned if it is treated as an + // IType integer. + if (StartBitOffset % + Context.toBits(getAlignment(getIntNType(OffsetInRecord))) != 0) return false; return true; @@ -431,26 +432,31 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, if (Field == FieldEnd) break; // Any non-zero-length bitfield can start a new run. - if (Field->getBitWidthValue(Context) != 0) { + if (!Field->isZeroLengthBitField(Context)) { Run = Field; StartBitOffset = getFieldBitOffset(*Field); Tail = StartBitOffset + Field->getBitWidthValue(Context); - StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run); + StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset, + StartBitOffset); } ++Field; continue; } // If the start field of a new run is better as a single run, or - // if current field is better as a single run, or - // if current field has zero width bitfield, or + // if current field (or consecutive fields) is better as a single run, or + // if current field has zero width bitfield and either + // UseZeroLengthBitfieldAlignment or UseBitFieldTypeAlignment is set to + // true, or // if the offset of current field is inconsistent with the offset of // previous field plus its offset, // skip the block below and go ahead to emit the storage. // Otherwise, try to add bitfields to the run. if (!StartFieldAsSingleRun && Field != FieldEnd && - !IsBetterAsSingleFieldRun(Field) && - Field->getBitWidthValue(Context) != 0 && + !IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset) && + (!Field->isZeroLengthBitField(Context) || + (!Context.getTargetInfo().useZeroLengthBitfieldAlignment() && + !Context.getTargetInfo().useBitFieldTypeAlignment())) && Tail == getFieldBitOffset(*Field)) { Tail += Field->getBitWidthValue(Context); ++Field; @@ -626,7 +632,7 @@ void CGRecordLowering::determinePacked(bool NVBaseType) { // non-virtual sub-object and an unpacked complete object or vise versa. if (NVSize % NVAlignment) Packed = true; - // Update the alignment of the sentinal. + // Update the alignment of the sentinel. if (!Packed) Members.back().Data = getIntNType(Context.toBits(Alignment)); } @@ -785,8 +791,7 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, } // Verify that the LLVM and AST field offsets agree. - llvm::StructType *ST = - dyn_cast<llvm::StructType>(RL->getLLVMType()); + llvm::StructType *ST = RL->getLLVMType(); const llvm::StructLayout *SL = getDataLayout().getStructLayout(ST); const ASTRecordLayout &AST_RL = getContext().getASTRecordLayout(D); @@ -808,7 +813,7 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, continue; // Don't inspect zero-length bitfields. 
- if (FD->getBitWidthValue(getContext()) == 0) + if (FD->isZeroLengthBitField(getContext())) continue; const CGBitFieldInfo &Info = RL->getBitFieldInfo(FD); diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 91fa49a46ef1..79662ec0099f 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -74,6 +74,15 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { // Generate a stoppoint if we are emitting debug info. EmitStopPoint(S); + // Ignore all OpenMP directives except for simd if OpenMP with Simd is + // enabled. + if (getLangOpts().OpenMP && getLangOpts().OpenMPSimd) { + if (const auto *D = dyn_cast<OMPExecutableDirective>(S)) { + EmitSimpleOMPExecutableDirective(*D); + return; + } + } + switch (S->getStmtClass()) { case Stmt::NoStmtClass: case Stmt::CXXCatchStmtClass: @@ -599,7 +608,7 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { EmitStmt(S.getInit()); if (S.getConditionVariable()) - EmitAutoVarDecl(*S.getConditionVariable()); + EmitDecl(*S.getConditionVariable()); // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. @@ -696,7 +705,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, RunCleanupsScope ConditionScope(*this); if (S.getConditionVariable()) - EmitAutoVarDecl(*S.getConditionVariable()); + EmitDecl(*S.getConditionVariable()); // Evaluate the conditional in the while header. C99 6.8.5.1: The // evaluation of the controlling expression takes place before each @@ -768,11 +777,6 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, // Emit the body of the loop. llvm::BasicBlock *LoopBody = createBasicBlock("do.body"); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, - SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); - EmitBlockWithFallThrough(LoopBody, &S); { RunCleanupsScope BodyScope(*this); @@ -781,6 +785,11 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, EmitBlock(LoopCond.getBlock()); + const SourceRange &R = S.getSourceRange(); + LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, + SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); + // C99 6.8.5.2: "The evaluation of the controlling expression takes place // after each execution of the loop body." @@ -856,7 +865,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, // If the for statement has a condition scope, emit the local variable // declaration. if (S.getConditionVariable()) { - EmitAutoVarDecl(*S.getConditionVariable()); + EmitDecl(*S.getConditionVariable()); } llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); @@ -996,7 +1005,9 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { if (RV.isScalar()) { Builder.CreateStore(RV.getScalarVal(), ReturnValue); } else if (RV.isAggregate()) { - EmitAggregateCopy(ReturnValue, RV.getAggregateAddress(), Ty); + LValue Dest = MakeAddrLValue(ReturnValue, Ty); + LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty); + EmitAggregateCopy(Dest, Src, Ty, overlapForReturnValue()); } else { EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty), /*init*/ true); @@ -1026,7 +1037,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { Builder.ClearInsertionPoint(); } - // Emit the result value, even if unused, to evalute the side effects. + // Emit the result value, even if unused, to evaluate the side effects. 
const Expr *RV = S.getRetValue(); // Treat block literals in a return expression as if they appeared @@ -1074,11 +1085,12 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { /*isInit*/ true); break; case TEK_Aggregate: - EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, - Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + EmitAggExpr(RV, AggValueSlot::forAddr( + ReturnValue, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + overlapForReturnValue())); break; } } @@ -1563,7 +1575,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // Emit the condition variable if needed inside the entire cleanup scope // used by this special case for constant folded switches. if (S.getConditionVariable()) - EmitAutoVarDecl(*S.getConditionVariable()); + EmitDecl(*S.getConditionVariable()); // At this point, we are no longer "within" a switch instance, so // we can temporarily enforce this to ensure that any embedded case @@ -1592,7 +1604,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { EmitStmt(S.getInit()); if (S.getConditionVariable()) - EmitAutoVarDecl(*S.getConditionVariable()); + EmitDecl(*S.getConditionVariable()); llvm::Value *CondV = EmitScalarExpr(S.getCond()); // Create basic block to hold stuff that comes after switch @@ -1915,7 +1927,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Simplify the output constraint. std::string OutputConstraint(S.getOutputConstraint(i)); OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1, - getTarget()); + getTarget(), &OutputConstraintInfos); const Expr *OutExpr = S.getOutputExpr(i); OutExpr = OutExpr->IgnoreParenNoopCasts(getContext()); @@ -2122,7 +2134,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect); - llvm::CallInst *Result = Builder.CreateCall(IA, Args); + llvm::CallInst *Result = + Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index f9861735832b..0d343f84c71f 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -29,12 +29,13 @@ namespace { class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { - if (auto *CPI = OMPClauseWithPreInit::get(C)) { - if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + if (const auto *CPI = OMPClauseWithPreInit::get(C)) { + if (const auto *PreInit = + cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { CGF.EmitVarDecl(cast<VarDecl>(*I)); - else { + } else { CodeGenFunction::AutoVarEmission Emission = CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); CGF.EmitAutoVarCleanups(Emission); @@ -53,34 +54,35 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { } public: - OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, - bool AsInlined = false, bool EmitPreInitStmt = true) + OMPLexicalScope( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None, + const 
bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { if (EmitPreInitStmt) emitPreInitStmt(CGF, S); - if (AsInlined) { - if (S.hasAssociatedStmt()) { - auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); - for (auto &C : CS->captures()) { - if (C.capturesVariable() || C.capturesVariableByCopy()) { - auto *VD = C.getCapturedVar(); - assert(VD == VD->getCanonicalDecl() && - "Canonical decl must be captured."); - DeclRefExpr DRE(const_cast<VarDecl *>(VD), - isCapturedVar(CGF, VD) || - (CGF.CapturedStmtInfo && - InlinedShareds.isGlobalVarCaptured(VD)), - VD->getType().getNonReferenceType(), VK_LValue, - SourceLocation()); - InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); - }); - } - } - (void)InlinedShareds.Privatize(); + if (!CapturedRegion.hasValue()) + return; + assert(S.hasAssociatedStmt() && + "Expected associated statement for inlined directive."); + const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); + for (const auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); + DeclRefExpr DRE( + const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); } } + (void)InlinedShareds.Privatize(); } }; @@ -96,9 +98,8 @@ class OMPParallelScope final : public OMPLexicalScope { public: OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : OMPLexicalScope(CGF, S, - /*AsInlined=*/false, - /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + EmitPreInitStmt(S)) {} }; /// Lexical scope for OpenMP teams construct, that handles correct codegen @@ -112,29 +113,26 @@ class OMPTeamsScope final : public OMPLexicalScope { public: OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : OMPLexicalScope(CGF, S, - /*AsInlined=*/false, - /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + EmitPreInitStmt(S)) {} }; /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. 
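
OMPLoopScope below, like the OMPLexicalScope and OMPSimdLexicalScope rework above, relies on the usual RAII shape: on entry the scope remaps captured or pre-initialized variables to their private locations, and on destruction the previous mappings come back. Stripped of the CodeGen types, the pattern looks like this (strings and ints stand in for declarations and addresses; this is a sketch, not the Clang classes):

// RAII privatization pattern with invented types.
#include <cstdio>
#include <map>
#include <string>

using AddrMap = std::map<std::string, int>;   // variable name -> "address"

class PrivateScope {
  AddrMap &Map;
  AddrMap Saved;
public:
  explicit PrivateScope(AddrMap &M) : Map(M), Saved(M) {}
  void addPrivate(const std::string &Var, int PrivateAddr) { Map[Var] = PrivateAddr; }
  ~PrivateScope() { Map = Saved; }            // restore the shared mappings
};

int main() {
  AddrMap Locals{{"x", 100}};                 // x lives at "address" 100
  {
    PrivateScope Scope(Locals);
    Scope.addPrivate("x", 200);               // inside the construct x is private
    std::printf("inside construct: x @ %d\n", Locals["x"]);
  }
  std::printf("after construct:  x @ %d\n", Locals["x"]);
  return 0;
}
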
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { - CodeGenFunction::OMPPrivateScope PreCondScope(CGF); - for (auto *E : S.counters()) { + CodeGenFunction::OMPMapVars PreCondVars; + for (const auto *E : S.counters()) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - (void)PreCondScope.addPrivate(VD, [&CGF, VD]() { - return CGF.CreateMemTemp(VD->getType().getNonReferenceType()); - }); + (void)PreCondVars.setVarAddr( + CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); } - (void)PreCondScope.Privatize(); - if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { - if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { - for (const auto *I : PreInits->decls()) - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } + (void)PreCondVars.apply(CGF); + if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { + for (const auto *I : PreInits->decls()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); } + PreCondVars.restore(CGF); } public: @@ -144,6 +142,72 @@ public: } }; +class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { + CodeGenFunction::OMPPrivateScope InlinedShareds; + + static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { + return CGF.LambdaCaptureFields.lookup(VD) || + (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && + cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); + } + +public: + OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), + InlinedShareds(CGF) { + for (const auto *C : S.clauses()) { + if (const auto *CPI = OMPClauseWithPreInit::get(C)) { + if (const auto *PreInit = + cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { + for (const Expr *E : UDP->varlists()) { + const Decl *D = cast<DeclRefExpr>(E)->getDecl(); + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) + CGF.EmitVarDecl(*OED); + } + } + } + if (!isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPPrivateClause(S, InlinedShareds); + if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { + if (const Expr *E = TG->getReductionRef()) + CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); + } + const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); + while (CS) { + for (auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || + (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, + C.getLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + } + CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); + } + (void)InlinedShareds.Privatize(); + } +}; + } // namespace static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, @@ -151,8 +215,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction 
&CGF, const RegionCodeGenTy &CodeGen); LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { - if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { - if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { + if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { + if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { OrigVD = OrigVD->getCanonicalDecl(); bool IsCaptured = LambdaCaptureFields.lookup(OrigVD) || @@ -167,23 +231,23 @@ LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { } llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { - auto &C = getContext(); + ASTContext &C = getContext(); llvm::Value *Size = nullptr; auto SizeInChars = C.getTypeSizeInChars(Ty); if (SizeInChars.isZero()) { // getTypeSizeInChars() returns 0 for a VLA. - while (auto *VAT = C.getAsVariableArrayType(Ty)) { - llvm::Value *ArraySize; - std::tie(ArraySize, Ty) = getVLASize(VAT); - Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; + while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { + VlaSizePair VlaSize = getVLASize(VAT); + Ty = VlaSize.Type; + Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) + : VlaSize.NumElts; } SizeInChars = C.getTypeSizeInChars(Ty); if (SizeInChars.isZero()) return llvm::ConstantInt::get(SizeTy, /*V=*/0); - Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); - } else - Size = CGM.getSize(SizeInChars); - return Size; + return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); + } + return CGM.getSize(SizeInChars); } void CodeGenFunction::GenerateOpenMPCapturedVars( @@ -195,27 +259,26 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( E = S.capture_init_end(); I != E; ++I, ++CurField, ++CurCap) { if (CurField->hasCapturedVLAType()) { - auto VAT = CurField->getCapturedVLAType(); - auto *Val = VLASizeMap[VAT->getSizeExpr()]; + const VariableArrayType *VAT = CurField->getCapturedVLAType(); + llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; CapturedVars.push_back(Val); - } else if (CurCap->capturesThis()) + } else if (CurCap->capturesThis()) { CapturedVars.push_back(CXXThisValue); - else if (CurCap->capturesVariableByCopy()) { - llvm::Value *CV = - EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); + } else if (CurCap->capturesVariableByCopy()) { + llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation()); // If the field is not a pointer, we need to save the actual value // and load it as a void pointer. if (!CurField->getType()->isAnyPointerType()) { - auto &Ctx = getContext(); - auto DstAddr = CreateMemTemp( + ASTContext &Ctx = getContext(); + Address DstAddr = CreateMemTemp( Ctx.getUIntPtrType(), - Twine(CurCap->getCapturedVar()->getName()) + ".casted"); + Twine(CurCap->getCapturedVar()->getName(), ".casted")); LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); - auto *SrcAddrVal = EmitScalarConversion( + llvm::Value *SrcAddrVal = EmitScalarConversion( DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), - Ctx.getPointerType(CurField->getType()), SourceLocation()); + Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); LValue SrcLV = MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); @@ -223,7 +286,7 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( EmitStoreThroughLValue(RValue::get(CV), SrcLV); // Load the value using the destination type pointer. 
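
The load "using the destination type pointer" mentioned above is the caller half of the uintptr convention for small by-value captures: the value is written into a uintptr_t-sized temporary through a pointer of its own type, reloaded as uintptr_t, and the outlined function performs the reverse cast. A portable sketch of that round trip, with memcpy standing in for the IR-level pointer casts (names are illustrative):

// Sketch of the uintptr round trip for small by-value captures.
#include <cstdint>
#include <cstdio>
#include <cstring>

template <typename T> uintptr_t encodeAsUIntPtr(T Value) {
  static_assert(sizeof(T) <= sizeof(uintptr_t),
                "only small, trivially copyable captures are passed this way");
  uintptr_t Slot = 0;
  std::memcpy(&Slot, &Value, sizeof(T));   // store through "pointer to T"
  return Slot;                             // reload the temporary as uintptr_t
}

template <typename T> T decodeFromUIntPtr(uintptr_t Slot) {
  T Value{};
  std::memcpy(&Value, &Slot, sizeof(T));   // callee-side cast back to T
  return Value;
}

int main() {
  int Captured = 42;
  uintptr_t Arg = encodeAsUIntPtr(Captured);          // what the caller passes
  std::printf("callee sees %d\n", decodeFromUIntPtr<int>(Arg));
  return 0;
}
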
- CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); + CV = EmitLoadOfScalar(DstLV, CurCap->getLocation()); } CapturedVars.push_back(CV); } else { @@ -233,15 +296,16 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( } } -static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, - StringRef Name, LValue AddrLV, +static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, + QualType DstType, StringRef Name, + LValue AddrLV, bool isReferenceType = false) { ASTContext &Ctx = CGF.getContext(); - auto *CastedPtr = CGF.EmitScalarConversion( + llvm::Value *CastedPtr = CGF.EmitScalarConversion( AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), - Ctx.getPointerType(DstType), SourceLocation()); - auto TmpAddr = + Ctx.getPointerType(DstType), Loc); + Address TmpAddr = CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) .getAddress(); @@ -249,27 +313,26 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, // reference instead of the reference of the value. if (isReferenceType) { QualType RefType = Ctx.getLValueReferenceType(DstType); - auto *RefVal = TmpAddr.getPointer(); - TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); - auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); - CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true); + llvm::Value *RefVal = TmpAddr.getPointer(); + TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref")); + LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); + CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true); } return TmpAddr; } static QualType getCanonicalParamType(ASTContext &C, QualType T) { - if (T->isLValueReferenceType()) { + if (T->isLValueReferenceType()) return C.getLValueReferenceType( getCanonicalParamType(C, T.getNonReferenceType()), /*SpelledAsLValue=*/false); - } if (T->isPointerType()) return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); - if (auto *A = T->getAsArrayTypeUnsafe()) { - if (auto *VLA = dyn_cast<VariableArrayType>(A)) + if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { + if (const auto *VLA = dyn_cast<VariableArrayType>(A)) return getCanonicalParamType(C, VLA->getElementType()); - else if (!A->isVariablyModifiedType()) + if (!A->isVariablyModifiedType()) return C.getCanonicalType(T); } return C.getCanonicalParamType(T); @@ -329,7 +392,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)), SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); } - for (auto *FD : RD->fields()) { + for (const FieldDecl *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; VarDecl *CapVar = nullptr; @@ -339,18 +402,17 @@ static llvm::Function *emitOutlinedFunctionPrologue( // uintptr. This is necessary given that the runtime library is only able to // deal with pointers. We can pass in the same way the VLA type sizes to the // outlined function. 
- if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || - I->capturesVariableArrayType()) { - if (FO.UIntPtrCastRequired) - ArgType = Ctx.getUIntPtrType(); - } + if (FO.UIntPtrCastRequired && + ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || + I->capturesVariableArrayType())) + ArgType = Ctx.getUIntPtrType(); if (I->capturesVariable() || I->capturesVariableByCopy()) { CapVar = I->getCapturedVar(); II = CapVar->getIdentifier(); - } else if (I->capturesThis()) + } else if (I->capturesThis()) { II = &Ctx.Idents.get("this"); - else { + } else { assert(I->capturesVariableArrayType()); II = &Ctx.Idents.get("vla"); } @@ -387,19 +449,20 @@ static llvm::Function *emitOutlinedFunctionPrologue( CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Function *F = + auto *F = llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, FO.FunctionName, &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) F->setDoesNotThrow(); + F->setDoesNotRecurse(); // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, FO.S->getLocStart(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); - for (auto *FD : RD->fields()) { + for (const FieldDecl *FD : RD->fields()) { // Do not map arguments if we emit function with non-original types. Address LocalAddr(Address::invalid()); if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { @@ -431,23 +494,23 @@ static llvm::Function *emitOutlinedFunctionPrologue( AlignmentSource::Decl); if (FD->hasCapturedVLAType()) { if (FO.UIntPtrCastRequired) { - ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), - Args[Cnt]->getName(), - ArgLVal), - FD->getType(), AlignmentSource::Decl); + ArgLVal = CGF.MakeAddrLValue( + castValueFromUintptr(CGF, I->getLocation(), FD->getType(), + Args[Cnt]->getName(), ArgLVal), + FD->getType(), AlignmentSource::Decl); } - auto *ExprArg = - CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); - auto VAT = FD->getCapturedVLAType(); - VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); + llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + const VariableArrayType *VAT = FD->getCapturedVLAType(); + VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg); } else if (I->capturesVariable()) { - auto *Var = I->getCapturedVar(); + const VarDecl *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { if (ArgLVal.getType()->isLValueReferenceType()) { ArgAddr = CGF.EmitLoadOfReference(ArgLVal); - } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + } else if (!VarTy->isVariablyModifiedType() || + !VarTy->isPointerType()) { assert(ArgLVal.getType()->isPointerType()); ArgAddr = CGF.EmitLoadOfPointer( ArgAddr, ArgLVal.getType()->castAs<PointerType>()); @@ -461,20 +524,19 @@ static llvm::Function *emitOutlinedFunctionPrologue( } else if (I->capturesVariableByCopy()) { assert(!FD->getType()->isAnyPointerType() && "Not expecting a captured pointer."); - auto *Var = I->getCapturedVar(); + const VarDecl *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); LocalAddrs.insert( {Args[Cnt], - {Var, - FO.UIntPtrCastRequired - ? 
castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), - ArgLVal, VarTy->isReferenceType()) - : ArgLVal.getAddress()}}); + {Var, FO.UIntPtrCastRequired + ? castValueFromUintptr(CGF, I->getLocation(), + FD->getType(), Args[Cnt]->getName(), + ArgLVal, VarTy->isReferenceType()) + : ArgLVal.getAddress()}}); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); - CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) - .getScalarVal(); + CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); } ++Cnt; @@ -524,6 +586,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { /*RegisterCastedArgsOnly=*/true, CapturedStmtInfo->getHelperName()); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; Args.clear(); LocalAddrs.clear(); VLASizes.clear(); @@ -539,16 +602,16 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { I->second.second, I->second.first ? I->second.first->getType() : Arg->getType(), AlignmentSource::Decl); - CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart()); } else { auto EI = VLASizes.find(Arg); - if (EI != VLASizes.end()) + if (EI != VLASizes.end()) { CallArg = EI->second.second; - else { + } else { LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), Arg->getType(), AlignmentSource::Decl); - CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart()); } } CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); @@ -564,28 +627,28 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { //===----------------------------------------------------------------------===// void CodeGenFunction::EmitOMPAggregateAssign( Address DestAddr, Address SrcAddr, QualType OriginalType, - const llvm::function_ref<void(Address, Address)> &CopyGen) { + const llvm::function_ref<void(Address, Address)> CopyGen) { // Perform element-by-element initialization. QualType ElementTy; // Drill down to the base element type on both arrays. - auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); - auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); + const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); + llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - auto SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); + llvm::Value *SrcBegin = SrcAddr.getPointer(); + llvm::Value *DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); + llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); // The basic structure here is a while-do loop. - auto BodyBB = createBasicBlock("omp.arraycpy.body"); - auto DoneBB = createBasicBlock("omp.arraycpy.done"); - auto IsEmpty = + llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); + llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); + llvm::Value *IsEmpty = Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. 
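
The basic blocks named in this hunk (omp.arraycpy.isempty, .body, .done) implement an ordinary guarded pointer-walk copy with the per-element work injected through the CopyGen callback. The same control flow written directly in C++, restricted to int elements and with made-up names:

// The omp.arraycpy control flow in plain C++ (sketch only).
#include <cstdio>
#include <functional>

void aggregateAssign(int *DestBegin, int *SrcBegin, unsigned NumElements,
                     const std::function<void(int &, int &)> &CopyGen) {
  int *DestEnd = DestBegin + NumElements;   // "one past the end" of the array
  if (DestBegin == DestEnd)                 // omp.arraycpy.isempty
    return;
  int *DestElem = DestBegin;                // the PHI nodes in the IR version
  int *SrcElem = SrcBegin;
  do {                                      // omp.arraycpy.body
    CopyGen(*DestElem, *SrcElem);
    ++DestElem;                             // omp.arraycpy.dest.element
    ++SrcElem;                              // omp.arraycpy.src.element
  } while (DestElem != DestEnd);            // omp.arraycpy.done
}

int main() {
  int Src[4] = {1, 2, 3, 4};
  int Dest[4] = {};
  aggregateAssign(Dest, Src, 4, [](int &D, int &S) { D = S; });
  std::printf("%d %d %d %d\n", Dest[0], Dest[1], Dest[2], Dest[3]);
  return 0;
}
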
- auto EntryBB = Builder.GetInsertBlock(); + llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); EmitBlock(BodyBB); CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); @@ -608,12 +671,12 @@ void CodeGenFunction::EmitOMPAggregateAssign( CopyGen(DestElementCurrent, SrcElementCurrent); // Shift the address forward by one element. - auto DestElementNext = Builder.CreateConstGEP1_32( + llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - auto SrcElementNext = Builder.CreateConstGEP1_32( + llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. - auto Done = + llvm::Value *Done = Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); Builder.CreateCondBr(Done, DoneBB, BodyBB); DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); @@ -627,10 +690,12 @@ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { if (OriginalType->isArrayType()) { - auto *BO = dyn_cast<BinaryOperator>(Copy); + const auto *BO = dyn_cast<BinaryOperator>(Copy); if (BO && BO->getOpcode() == BO_Assign) { // Perform simple memcpy for simple copying. - EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); + LValue Dest = MakeAddrLValue(DestAddr, OriginalType); + LValue Src = MakeAddrLValue(SrcAddr, OriginalType); + EmitAggregateAssign(Dest, Src, OriginalType); } else { // For arrays with complex element types perform element by element // copying. @@ -641,11 +706,8 @@ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, // destination and source variables to corresponding array // elements. CodeGenFunction::OMPPrivateScope Remap(*this); - Remap.addPrivate(DestVD, [DestElement]() -> Address { - return DestElement; - }); - Remap.addPrivate( - SrcVD, [SrcElement]() -> Address { return SrcElement; }); + Remap.addPrivate(DestVD, [DestElement]() { return DestElement; }); + Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; }); (void)Remap.Privatize(); EmitIgnoredExpr(Copy); }); @@ -653,8 +715,8 @@ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, } else { // Remap pseudo source variable to private copy. CodeGenFunction::OMPPrivateScope Remap(*this); - Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); - Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); + Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; }); + Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; }); (void)Remap.Privatize(); // Emit copying of the whole variable. EmitIgnoredExpr(Copy); @@ -673,17 +735,21 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); } llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; - CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); + llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); + // Force emission of the firstprivate copy if the directive does not emit + // outlined function, like omp for, omp simd, omp distribute etc. 
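
MustEmitFirstprivateCopy, introduced below, feeds into the existing rule for when a firstprivate variable needs an explicit copy: a by-value capture into an outlined function already acts as the copy, so an extra one is emitted only when the directive produces no outlined function (omp for, omp simd, omp distribute and similar), when the variable is also lastprivate, or when the captured field has reference type. The condition, condensed into a stand-alone predicate with descriptive rather than Clang parameter names:

// The firstprivate copy decision as a stand-alone predicate (sketch).
#include <cstdio>

bool needExplicitFirstprivateCopy(bool MustEmitFirstprivateCopy,
                                  bool IsAlsoLastprivate,
                                  bool CapturedByValueField,
                                  bool CapturedFieldIsReference) {
  bool CaptureAlreadyCopies = !MustEmitFirstprivateCopy && !IsAlsoLastprivate &&
                              CapturedByValueField && !CapturedFieldIsReference;
  return !CaptureAlreadyCopies;
}

int main() {
  // 'omp parallel firstprivate(x)': the by-value capture already copies x.
  std::printf("%d\n", needExplicitFirstprivateCopy(false, false, true, false));
  // 'omp for firstprivate(x)': no outlined function, so a copy must be emitted.
  std::printf("%d\n", needExplicitFirstprivateCopy(true, false, true, false));
  return 0;
}
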
+ bool MustEmitFirstprivateCopy = + CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); - for (auto IInit : C->private_copies()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); bool ThisFirstprivateIsLastprivate = Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; - auto *CapFD = CapturesInfo.lookup(OrigVD); - auto *FD = CapturedStmtInfo->lookup(OrigVD); - if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && + const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); + if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && !FD->getType()->isReferenceType()) { EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); ++IRef; @@ -693,54 +759,61 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, FirstprivateIsLastprivate = FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); + const auto *VDInit = + cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); bool IsRegistered; DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); + LValue OriginalLVal = EmitLValue(&DRE); QualType Type = VD->getType(); if (Type->isArrayType()) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. - IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - auto Emission = EmitAutoVarAlloca(*VD); - auto *Init = VD->getInit(); - if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { - // Perform simple memcpy. - EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, - Type); - } else { - EmitOMPAggregateAssign( - Emission.getAllocatedAddress(), OriginalAddr, Type, - [this, VDInit, Init](Address DestElement, - Address SrcElement) { - // Clean up any temporaries needed by the initialization. - RunCleanupsScope InitScope(*this); - // Emit initialization for single element. - setAddrOfLocalVar(VDInit, SrcElement); - EmitAnyExprToMem(Init, DestElement, - Init->getType().getQualifiers(), - /*IsInitializer*/ false); - LocalDeclMap.erase(VDInit); - }); - } - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); + IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, VD, Type, OriginalLVal, VDInit]() { + AutoVarEmission Emission = EmitAutoVarAlloca(*VD); + const Expr *Init = VD->getInit(); + if (!isa<CXXConstructExpr>(Init) || + isTrivialInitializer(Init)) { + // Perform simple memcpy. + LValue Dest = + MakeAddrLValue(Emission.getAllocatedAddress(), Type); + EmitAggregateAssign(Dest, OriginalLVal, Type); + } else { + EmitOMPAggregateAssign( + Emission.getAllocatedAddress(), OriginalLVal.getAddress(), + Type, + [this, VDInit, Init](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the + // initialization. 
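Aside (not part of this diff): per the comment in the hunk above, directives that are emitted inline rather than through an outlined function (omp for, omp simd, omp distribute) now always materialize the firstprivate copy. A minimal user-level example of such a case, assuming an enclosing parallel region:

#include <cstdio>

int main() {
  int base = 100;                     // original variable, shared in the parallel region
  int out[8] = {};
#pragma omp parallel num_threads(2)
  {
#pragma omp for firstprivate(base)    // each thread gets a private copy initialized to 100
    for (int i = 0; i < 8; ++i)
      out[i] = base + i;
  }
  for (int v : out)
    std::printf("%d ", v);            // prints 100 ... 107
  std::printf("\n");
  return 0;
}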
+ RunCleanupsScope InitScope(*this); + // Emit initialization for single element. + setAddrOfLocalVar(VDInit, SrcElement); + EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer*/ false); + LocalDeclMap.erase(VDInit); + }); + } + EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); } else { - IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - // Emit private VarDecl with copy init. - // Remap temp VDInit variable to the address of the original - // variable - // (for proper handling of captured global variables). - setAddrOfLocalVar(VDInit, OriginalAddr); - EmitDecl(*VD); - LocalDeclMap.erase(VDInit); - return GetAddrOfLocalVar(VD); - }); + Address OriginalAddr = OriginalLVal.getAddress(); + IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, VDInit, OriginalAddr, VD]() { + // Emit private VarDecl with copy init. + // Remap temp VDInit variable to the address of the original + // variable (for proper handling of captured global variables). + setAddrOfLocalVar(VDInit, OriginalAddr); + EmitDecl(*VD); + LocalDeclMap.erase(VDInit); + return GetAddrOfLocalVar(VD); + }); } assert(IsRegistered && "firstprivate var already registered as private"); @@ -762,16 +835,15 @@ void CodeGenFunction::EmitOMPPrivateClause( llvm::DenseSet<const VarDecl *> EmittedAsPrivate; for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); - for (auto IInit : C->private_copies()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> Address { - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; @@ -794,8 +866,8 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); auto IDestRef = C->destination_exprs().begin(); - for (auto *AssignOp : C->assignment_ops()) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *AssignOp : C->assignment_ops()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); QualType Type = VD->getType(); if (CopiedVars.insert(VD->getCanonicalDecl()).second) { // Get the address of the master variable. 
If we are emitting code with @@ -826,12 +898,15 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { Builder.CreateCondBr( Builder.CreateICmpNE( Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), - Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), + Builder.CreatePtrToInt(PrivateAddr.getPointer(), + CGM.IntPtrTy)), CopyBegin, CopyEnd); EmitBlock(CopyBegin); } - auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); - auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); + const auto *SrcVD = + cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); + const auto *DestVD = + cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); } ++IRef; @@ -854,8 +929,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( bool HasAtLeastOneLastprivate = false; llvm::DenseSet<const VarDecl *> SIMDLCVs; if (isOpenMPSimdDirective(D.getDirectiveKind())) { - auto *LoopDirective = cast<OMPLoopDirective>(&D); - for (auto *C : LoopDirective->counters()) { + const auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (const Expr *C : LoopDirective->counters()) { SIMDLCVs.insert( cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); } @@ -863,19 +938,21 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { HasAtLeastOneLastprivate = true; - if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) + if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && + !getLangOpts().OpenMPSimd) break; auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); - for (auto *IInit : C->private_copies()) { + for (const Expr *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); // Taskloops do not require additional initialization, it is done in // runtime support library. if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { - auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); - PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { + const auto *DestVD = + cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); + PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { DeclRefExpr DRE( const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( @@ -887,8 +964,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // not generated. Initialization of this variable will happen in codegen // for 'firstprivate' clause. if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { // Emit private VarDecl with copy init. 
EmitDecl(*VD); return GetAddrOfLocalVar(VD); @@ -926,10 +1003,10 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( } llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; - if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { + if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { auto IC = LoopDirective->counters().begin(); - for (auto F : LoopDirective->finals()) { - auto *D = + for (const Expr *F : LoopDirective->finals()) { + const auto *D = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); if (NoFinals) AlreadyEmittedVars.insert(D); @@ -942,23 +1019,26 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); auto IDestRef = C->destination_exprs().begin(); - for (auto *AssignOp : C->assignment_ops()) { - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *AssignOp : C->assignment_ops()) { + const auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); QualType Type = PrivateVD->getType(); - auto *CanonicalVD = PrivateVD->getCanonicalDecl(); + const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); if (AlreadyEmittedVars.insert(CanonicalVD).second) { // If lastprivate variable is a loop control variable for loop-based // directive, update its value before copyin back to original // variable. - if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) + if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) EmitIgnoredExpr(FinalExpr); - auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); - auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); + const auto *SrcVD = + cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); + const auto *DestVD = + cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); // Get the address of the original variable. Address OriginalAddr = GetAddrOfLocalVar(DestVD); // Get the address of the private variable. Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); - if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) + if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), getNaturalTypeAlignment(RefTy->getPointeeType())); @@ -968,7 +1048,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( ++ISrcRef; ++IDestRef; } - if (auto *PostUpdate = C->getPostUpdateExpr()) + if (const Expr *PostUpdate = C->getPostUpdateExpr()) EmitIgnoredExpr(PostUpdate); } if (IsLastIterCond) @@ -990,7 +1070,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto IRed = C->reduction_ops().begin(); auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); - for (const auto *Ref : C->varlists()) { + for (const Expr *Ref : C->varlists()) { Shareds.emplace_back(Ref); Privates.emplace_back(*IPriv); ReductionOps.emplace_back(*IRed); @@ -1007,12 +1087,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto ILHS = LHSs.begin(); auto IRHS = RHSs.begin(); auto IPriv = Privates.begin(); - for (const auto *IRef : Shareds) { - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + for (const Expr *IRef : Shareds) { + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); // Emit private VarDecl with reduction init. 
RedCG.emitSharedLValue(*this, Count); RedCG.emitAggregateType(*this, Count); - auto Emission = EmitAutoVarAlloca(*PrivateVD); + AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), RedCG.getSharedLValue(Count), [&Emission](CodeGenFunction &CGF) { @@ -1023,32 +1103,31 @@ void CodeGenFunction::EmitOMPReductionClauseInit( Address BaseAddr = RedCG.adjustPrivateAddress( *this, Count, Emission.getAllocatedAddress()); bool IsRegistered = PrivateScope.addPrivate( - RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); QualType Type = PrivateVD->getType(); bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { return RedCG.getSharedLValue(Count).getAddress(); }); - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || isa<ArraySubscriptExpr>(IRef)) { // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { return RedCG.getSharedLValue(Count).getAddress(); }); - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); @@ -1063,10 +1142,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit( OriginalAddr = Builder.CreateElementBitCast( OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } + PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); PrivateScope.addPrivate( - LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); - PrivateScope.addPrivate( - RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + RHSVD, [this, PrivateVD, RHSVD, IsArray]() { return IsArray ? Builder.CreateElementBitCast( GetAddrOfLocalVar(PrivateVD), @@ -1100,9 +1178,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( if (HasAtLeastOneReduction) { bool WithNowait = D.getSingleClause<OMPNowaitClause>() || isOpenMPParallelDirective(D.getDirectiveKind()) || - D.getDirectiveKind() == OMPD_simd; - bool SimpleReduction = D.getDirectiveKind() == OMPD_simd || - D.getDirectiveKind() == OMPD_distribute_simd; + ReductionKind == OMPD_simd; + bool SimpleReduction = ReductionKind == OMPD_simd; // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). 
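Aside (not part of this diff): a minimal user-level reduction of the kind whose private copies and final combination the EmitOMPReductionClauseInit/Final code in the surrounding hunks emits:

#include <cstdio>

int main() {
  int sum = 0;
#pragma omp parallel for reduction(+ : sum)  // each thread reduces into a private copy
  for (int i = 0; i < 100; ++i)
    sum += i;
  std::printf("sum = %d\n", sum);            // 4950 after the final combination
  return 0;
}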
CGM.getOpenMPRuntime().emitReduction( @@ -1113,17 +1190,17 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( static void emitPostUpdateForReductionClause( CodeGenFunction &CGF, const OMPExecutableDirective &D, - const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { if (!CGF.HaveInsertPoint()) return; llvm::BasicBlock *DoneBB = nullptr; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - if (auto *PostUpdate = C->getPostUpdateExpr()) { + if (const Expr *PostUpdate = C->getPostUpdateExpr()) { if (!DoneBB) { - if (auto *Cond = CondGen(CGF)) { + if (llvm::Value *Cond = CondGen(CGF)) { // If the first post-update expression is found, emit conditional // block if it was requested. - auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); + llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); CGF.EmitBlock(ThenBB); @@ -1151,12 +1228,14 @@ static void emitCommonOMPParallelDirective( OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + llvm::Value *OutlinedFn = + CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); - auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), - /*IgnoreResultAssign*/ true); + llvm::Value *NumThreads = + CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign=*/true); CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( CGF, NumThreads, NumThreadsClause->getLocStart()); } @@ -1192,7 +1271,8 @@ static void emitEmptyBoundParameters(CodeGenFunction &, void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); @@ -1207,34 +1287,33 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, emitEmptyBoundParameters); - emitPostUpdateForReductionClause( - *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit) { RunCleanupsScope BodyScope(*this); // Update counters values on current iteration. 
- for (auto I : D.updates()) { - EmitIgnoredExpr(I); - } + for (const Expr *UE : D.updates()) + EmitIgnoredExpr(UE); // Update the linear variables. // In distribute directives only loop counters may be marked as linear, no // need to generate the code for them. if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto *U : C->updates()) - EmitIgnoredExpr(U); + for (const Expr *UE : C->updates()) + EmitIgnoredExpr(UE); } } // On a continue in the body, jump to the end. - auto Continue = getJumpDestInCurrentScope("omp.body.continue"); + JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); // Emit loop body. EmitStmt(D.getBody()); @@ -1246,24 +1325,24 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, void CodeGenFunction::EmitOMPInnerLoop( const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, - const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, - const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { + const llvm::function_ref<void(CodeGenFunction &)> BodyGen, + const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); - const SourceRange &R = S.getSourceRange(); + const SourceRange R = S.getSourceRange(); LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. - auto ExitBlock = LoopExit.getBlock(); + llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); if (RequiresCleanup) ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); - auto LoopBody = createBasicBlock("omp.inner.for.body"); + llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); // Emit condition. EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); @@ -1276,7 +1355,7 @@ void CodeGenFunction::EmitOMPInnerLoop( incrementProfileCounter(&S); // Create a block for the increment. - auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); + JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); BodyGen(*this); @@ -1298,12 +1377,13 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { // Emit inits for the linear variables. 
bool HasLinears = false; for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto *Init : C->inits()) { + for (const Expr *Init : C->inits()) { HasLinears = true; - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); - if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); + if (const auto *Ref = + dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { AutoVarEmission Emission = EmitAutoVarAlloca(*VD); - auto *OrigVD = cast<VarDecl>(Ref->getDecl()); + const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, VD->getInit()->getType(), VK_LValue, @@ -1312,13 +1392,14 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { VD->getType()), /*capturedByInit=*/false); EmitAutoVarCleanups(Emission); - } else + } else { EmitVarDecl(*VD); + } } // Emit the linear steps for the linear clauses. // If a step is not constant, it is pre-calculated before the loop. - if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) - if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { + if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) + if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); // Emit calculation of the linear step. EmitIgnoredExpr(CS); @@ -1329,36 +1410,36 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { void CodeGenFunction::EmitOMPLinearClauseFinal( const OMPLoopDirective &D, - const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { if (!HaveInsertPoint()) return; llvm::BasicBlock *DoneBB = nullptr; // Emit the final values of the linear variables. for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto IC = C->varlist_begin(); - for (auto *F : C->finals()) { + for (const Expr *F : C->finals()) { if (!DoneBB) { - if (auto *Cond = CondGen(*this)) { + if (llvm::Value *Cond = CondGen(*this)) { // If the first post-update expression is found, emit conditional // block if it was requested. 
- auto *ThenBB = createBasicBlock(".omp.linear.pu"); + llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); DoneBB = createBasicBlock(".omp.linear.pu.done"); Builder.CreateCondBr(Cond, ThenBB, DoneBB); EmitBlock(ThenBB); } } - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); Address OrigAddr = EmitLValue(&DRE).getAddress(); CodeGenFunction::OMPPrivateScope VarScope(*this); - VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); + VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); (void)VarScope.Privatize(); EmitIgnoredExpr(F); ++IC; } - if (auto *PostUpdate = C->getPostUpdateExpr()) + if (const Expr *PostUpdate = C->getPostUpdateExpr()) EmitIgnoredExpr(PostUpdate); } if (DoneBB) @@ -1371,12 +1452,12 @@ static void emitAlignedClause(CodeGenFunction &CGF, return; for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { unsigned ClauseAlignment = 0; - if (auto AlignmentExpr = Clause->getAlignment()) { - auto AlignmentCI = + if (const Expr *AlignmentExpr = Clause->getAlignment()) { + auto *AlignmentCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); } - for (auto E : Clause->varlists()) { + for (const Expr *E : Clause->varlists()) { unsigned Alignment = ClauseAlignment; if (Alignment == 0) { // OpenMP [2.8.1, Description] @@ -1403,28 +1484,28 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( if (!HaveInsertPoint()) return; auto I = S.private_counters().begin(); - for (auto *E : S.counters()) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); - (void)LoopScope.addPrivate(VD, [&]() -> Address { - // Emit var without initialization. - if (!LocalDeclMap.count(PrivateVD)) { - auto VarEmission = EmitAutoVarAlloca(*PrivateVD); - EmitAutoVarCleanups(VarEmission); - } - DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), - /*RefersToEnclosingVariableOrCapture=*/false, - (*I)->getType(), VK_LValue, (*I)->getExprLoc()); - return EmitLValue(&DRE).getAddress(); + for (const Expr *E : S.counters()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); + // Emit var without initialization. + AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); + EmitAutoVarCleanups(VarEmission); + LocalDeclMap.erase(PrivateVD); + (void)LoopScope.addPrivate(VD, [&VarEmission]() { + return VarEmission.getAllocatedAddress(); }); if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || VD->hasGlobalStorage()) { - (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { + (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { DeclRefExpr DRE(const_cast<VarDecl *>(VD), LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), E->getType(), VK_LValue, E->getExprLoc()); return EmitLValue(&DRE).getAddress(); }); + } else { + (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { + return VarEmission.getAllocatedAddress(); + }); } ++I; } @@ -1440,7 +1521,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); (void)PreCondScope.Privatize(); // Get initial values of real counters. 
- for (auto I : S.inits()) { + for (const Expr *I : S.inits()) { CGF.EmitIgnoredExpr(I); } } @@ -1454,20 +1535,20 @@ void CodeGenFunction::EmitOMPLinearClause( return; llvm::DenseSet<const VarDecl *> SIMDLCVs; if (isOpenMPSimdDirective(D.getDirectiveKind())) { - auto *LoopDirective = cast<OMPLoopDirective>(&D); - for (auto *C : LoopDirective->counters()) { + const auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (const Expr *C : LoopDirective->counters()) { SIMDLCVs.insert( cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); } } for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto CurPrivate = C->privates().begin(); - for (auto *E : C->varlists()) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - auto *PrivateVD = + for (const Expr *E : C->varlists()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); if (!SIMDLCVs.count(VD->getCanonicalDecl())) { - bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { + bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { // Emit private VarDecl with copy init. EmitVarDecl(*PrivateVD); return GetAddrOfLocalVar(PrivateVD); @@ -1475,8 +1556,9 @@ void CodeGenFunction::EmitOMPLinearClause( assert(IsRegistered && "linear var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; - } else + } else { EmitVarDecl(*PrivateVD); + } ++CurPrivate; } } @@ -1490,7 +1572,7 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), /*ignoreResult=*/true); - llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); + auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried @@ -1500,12 +1582,12 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), /*ignoreResult=*/true); - llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); + auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. - CGF.LoopStack.setParallel(false); + CGF.LoopStack.setParallel(/*Enable=*/false); } } @@ -1513,46 +1595,45 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic) { // Walk clauses and process safelen/lastprivate. 
LoopStack.setParallel(!IsMonotonic); - LoopStack.setVectorizeEnable(true); + LoopStack.setVectorizeEnable(); emitSimdlenSafelenClause(*this, D, IsMonotonic); } void CodeGenFunction::EmitOMPSimdFinal( const OMPLoopDirective &D, - const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { if (!HaveInsertPoint()) return; llvm::BasicBlock *DoneBB = nullptr; auto IC = D.counters().begin(); auto IPC = D.private_counters().begin(); - for (auto F : D.finals()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); - auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); + for (const Expr *F : D.finals()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); + const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || OrigVD->hasGlobalStorage() || CED) { if (!DoneBB) { - if (auto *Cond = CondGen(*this)) { + if (llvm::Value *Cond = CondGen(*this)) { // If the first post-update expression is found, emit conditional // block if it was requested. - auto *ThenBB = createBasicBlock(".omp.final.then"); + llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); DoneBB = createBasicBlock(".omp.final.done"); Builder.CreateCondBr(Cond, ThenBB, DoneBB); EmitBlock(ThenBB); } } Address OrigAddr = Address::invalid(); - if (CED) + if (CED) { OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); - else { + } else { DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), /*RefersToEnclosingVariableOrCapture=*/false, (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); OrigAddr = EmitLValue(&DRE).getAddress(); } OMPPrivateScope VarScope(*this); - VarScope.addPrivate(OrigVD, - [OrigAddr]() -> Address { return OrigAddr; }); + VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); (void)VarScope.Privatize(); EmitIgnoredExpr(F); } @@ -1570,6 +1651,14 @@ static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, CGF.EmitStopPoint(&S); } +/// Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, PrePostActionTy &Action) { Action.Enter(CGF); @@ -1581,6 +1670,12 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, // <Final counter/linear vars updates>; // } // + if (isOpenMPDistributeDirective(S.getDirectiveKind()) || + isOpenMPWorksharingDirective(S.getDirectiveKind()) || + isOpenMPTaskLoopDirective(S.getDirectiveKind())) { + (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); + (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); + } // Emit: if (PreCond) - begin. 
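Aside (not part of this diff): a minimal simd loop with safelen; emitSimdlenSafelenClause above maps the clause literal to the loop's vectorize width, and emitOMPSimdRegion wraps the body in the "if (PreCond)" skeleton shown in its comment:

#include <cstdio>

int main() {
  float x[64], y[64];
  for (int i = 0; i < 64; ++i) { x[i] = static_cast<float>(i); y[i] = 0.0f; }
#pragma omp simd safelen(8)          // at most 8 concurrent iterations are assumed safe
  for (int i = 0; i < 64; ++i)
    y[i] = 2.0f * x[i] + 1.0f;
  std::printf("y[63] = %f\n", y[63]); // 127.0
  return 0;
}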
// If the condition constant folds and can be elided, avoid emitting the @@ -1591,7 +1686,7 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, if (!CondConstant) return; } else { - auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); + llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); ContBlock = CGF.createBasicBlock("simd.if.end"); emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, CGF.getProfileCount(&S)); @@ -1601,14 +1696,14 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, // Emit the loop iteration variable. const Expr *IVExpr = S.getIterationVariable(); - const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); CGF.EmitVarDecl(*IVDecl); CGF.EmitIgnoredExpr(S.getInit()); // Emit the iterations count variable. // If it is not a variable, Sema decided to calculate iterations count on // each iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); // Emit calculation of the iterations count. CGF.EmitIgnoredExpr(S.getCalcLastIteration()); @@ -1633,17 +1728,15 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); - CGF.EmitOMPSimdFinal( - S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); // Emit final copy of the lastprivate variables at the end of loops. if (HasLastprivateClause) CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); - emitPostUpdateForReductionClause( - CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); } - CGF.EmitOMPLinearClauseFinal( - S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); // Emit: if (PreCond) - end. if (ContBlock) { CGF.EmitBranch(ContBlock); @@ -1655,7 +1748,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -1665,18 +1758,18 @@ void CodeGenFunction::EmitOMPOuterLoop( const CodeGenFunction::OMPLoopArguments &LoopArgs, const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { - auto &RT = CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); + JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); // Start the loop with a block that tests the condition. 
- auto CondBlock = createBasicBlock("omp.dispatch.cond"); + llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); - const SourceRange &R = S.getSourceRange(); + const SourceRange R = S.getSourceRange(); LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); @@ -1698,11 +1791,11 @@ void CodeGenFunction::EmitOMPOuterLoop( // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. - auto ExitBlock = LoopExit.getBlock(); + llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); if (LoopScope.requiresCleanups()) ExitBlock = createBasicBlock("omp.dispatch.cleanup"); - auto LoopBody = createBasicBlock("omp.dispatch.body"); + llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -1716,7 +1809,7 @@ void CodeGenFunction::EmitOMPOuterLoop( EmitIgnoredExpr(LoopArgs.Init); // Create a block for the increment. - auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); + JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); // Generate !llvm.loop.parallel metadata for loads and stores for loops @@ -1769,7 +1862,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, const OMPLoopArguments &LoopArgs, const CodeGenDispatchBoundsTy &CGDispatchBounds) { - auto &RT = CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). const bool DynamicOrOrdered = @@ -1835,7 +1928,8 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); + const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = + CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); llvm::Value *LBVal = DispatchBounds.first; llvm::Value *UBVal = DispatchBounds.second; CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, @@ -1878,7 +1972,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, const CodeGenLoopTy &CodeGenLoopContent) { - auto &RT = CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); // Emit outer loop. // Same behavior as a OMPForOuterLoop, except that schedule cannot be @@ -1933,14 +2027,6 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( emitEmptyOrdered); } -/// Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast<VarDecl>(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - static std::pair<LValue, LValue> emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { @@ -1958,14 +2044,18 @@ emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, // the current ones. 
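Aside (not part of this diff): a composite construct in which the inner parallel for takes its bounds from the enclosing distribute chunk via the previous lower/upper bound variables read in emitDistributeParallelForInnerBounds; a minimal user-level example (host fallback is fine if no offload target is configured):

#include <cstdio>

int main() {
  int out[1000];
#pragma omp target teams distribute parallel for map(from : out)
  for (int i = 0; i < 1000; ++i)     // iterations are split across teams, then threads
    out[i] = 2 * i;
  std::printf("out[999] = %d\n", out[999]); // 1998
  return 0;
}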
LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); - llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); + llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( + PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); PrevLBVal = CGF.EmitScalarConversion( PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), - LS.getIterationVariable()->getType(), SourceLocation()); - llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); + LS.getIterationVariable()->getType(), + LS.getPrevLowerBoundVariable()->getExprLoc()); + llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( + PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); PrevUBVal = CGF.EmitScalarConversion( PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), - LS.getIterationVariable()->getType(), SourceLocation()); + LS.getIterationVariable()->getType(), + LS.getPrevUpperBoundVariable()->getExprLoc()); CGF.EmitStoreOfScalar(PrevLBVal, LB); CGF.EmitStoreOfScalar(PrevUBVal, UB); @@ -1991,10 +2081,10 @@ emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, // is not normalized as each team only executes its own assigned // distribute chunk QualType IteratorTy = IVExpr->getType(); - llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, - SourceLocation()); - llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, - SourceLocation()); + llvm::Value *LBVal = + CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart()); + llvm::Value *UBVal = + CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart()); return {LBVal, UBVal}; } @@ -2004,13 +2094,13 @@ static void emitDistributeParallelForDistributeInnerBoundParams( const auto &Dir = cast<OMPLoopDirective>(S); LValue LB = CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); - auto LBCast = CGF.Builder.CreateIntCast( + llvm::Value *LBCast = CGF.Builder.CreateIntCast( CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); CapturedVars.push_back(LBCast); LValue UB = CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); - auto UBCast = CGF.Builder.CreateIntCast( + llvm::Value *UBCast = CGF.Builder.CreateIntCast( CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); CapturedVars.push_back(UBCast); } @@ -2020,7 +2110,8 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit) { auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); bool HasCancel = false; if (!isOpenMPSimdDirective(S.getDirectiveKind())) { if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) @@ -2051,7 +2142,7 @@ void CodeGenFunction::EmitOMPDistributeParallelForDirective( CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, S.getDistInc()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_parallel); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } @@ -2061,7 +2152,7 @@ void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, S.getDistInc()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_parallel); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } @@ -2070,7 +2161,7 @@ void 
CodeGenFunction::EmitOMPDistributeSimdDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -2096,28 +2187,6 @@ void CodeGenFunction::EmitOMPTargetSimdDirective( emitCommonOMPTargetDirective(*this, S, CodeGen); } -void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( - const OMPTargetTeamsDistributeParallelForDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( - const OMPTargetTeamsDistributeParallelForSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams_distribute_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2135,20 +2204,20 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( const CodeGenLoopBoundsTy &CodeGenLoopBounds, const CodeGenDispatchBoundsTy &CGDispatchBounds) { // Emit the loop iteration variable. - auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); - auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); + const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); + const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); EmitVarDecl(*IVDecl); // Emit the iterations count variable. // If it is not a variable, Sema decided to calculate iterations count on each // iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); // Emit calculation of the iterations count. EmitIgnoredExpr(S.getCalcLastIteration()); } - auto &RT = CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); bool HasLastprivateClause; // Check pre-condition. @@ -2163,7 +2232,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( if (!CondConstant) return false; } else { - auto *ThenBlock = createBasicBlock("omp.precond.then"); + llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); ContBlock = createBasicBlock("omp.precond.end"); emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, getProfileCount(&S)); @@ -2171,8 +2240,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( incrementProfileCounter(&S); } + RunCleanupsScope DoacrossCleanupScope(*this); bool Ordered = false; - if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { + if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { if (OrderedClause->getNumForLoops()) RT.emitDoacrossInit(*this, S); else @@ -2213,11 +2283,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // Detect the loop schedule kind and chunk. 
llvm::Value *Chunk = nullptr; OpenMPScheduleTy ScheduleKind; - if (auto *C = S.getSingleClause<OMPScheduleClause>()) { + if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { ScheduleKind.Schedule = C->getScheduleKind(); ScheduleKind.M1 = C->getFirstScheduleModifier(); ScheduleKind.M2 = C->getSecondScheduleModifier(); - if (const auto *Ch = C->getChunkSize()) { + if (const Expr *Ch = C->getChunkSize()) { Chunk = EmitScalarExpr(Ch); Chunk = EmitScalarConversion(Chunk, Ch->getType(), S.getIterationVariable()->getType(), @@ -2245,7 +2315,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( UB.getAddress(), ST.getAddress()); RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); - auto LoopExit = + JumpDest LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); EmitIgnoredExpr(S.getEnsureUpperBound()); @@ -2282,7 +2352,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, - [&](CodeGenFunction &CGF) -> llvm::Value * { + [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); @@ -2293,7 +2363,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( : /*Parallel only*/ OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( - *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + *this, S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); @@ -2303,14 +2373,15 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( S, isOpenMPSimdDirective(S.getDirectiveKind()), Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } - EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); + DoacrossCleanupScope.ForceCleanup(); // We're now done with the loop, so jump to the continuation block. if (ContBlock) { EmitBranch(ContBlock); - EmitBlock(ContBlock, true); + EmitBlock(ContBlock, /*IsFinished=*/true); } } return HasLastprivateClause; @@ -2321,7 +2392,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( /// of the associated 'for' or 'distribute' loop. 
static std::pair<LValue, LValue> emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { - const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const auto &LS = cast<OMPLoopDirective>(S); LValue LB = EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); LValue UB = @@ -2336,7 +2407,7 @@ emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { static std::pair<llvm::Value *, llvm::Value *> emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, Address LB, Address UB) { - const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const auto &LS = cast<OMPLoopDirective>(S); const Expr *IVExpr = LS.getIterationVariable(); const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); @@ -2354,15 +2425,14 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { emitDispatchForLoopBounds); }; { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); } // Emit an implicit barrier at the end. - if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { + if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); - } } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { @@ -2374,38 +2444,39 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { emitDispatchForLoopBounds); }; { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } // Emit an implicit barrier at the end. - if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { + if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); - } } static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, const Twine &Name, llvm::Value *Init = nullptr) { - auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); + LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); if (Init) CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); return LVal; } void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { - auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); - auto *CS = dyn_cast<CompoundStmt>(Stmt); + const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); + const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); bool HasLastprivates = false; - auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &C = CGF.CGM.getContext(); - auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + auto &&CodeGen = [&S, CapturedStmt, CS, + &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { + ASTContext &C = CGF.getContext(); + QualType KmpInt32Ty = + C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", CGF.Builder.getInt32(0)); - auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) - : CGF.Builder.getInt32(0); + llvm::ConstantInt *GlobalUBVal = CS != nullptr + ? 
CGF.Builder.getInt32(CS->size() - 1) + : CGF.Builder.getInt32(0); LValue UB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", @@ -2423,8 +2494,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { OK_Ordinary, S.getLocStart(), FPOptions()); // Increment for loop counter. UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, - S.getLocStart()); - auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { + S.getLocStart(), true); + auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { // Iterate through all sections and emit a switch construct: // switch (IV) { // case 0: @@ -2436,13 +2507,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // break; // } // .omp.sections.exit: - auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); - auto *SwitchStmt = CGF.Builder.CreateSwitch( - CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, - CS == nullptr ? 1 : CS->size()); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); + llvm::SwitchInst *SwitchStmt = + CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()), + ExitBB, CS == nullptr ? 1 : CS->size()); if (CS) { unsigned CaseNumber = 0; - for (auto *SubStmt : CS->children()) { + for (const Stmt *SubStmt : CS->children()) { auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); CGF.EmitBlock(CaseBB); SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); @@ -2451,10 +2522,10 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { ++CaseNumber; } } else { - auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); + llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); CGF.EmitBlock(CaseBB); SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); - CGF.EmitStmt(Stmt); + CGF.EmitStmt(CapturedStmt); CGF.EmitBranch(ExitBB); } CGF.EmitBlock(ExitBB, /*IsFinished=*/true); @@ -2483,8 +2554,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.CGM.getOpenMPRuntime().emitForStaticInit( CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); - auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); - auto *MinUBGlobalUB = CGF.Builder.CreateSelect( + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); + llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); // IV = LB; @@ -2500,11 +2571,10 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. - emitPostUpdateForReductionClause( - CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { - return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); - }); + emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. 
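Aside (not part of this diff): a minimal sections region; EmitSections above lowers each section body to one case of a switch on the section index, as its comment sketches:

#include <cstdio>

int main() {
  int a = 0, b = 0;
#pragma omp parallel sections num_threads(2)
  {
#pragma omp section
    a = 1;                            // lowered as "case 0"
#pragma omp section
    b = 2;                            // lowered as "case 1"
  }
  std::printf("a = %d, b = %d\n", a, b);
  return 0;
}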
if (HasLastprivates) @@ -2535,7 +2605,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); EmitSections(S); } // Emit an implicit barrier at the end. @@ -2547,9 +2617,9 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, S.hasCancel()); } @@ -2578,10 +2648,10 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); CGF.EmitOMPPrivateClause(S, SingleScope); (void)SingleScope.Privatize(); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); @@ -2598,21 +2668,21 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; - Expr *Hint = nullptr; - if (auto *HintClause = S.getSingleClause<OMPHintClause>()) + const Expr *Hint = nullptr; + if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) Hint = HintClause->getHint(); - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart(), Hint); @@ -2622,7 +2692,8 @@ void CodeGenFunction::EmitOMPParallelForDirective( const OMPParallelForDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. 
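Editorial note (not part of the commit): EmitOMPParallelForDirective() above emits the combined construct as an implicit 'parallel' region wrapping an OMPD_for worksharing loop. Roughly this user code exercises it; the file name is illustrative:

    // parallel_for_example.cpp -- illustrative only
    #include <vector>

    int main() {
      std::vector<double> v(1000, 1.0);
      const double scale = 2.0;
      // One combined directive; codegen splits it into an outlined parallel
      // region plus the worksharing loop, as the hunk above describes.
    #pragma omp parallel for
      for (int i = 0; i < static_cast<int>(v.size()); ++i)
        v[i] *= scale;
      return 0;
    }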
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, emitDispatchForLoopBounds); @@ -2635,7 +2706,8 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( const OMPParallelForSimdDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, emitDispatchForLoopBounds); }; @@ -2647,27 +2719,28 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'sections' directive. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitSections(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, emitEmptyBoundParameters); } -void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, - const RegionCodeGenTy &BodyGen, - const TaskGenTy &TaskGen, - OMPTaskDataTy &Data) { +void CodeGenFunction::EmitOMPTaskBasedDirective( + const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, + const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, + OMPTaskDataTy &Data) { // Emit outlined function for task construct. - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto *I = CS->getCapturedDecl()->param_begin(); - auto *PartId = std::next(I); - auto *TaskT = std::next(I, 4); + const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); + auto I = CS->getCapturedDecl()->param_begin(); + auto PartId = std::next(I); + auto TaskT = std::next(I, 4); // Check if the task is final if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. - auto *Cond = Clause->getCondition(); + const Expr *Cond = Clause->getCondition(); bool CondConstant; if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) Data.Final.setInt(CondConstant); @@ -2679,7 +2752,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, } // Check if the task has 'priority' clause. if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { - auto *Prio = Clause->getPriority(); + const Expr *Prio = Clause->getPriority(); Data.Priority.setInt(/*IntVal=*/true); Data.Priority.setPointer(EmitScalarConversion( EmitScalarExpr(Prio), Prio->getType(), @@ -2692,8 +2765,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, // Get list of private variables. 
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); - for (auto *IInit : C->private_copies()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { Data.PrivateVars.push_back(*IRef); Data.PrivateCopies.push_back(IInit); @@ -2706,8 +2779,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); - for (auto *IInit : C->private_copies()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { Data.FirstprivateVars.push_back(*IRef); Data.FirstprivateCopies.push_back(IInit); @@ -2722,8 +2795,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { auto IRef = C->varlist_begin(); auto ID = C->destination_exprs().begin(); - for (auto *IInit : C->private_copies()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { Data.LastprivateVars.push_back(*IRef); Data.LastprivateCopies.push_back(IInit); @@ -2742,7 +2815,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, auto IRed = C->reduction_ops().begin(); auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); - for (const auto *Ref : C->varlists()) { + for (const Expr *Ref : C->varlists()) { Data.ReductionVars.emplace_back(Ref); Data.ReductionCopies.emplace_back(*IPriv); Data.ReductionOps.emplace_back(*IRed); @@ -2758,50 +2831,51 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) - for (auto *IRef : C->varlists()) - Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + for (const Expr *IRef : C->varlists()) + Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, + CapturedRegion](CodeGenFunction &CGF, + PrePostActionTy &Action) { // Set proper addresses for generated private copies. 
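Editorial note (not part of the commit): the task codegen above gathers the final, priority, private and firstprivate clauses into OMPTaskDataTy. A small sketch of a task using those clauses; the names and the recursion cutoff are made up for illustration:

    // task_clauses_example.cpp -- illustrative only
    #include <cstdio>

    void work(int depth) {
      int local = depth;
      // final() may be constant-folded by the codegen above when its condition is a
      // compile-time constant; 'local' gets an explicit private copy per task.
    #pragma omp task final(depth > 8) priority(depth) firstprivate(local)
      std::printf("task at depth %d\n", local);
    #pragma omp taskwait
    }

    int main() {
    #pragma omp parallel
    #pragma omp single
      work(1);
      return 0;
    }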
OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { enum { PrivatesParam = 2, CopyFnParam = 3 }; - auto *CopyFn = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); - auto *PrivatesPtr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + llvm::Value *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); + llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( + CS->getCapturedDecl()->getParam(PrivatesParam))); // Map privates. llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; CallArgs.push_back(PrivatesPtr); - for (auto *E : Data.PrivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + for (const Expr *E : Data.PrivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - for (auto *E : Data.FirstprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + for (const Expr *E : Data.FirstprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), ".firstpriv.ptr.addr"); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - for (auto *E : Data.LastprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + for (const Expr *E : Data.LastprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), ".lastpriv.ptr.addr"); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), CopyFn, CallArgs); - for (auto &&Pair : LastprivateDstsOrigs) { - auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); + for (const auto &Pair : LastprivateDstsOrigs) { + const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE( const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( @@ -2811,14 +2885,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, return CGF.EmitLValue(&DRE).getAddress(); }); } - for (auto &&Pair : PrivatePtrs) { + for (const auto &Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } if (Data.Reductions) { - OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + OMPLexicalScope LexScope(CGF, S, CapturedRegion); ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, Data.ReductionOps); llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( @@ -2826,6 +2900,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { RedCG.emitSharedLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); 
+ // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); Replacement = @@ -2833,16 +2912,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Replacement.getPointer(), CGF.getContext().VoidPtrTy, CGF.getContext().getPointerType( Data.ReductionCopies[Cnt]->getType()), - SourceLocation()), + Data.ReductionCopies[Cnt]->getExprLoc()), Replacement.getAlignment()); Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); Scope.addPrivate(RedCG.getBaseDecl(Cnt), [Replacement]() { return Replacement; }); - // FIXME: This must removed once the runtime library is fixed. - // Emit required threadprivate variables for - // initilizer/combiner/finalizer. - CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), - RedCG, Cnt); } } // Privatize all private variables except for in_reduction items. @@ -2855,7 +2929,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); auto ITD = C->taskgroup_descriptors().begin(); - for (const auto *Ref : C->varlists()) { + for (const Expr *Ref : C->varlists()) { InRedVars.emplace_back(Ref); InRedPrivs.emplace_back(*IPriv); InRedOps.emplace_back(*IRed); @@ -2875,24 +2949,25 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, RedCG.emitAggregateType(CGF, Cnt); // The taskgroup descriptor variable is always implicit firstprivate and // privatized already during procoessing of the firstprivates. - llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( - CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + llvm::Value *ReductionsPtr = + CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), + TaskgroupDescriptors[Cnt]->getExprLoc()); Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); Replacement = Address( CGF.EmitScalarConversion( Replacement.getPointer(), CGF.getContext().VoidPtrTy, CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), - SourceLocation()), + InRedPrivs[Cnt]->getExprLoc()), Replacement.getAlignment()); Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), [Replacement]() { return Replacement; }); - // FIXME: This must removed once the runtime library is fixed. - // Emit required threadprivate variables for - // initilizer/combiner/finalizer. 
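Editorial note (not part of the commit): the emitTaskReductionFixups()/getTaskReductionItem() calls above implement task reductions. A sketch of the source-level feature, assuming the task_reduction/in_reduction syntax this code targets:

    // task_reduction_example.cpp -- illustrative only
    #include <cstdio>

    int main() {
      int sum = 0;
    #pragma omp parallel
    #pragma omp single
      {
    #pragma omp taskgroup task_reduction(+ : sum)
        {
          for (int i = 1; i <= 4; ++i) {
    #pragma omp task in_reduction(+ : sum) firstprivate(i)
            sum += i;
          }
        }
      }
      std::printf("sum=%d\n", sum);  // expected 10
      return 0;
    }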
- CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), - RedCG, Cnt); } } (void)InRedScope.Privatize(); @@ -2900,7 +2975,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Action.Enter(CGF); BodyGen(CGF); }; - auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); OMPLexicalScope Scope(*this, S); @@ -2909,27 +2984,24 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, static ImplicitParamDecl * createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, - QualType Ty, CapturedDecl *CD) { - auto *OrigVD = ImplicitParamDecl::Create( - C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); - auto *OrigRef = - DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, - /*RefersToEnclosingVariableOrCapture=*/false, - SourceLocation(), Ty, VK_LValue); - auto *PrivateVD = ImplicitParamDecl::Create( - C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + QualType Ty, CapturedDecl *CD, + SourceLocation Loc) { + auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, + ImplicitParamDecl::Other); + auto *OrigRef = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, + /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); + auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, + ImplicitParamDecl::Other); auto *PrivateRef = DeclRefExpr::Create( C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, - /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty, - VK_LValue); + /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); QualType ElemType = C.getBaseElementType(Ty); - auto *InitVD = - ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr, - ElemType, ImplicitParamDecl::Other); - auto *InitRef = - DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, - /*RefersToEnclosingVariableOrCapture=*/false, - SourceLocation(), ElemType, VK_LValue); + auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, + ImplicitParamDecl::Other); + auto *InitRef = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, + /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); PrivateVD->setInitStyle(VarDecl::CInit); PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, InitRef, /*BasePath=*/nullptr, @@ -2944,12 +3016,12 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo) { // Emit outlined function for task construct. - auto CS = S.getCapturedStmt(OMPD_task); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); - auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); - auto *I = CS->getCapturedDecl()->param_begin(); - auto *PartId = std::next(I); - auto *TaskT = std::next(I, 4); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); + Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + auto I = CS->getCapturedDecl()->param_begin(); + auto PartId = std::next(I); + auto TaskT = std::next(I, 4); OMPTaskDataTy Data; // The task is not final. 
Data.Final.setInt(/*IntVal=*/false); @@ -2976,14 +3048,15 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( QualType BaseAndPointersType = getContext().getConstantArrayType( getContext().VoidPtrTy, ArrSize, ArrayType::Normal, /*IndexTypeQuals=*/0); - BPVD = createImplicitFirstprivateForType(getContext(), Data, - BaseAndPointersType, CD); - PVD = createImplicitFirstprivateForType(getContext(), Data, - BaseAndPointersType, CD); + BPVD = createImplicitFirstprivateForType( + getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); + PVD = createImplicitFirstprivateForType( + getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); QualType SizesType = getContext().getConstantArrayType( getContext().getSizeType(), ArrSize, ArrayType::Normal, /*IndexTypeQuals=*/0); - SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD); + SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, + S.getLocStart()); TargetScope.addPrivate( BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); TargetScope.addPrivate(PVD, @@ -2994,33 +3067,33 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( (void)TargetScope.Privatize(); // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) - for (auto *IRef : C->varlists()) - Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + for (const Expr *IRef : C->varlists()) + Data.Dependences.emplace_back(C->getDependencyKind(), IRef); auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); if (!Data.FirstprivateVars.empty()) { enum { PrivatesParam = 2, CopyFnParam = 3 }; - auto *CopyFn = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); - auto *PrivatesPtr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + llvm::Value *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); + llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( + CS->getCapturedDecl()->getParam(PrivatesParam))); // Map privates. llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; CallArgs.push_back(PrivatesPtr); - for (auto *E : Data.FirstprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + for (const Expr *E : Data.FirstprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), ".firstpriv.ptr.addr"); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), CopyFn, CallArgs); - for (auto &&Pair : PrivatePtrs) { + for (const auto &Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); @@ -3028,19 +3101,20 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } // Privatize all private variables except for in_reduction items. 
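Editorial note (not part of the commit): EmitOMPTargetTaskBasedDirective() above packages the base-pointer, pointer and size arrays as implicit firstprivates of a target task. A sketch of a deferred mapping directive of the kind that takes this route; the file name and sizes are illustrative:

    // target_enter_data_example.cpp -- illustrative only
    int main() {
      const int N = 256;
      static double a[N];
    #pragma omp target enter data map(to : a[0:N]) depend(out : a) nowait
    #pragma omp taskwait
    #pragma omp target exit data map(release : a[0:N])
      return 0;
    }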
(void)Scope.Privatize(); - InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); - InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); - InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + if (InputInfo.NumberOfTargetItems > 0) { + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + } Action.Enter(CGF); - OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true, - /*EmitPreInitStmt=*/false); + OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); BodyGen(CGF); }; - auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, Data.NumberOfParts); llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); @@ -3054,9 +3128,9 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); - auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); + Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { if (C->getNameModifier() == OMPD_unknown || @@ -3079,7 +3153,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { SharedsTy, CapturedStruct, IfCond, Data); }; - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); } void CodeGenFunction::EmitOMPTaskyieldDirective( @@ -3108,7 +3182,7 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( auto IRed = C->reduction_ops().begin(); auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); - for (const auto *Ref : C->varlists()) { + for (const Expr *Ref : C->varlists()) { Data.ReductionVars.emplace_back(Ref); Data.ReductionCopies.emplace_back(*IPriv); Data.ReductionOps.emplace_back(*IRed); @@ -3128,40 +3202,42 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), /*Volatile=*/false, E->getType()); } - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { - CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { - if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { - return llvm::makeArrayRef(FlushClause->varlist_begin(), - 
FlushClause->varlist_end()); - } - return llvm::None; - }(), S.getLocStart()); + CGM.getOpenMPRuntime().emitFlush( + *this, + [&S]() -> ArrayRef<const Expr *> { + if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) + return llvm::makeArrayRef(FlushClause->varlist_begin(), + FlushClause->varlist_end()); + return llvm::None; + }(), + S.getLocStart()); } void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr) { // Emit the loop iteration variable. - auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); - auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); + const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); + const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); EmitVarDecl(*IVDecl); // Emit the iterations count variable. // If it is not a variable, Sema decided to calculate iterations count on each // iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); // Emit calculation of the iterations count. EmitIgnoredExpr(S.getCalcLastIteration()); } - auto &RT = CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); bool HasLastprivateClause = false; // Check pre-condition. @@ -3176,7 +3252,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (!CondConstant) return; } else { - auto *ThenBlock = createBasicBlock("omp.precond.then"); + llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); ContBlock = createBasicBlock("omp.precond.end"); emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, getProfileCount(&S)); @@ -3225,9 +3301,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // Detect the distribute schedule kind and chunk. llvm::Value *Chunk = nullptr; OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; - if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { + if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { ScheduleKind = C->getDistScheduleKind(); - if (const auto *Ch = C->getChunkSize()) { + if (const Expr *Ch = C->getChunkSize()) { Chunk = EmitScalarExpr(Ch); Chunk = EmitScalarConversion(Chunk, Ch->getType(), S.getIterationVariable()->getType(), @@ -3254,7 +3330,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, LB.getAddress(), UB.getAddress(), ST.getAddress()); RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); - auto LoopExit = + JumpDest LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) @@ -3265,9 +3341,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, ? S.getCombinedInit() : S.getInit()); - Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedCond() - : S.getCond(); + const Expr *Cond = + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? 
S.getCombinedCond() + : S.getCond(); // for distribute alone, codegen // while (idx <= UB) { BODY; ++idx; } @@ -3291,31 +3368,35 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, CodeGenLoop); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { - EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); } - OpenMPDirectiveKind ReductionKind = OMPD_unknown; - if (isOpenMPParallelDirective(S.getDirectiveKind()) && - isOpenMPSimdDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_parallel_for_simd; - } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_parallel_for; - } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_simd; - } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && - S.hasClausesOfKind<OMPReductionClause>()) { - llvm_unreachable( - "No reduction clauses is allowed in distribute directive."); + if (isOpenMPSimdDirective(S.getDirectiveKind()) && + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPTeamsDirective(S.getDirectiveKind())) { + OpenMPDirectiveKind ReductionKind = OMPD_unknown; + if (isOpenMPParallelDirective(S.getDirectiveKind()) && + isOpenMPSimdDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_parallel_for_simd; + } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_parallel_for; + } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_simd; + } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && + S.hasClausesOfKind<OMPReductionClause>()) { + llvm_unreachable( + "No reduction clauses is allowed in distribute directive."); + } + EmitOMPReductionClauseFinal(S, ReductionKind); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); } - EmitOMPReductionClauseFinal(S, ReductionKind); - // Emit post-update of the reduction variables if IsLastIter != 0. - emitPostUpdateForReductionClause( - *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { - return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); - }); // Emit final copy of the lastprivate variables if IsLastIter != 0. 
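Editorial note (not part of the commit): EmitOMPDistributeLoop() above reads the dist_schedule kind and chunk from the OMPDistScheduleClause. An illustrative distribute loop that supplies them; the file name and sizes are assumptions:

    // distribute_example.cpp -- illustrative only
    int main() {
      const int N = 1 << 16;
      static float x[N];
    #pragma omp target teams num_teams(4) map(tofrom : x[0:N])
    #pragma omp distribute dist_schedule(static, 1024)
      for (int i = 0; i < N; ++i)
        x[i] = 0.5f * i;
      return 0;
    }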
if (HasLastprivateClause) { EmitOMPLastprivateClauseFinal( @@ -3335,10 +3416,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } @@ -3347,34 +3427,35 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; - auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); - Fn->addFnAttr(llvm::Attribute::NoInline); + llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); + Fn->setDoesNotRecurse(); return Fn; } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { - if (!S.getAssociatedStmt()) { + if (S.hasClausesOfKind<OMPDependClause>()) { + assert(!S.getAssociatedStmt() && + "No associated statement must be in ordered depend construct."); for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); return; } - auto *C = S.getSingleClause<OMPSIMDClause>(); + const auto *C = S.getSingleClause<OMPSIMDClause>(); auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, PrePostActionTy &Action) { + const CapturedStmt *CS = S.getInnermostCapturedStmt(); if (C) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars); } else { Action.Enter(CGF); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(CS->getCapturedStmt()); } }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } @@ -3384,11 +3465,10 @@ static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, assert(CGF.hasScalarEvaluationKind(DestType) && "DestType must have scalar evaluation kind."); assert(!Val.isAggregate() && "Must be a scalar or complex."); - return Val.isScalar() - ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, - Loc) - : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, - DestType, Loc); + return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, + DestType, Loc) + : CGF.EmitComplexToScalarConversion( + Val.getComplexVal(), SrcType, DestType, Loc); } static CodeGenFunction::ComplexPairTy @@ -3399,15 +3479,17 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, CodeGenFunction::ComplexPairTy ComplexVal; if (Val.isScalar()) { // Convert the input element to the element type of the complex. 
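Editorial note (not part of the commit): EmitOMPOrderedDirective() above distinguishes depend-only (doacross), simd, and plain ordered regions. The plain form looks like this; the file name is illustrative:

    // ordered_example.cpp -- illustrative only
    #include <cstdio>

    int main() {
      int last = -1;
    #pragma omp parallel for ordered schedule(static, 1)
      for (int i = 0; i < 8; ++i) {
    #pragma omp ordered
        last = i;                    // executed strictly in iteration order
      }
      std::printf("last=%d\n", last);
      return 0;
    }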
- auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); - auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, - DestElementType, Loc); + QualType DestElementType = + DestType->castAs<ComplexType>()->getElementType(); + llvm::Value *ScalarVal = CGF.EmitScalarConversion( + Val.getScalarVal(), SrcType, DestElementType, Loc); ComplexVal = CodeGenFunction::ComplexPairTy( ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); } else { assert(Val.isComplex() && "Must be a scalar or complex."); - auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); - auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); + QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); + QualType DestElementType = + DestType->castAs<ComplexType>()->getElementType(); ComplexVal.first = CGF.EmitScalarConversion( Val.getComplexVal().first, SrcElementType, DestElementType, Loc); ComplexVal.second = CGF.EmitScalarConversion( @@ -3446,7 +3528,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, } } -static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, const Expr *X, const Expr *V, SourceLocation Loc) { // v = x; @@ -3470,7 +3552,7 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); } -static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, const Expr *X, const Expr *E, SourceLocation Loc) { // x = expr; @@ -3489,7 +3571,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, BinaryOperatorKind BO, llvm::AtomicOrdering AO, bool IsXLHSInRHSPart) { - auto &Context = CGF.CGM.getContext(); + ASTContext &Context = CGF.getContext(); // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' // expression is simple and atomic is allowed for the given type for the // target platform. 
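Editorial note (not part of the commit): the renamed emitOMPAtomicReadExpr/emitOMPAtomicWriteExpr helpers and emitOMPAtomicRMW() above decide when a simple integer update can be lowered to an LLVM atomicrmw. Source forms they handle, for illustration:

    // atomic_example.cpp -- illustrative only
    int main() {
      int x = 0;
    #pragma omp parallel
      {
    #pragma omp atomic update        // integer "x += 1" is a candidate for atomicrmw
        x += 1;

        int v;
    #pragma omp atomic read seq_cst  // sequentially consistent load of x into v
        v = x;
        (void)v;
      }
      return 0;
    }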
@@ -3567,20 +3649,21 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, case BO_Comma: llvm_unreachable("Unsupported atomic update operation"); } - auto *UpdateVal = Update.getScalarVal(); + llvm::Value *UpdateVal = Update.getScalarVal(); if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { UpdateVal = CGF.Builder.CreateIntCast( IC, X.getAddress().getElementType(), X.getType()->hasSignedIntegerRepresentation()); } - auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); + llvm::Value *Res = + CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); return std::make_pair(true, RValue::get(Res)); } std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, llvm::AtomicOrdering AO, SourceLocation Loc, - const llvm::function_ref<RValue(RValue)> &CommonGen) { + const llvm::function_ref<RValue(RValue)> CommonGen) { // Update expressions are allowed to have the following forms: // x binop= expr; -> xrval + expr; // x++, ++x -> xrval + 1; @@ -3601,13 +3684,13 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( return Res; } -static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && "Update expr in 'atomic update' must be a binary operator."); - auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); + const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); // Update expressions are allowed to have the following forms: // x binop= expr; -> xrval + expr; // x++, ++x -> xrval + 1; @@ -3617,18 +3700,18 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic; - auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); - auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); - auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; - auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; - auto Gen = - [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { - CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); - CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); - return CGF.EmitAnyExpr(UE); - }; + llvm::AtomicOrdering AO = IsSeqCst + ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic; + const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); + const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); + const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; + const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; + auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) { + CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); + CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); + return CGF.EmitAnyExpr(UE); + }; (void)CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); // OpenMP, 2.12.6, atomic Construct @@ -3656,7 +3739,7 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value, llvm_unreachable("Must be a scalar or complex."); } -static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, bool IsPostfixUpdate, const Expr *V, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, @@ -3667,27 +3750,28 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic; + llvm::AtomicOrdering AO = IsSeqCst + ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic; QualType NewVValType; if (UE) { // 'x' is updated with some additional value. assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && "Update expr in 'atomic capture' must be a binary operator."); - auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); + const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); // Update expressions are allowed to have the following forms: // x binop= expr; -> xrval + expr; // x++, ++x -> xrval + 1; // x--, --x -> xrval - 1; // x = x binop expr; -> xrval binop expr // x = expr Op x; - > expr binop xrval; - auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); - auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); - auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; + const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); + const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); + const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; NewVValType = XRValExpr->getType(); - auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; + const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, - IsPostfixUpdate](RValue XRValue) -> RValue { + IsPostfixUpdate](RValue XRValue) { CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); RValue Res = CGF.EmitAnyExpr(UE); @@ -3714,7 +3798,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = X->getType().getNonReferenceType(); ExprRValue = convertToType(CGF, ExprRValue, E->getType(), X->getType().getNonReferenceType(), Loc); - auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { + auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { NewVVal = XRValue; return ExprRValue; }; @@ -3737,24 +3821,24 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); } -static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, +static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, bool IsSeqCst, bool IsPostfixUpdate, const Expr *X, const Expr *V, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { switch (Kind) { case OMPC_read: - EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); + emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); break; case OMPC_write: - EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); + emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); break; case OMPC_unknown: case OMPC_update: - EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); + emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_capture: - EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, + emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_if: @@ -3810,7 +3894,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); OpenMPClauseKind Kind = OMPC_unknown; - for (auto *C : S.clauses()) { + for (const OMPClause *C : S.clauses()) { // Find first clause (skip seq_cst clause, if it is first). if (C->getClauseKind() != OMPC_seq_cst) { Kind = C->getClauseKind(); @@ -3818,28 +3902,25 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } } - const auto *CS = - S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { + const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); + if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) enterFullExpression(EWC); - } // Processing for statements under 'atomic capture'. 
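Editorial note (not part of the commit): the capture handling above (emitOMPAtomicCaptureExpr) supports both prefix and postfix update forms. A postfix capture, for example; the variable names are illustrative:

    // atomic_capture_example.cpp -- illustrative only
    int main() {
      int x = 0;
    #pragma omp parallel
      {
        int ticket;
    #pragma omp atomic capture
        ticket = x++;                // the old value of x is captured, then x is incremented
        (void)ticket;                // each thread receives a distinct ticket
      }
      return 0;
    }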
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { - for (const auto *C : Compound->body()) { - if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { + for (const Stmt *C : Compound->body()) { + if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) enterFullExpression(EWC); - } } } auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStopPoint(CS); - EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), + emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); }; - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } @@ -3848,7 +3929,16 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) { assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); CodeGenModule &CGM = CGF.CGM; - const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target); + + // On device emit this construct as inlined code. + if (CGM.getLangOpts().OpenMPIsDevice) { + OMPLexicalScope Scope(CGF, S, OMPD_target); + CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + }); + return; + } llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -3865,9 +3955,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; - if (auto *C = S.getSingleClause<OMPDeviceClause>()) { + if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - } // Check if we have an if clause whose conditional always evaluates to false // or if we do not have any targets specified. If so the target region is not @@ -3885,9 +3974,9 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, StringRef ParentName; // In case we have Ctors/Dtors we use the complete type variant to produce // the mangling of the device outlined kernel. - if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) + if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); - else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) + else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); else ParentName = @@ -3896,22 +3985,19 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, // Emit target region as a standalone region. 
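Editorial note (not part of the commit): emitCommonOMPTargetDirective() above inlines the region when compiling for the device and, on the host, reads the device and if clauses before emitting the target call. A host-side example; the device number and file name are arbitrary:

    // target_example.cpp -- illustrative only
    #include <cstdio>

    int main() {
      const int N = 1024;
      static double a[N];
      int offload = 1;
    #pragma omp target device(0) if(target : offload) map(tofrom : a[0:N])
      for (int i = 0; i < N; ++i)
        a[i] = 2.0 * i;
      std::printf("a[1]=%f\n", a[1]);
      return 0;
    }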
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); - OMPLexicalScope Scope(CGF, S); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, - CapturedVars); + OMPLexicalScope Scope(CGF, S, OMPD_task); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); } static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, @@ -3940,14 +4026,15 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + llvm::Value *OutlinedFn = + CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); - const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); - const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); + const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); + const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); if (NT || TL) { - Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; - Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; + const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; + const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, S.getLocStart()); @@ -3962,18 +4049,19 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { // Emit teams region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); - emitPostUpdateForReductionClause( - *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); } static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, @@ -3982,18 +4070,18 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, Action.Enter(CGF); // Emit teams region as a standalone region. 
auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); - emitPostUpdateForReductionClause( - CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); } void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( @@ -4028,7 +4116,8 @@ emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4073,7 +4162,8 @@ static void emitTargetTeamsDistributeSimdRegion( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4117,7 +4207,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4138,7 +4229,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4160,7 +4252,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4182,7 +4275,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( // Emit teams region as a standalone region. 
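Editorial note (not part of the commit): emitCommonOMPTeamsDirective() above forwards num_teams and thread_limit to the runtime via emitNumTeamsClause(). For example; the counts are arbitrary:

    // teams_example.cpp -- illustrative only
    #include <omp.h>
    #include <cstdio>

    int main() {
      int teams = 0;
    #pragma omp target teams num_teams(8) thread_limit(64) map(tofrom : teams)
      {
        if (omp_get_team_num() == 0)
          teams = omp_get_num_teams();
      }
      std::printf("teams=%d\n", teams);
      return 0;
    }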
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4195,6 +4289,109 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( [](CodeGenFunction &) { return nullptr; }); } +static void emitTargetTeamsDistributeParallelForRegion( + CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeParallelForDirective &S) { + // Emit SPMD target teams distribute parallel for region as a standalone + // region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( + const OMPTargetTeamsDistributeParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitTargetTeamsDistributeParallelForSimdRegion( + CodeGenFunction &CGF, + const OMPTargetTeamsDistributeParallelForSimdDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. 
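Editorial note (not part of the commit): the new entry points added here handle the combined 'target teams distribute parallel for' directives (the simd variant continues just below). A minimal use, for illustration:

    // combined_target_example.cpp -- illustrative only
    int main() {
      const int N = 1 << 20;
      static double a[N];
    #pragma omp target teams distribute parallel for simd map(tofrom : a[0:N])
      for (int i = 0; i < N; ++i)
        a[i] = 2.0 * a[i] + 1.0;
      return 0;
    }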
+ auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + // Emit SPMD target teams distribute parallel for simd region as a standalone + // region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( + const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), @@ -4234,19 +4431,19 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( const auto &C = cast<OMPUseDevicePtrClause>(NC); auto OrigVarIt = C.varlist_begin(); auto InitIt = C.inits().begin(); - for (auto PvtVarIt : C.private_copies()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); - auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); - auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); + for (const Expr *PvtVarIt : C.private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); + const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); + const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); // In order to identify the right initializer we need to match the // declaration used by the mapping logic. In some cases we may get // OMPCapturedExprDecl that refers to the original declaration. const ValueDecl *MatchingVD = OrigVD; - if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { // OMPCapturedExprDecl are used to privative fields of the current // structure. 
- auto *ME = cast<MemberExpr>(OED->getInit()); + const auto *ME = cast<MemberExpr>(OED->getInit()); assert(isa<CXXThisExpr>(ME->getBase()) && "Base should be the current struct!"); MatchingVD = ME->getMemberDecl(); @@ -4258,7 +4455,9 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( if (InitAddrIt == CaptureDeviceAddrMap.end()) continue; - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD, + InitAddrIt, InitVD, + PvtVD]() { // Initialize the temporary initialization variable with the address we // get from the runtime library. We have to cast the source address // because it is always a void *. References are materialized in the @@ -4275,7 +4474,7 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( EmitDecl(*PvtVD); // The initialization variables reached its purpose in the emission - // ofthe previous declaration, so we don't need it anymore. + // of the previous declaration, so we don't need it anymore. LocalDeclMap.erase(InitVD); // Return the address of the private variable. @@ -4312,13 +4511,12 @@ void CodeGenFunction::EmitOMPTargetDataDirective( DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + CodeGenFunction &CGF, PrePostActionTy &Action) { auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; - // Codegen that selects wheather to generate the privatization code or not. + // Codegen that selects whether to generate the privatization code or not. auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, &InnermostCodeGen](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -4337,8 +4535,9 @@ void CodeGenFunction::EmitOMPTargetDataDirective( Info.CaptureDeviceAddrMap); (void)PrivateScope.Privatize(); RCG(CGF); - } else + } else { RCG(CGF); + } }; // Forward the provided action to the privatization codegen. @@ -4364,12 +4563,12 @@ void CodeGenFunction::EmitOMPTargetDataDirective( // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - if (auto *C = S.getSingleClause<OMPIfClause>()) + if (const auto *C = S.getSingleClause<OMPIfClause>()) IfCond = C->getCondition(); // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; - if (auto *C = S.getSingleClause<OMPDeviceClause>()) + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); // Set the action to signal privatization of device pointers. @@ -4389,15 +4588,15 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective( // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - if (auto *C = S.getSingleClause<OMPIfClause>()) + if (const auto *C = S.getSingleClause<OMPIfClause>()) IfCond = C->getCondition(); // Check if we have any device clause associated with the directive. 
const Expr *Device = nullptr; - if (auto *C = S.getSingleClause<OMPDeviceClause>()) + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_task); CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } @@ -4410,15 +4609,15 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - if (auto *C = S.getSingleClause<OMPIfClause>()) + if (const auto *C = S.getSingleClause<OMPIfClause>()) IfCond = C->getCondition(); // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; - if (auto *C = S.getSingleClause<OMPDeviceClause>()) + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_task); CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } @@ -4426,9 +4625,10 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, const OMPTargetParallelDirective &S, PrePostActionTy &Action) { // Get the captured statement associated with the 'parallel' region. - auto *CS = S.getCapturedStmt(OMPD_parallel); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); Action.Enter(CGF); - auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); @@ -4440,8 +4640,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, }; emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, emitEmptyBoundParameters); - emitPostUpdateForReductionClause( - CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); } void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( @@ -4472,7 +4672,8 @@ static void emitTargetParallelForRegion(CodeGenFunction &CGF, Action.Enter(CGF); // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPCancelStackRAII CancelRegion( CGF, OMPD_target_parallel_for, S.hasCancel()); CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, @@ -4512,7 +4713,8 @@ emitTargetParallelForSimdRegion(CodeGenFunction &CGF, Action.Enter(CGF); // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. 
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, emitDispatchForLoopBounds); }; @@ -4547,17 +4749,17 @@ void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, const ImplicitParamDecl *PVD, CodeGenFunction::OMPPrivateScope &Privates) { - auto *VDecl = cast<VarDecl>(Helper->getDecl()); - Privates.addPrivate( - VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); + const auto *VDecl = cast<VarDecl>(Helper->getDecl()); + Privates.addPrivate(VDecl, + [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); }); } void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); // Emit outlined function for task construct. - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); - auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); + Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { if (C->getNameModifier() == OMPD_unknown || @@ -4600,7 +4802,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { if (!CondConstant) return; } else { - auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); + llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); ContBlock = CGF.createBasicBlock("taskloop.if.end"); emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, CGF.getProfileCount(&S)); @@ -4631,14 +4833,14 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { (void)LoopScope.Privatize(); // Emit the loop iteration variable. const Expr *IVExpr = S.getIterationVariable(); - const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); CGF.EmitVarDecl(*IVDecl); CGF.EmitIgnoredExpr(S.getInit()); // Emit the iterations count variable. // If it is not a variable, Sema decided to calculate iterations count on // each iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); // Emit calculation of the iterations count. 
CGF.EmitIgnoredExpr(S.getCalcLastIteration()); @@ -4668,7 +4870,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, const OMPTaskDataTy &Data) { - auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, + &Data](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, OutlinedFn, SharedsTy, @@ -4677,15 +4880,16 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); }; - if (Data.Nogroup) - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); - else { + if (Data.Nogroup) { + EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); + } else { CGM.getOpenMPRuntime().emitTaskgroupRegion( *this, [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, + Data); }, S.getLocStart()); } @@ -4710,14 +4914,44 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - if (auto *C = S.getSingleClause<OMPIfClause>()) + if (const auto *C = S.getSingleClause<OMPIfClause>()) IfCond = C->getCondition(); // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; - if (auto *C = S.getSingleClause<OMPDeviceClause>()) + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + OMPLexicalScope Scope(*this, S, OMPD_task); CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } + +void CodeGenFunction::EmitSimpleOMPExecutableDirective( + const OMPExecutableDirective &D) { + if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) + return; + auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); + } else { + if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { + for (const Expr *E : LD->counters()) { + if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( + cast<DeclRefExpr>(E)->getDecl())) { + // Emit only those that were not explicitly referenced in clauses. + if (!CGF.LocalDeclMap.count(VD)) + CGF.EmitVarDecl(*VD); + } + } + } + CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); + } + }; + OMPSimdLexicalScope Scope(*this, D); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, + isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd + : D.getDirectiveKind(), + CodeGen); +} + diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp index 78928d04220d..41c8c943f54d 100644 --- a/lib/CodeGen/CGVTT.cpp +++ b/lib/CodeGen/CGVTT.cpp @@ -100,7 +100,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName())); // Set the right visibility. 
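(Editorial aside, not part of the patch.) The taskloop changes above now fetch the captured statement via getCapturedStmt(OMPD_taskloop) and pass an explicit OMPD_taskloop kind to EmitOMPTaskBasedDirective; a hypothetical directive exercising the clauses handled there — the 'taskloop' name modifier on 'if' is what the OMPD_taskloop check accepts, and 'nogroup' drives the Data.Nogroup branch:

    // Hypothetical user code; the condition and grainsize value are illustrative.
    void work(int i);
    void process(int n) {
    #pragma omp taskloop if(taskloop : n > 1024) nogroup grainsize(64)
      for (int i = 0; i < n; ++i)
        work(i);
    }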
- CGM.setGlobalVisibility(VTT, RD, ForDefinition); + CGM.setGVProperties(VTT, RD); } llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 2d9bf3bce926..5a2ec65f7763 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -31,29 +31,12 @@ using namespace CodeGen; CodeGenVTables::CodeGenVTables(CodeGenModule &CGM) : CGM(CGM), VTContext(CGM.getContext().getVTableContext()) {} -llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, - const ThunkInfo &Thunk) { - const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - - // Compute the mangled name. - SmallString<256> Name; - llvm::raw_svector_ostream Out(Name); - if (const CXXDestructorDecl* DD = dyn_cast<CXXDestructorDecl>(MD)) - getCXXABI().getMangleContext().mangleCXXDtorThunk(DD, GD.getDtorType(), - Thunk.This, Out); - else - getCXXABI().getMangleContext().mangleThunk(MD, Thunk, Out); - - llvm::Type *Ty = getTypes().GetFunctionTypeForVTable(GD); - return GetOrCreateLLVMFunction(Name, Ty, GD, /*ForVTable=*/true, +llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy, + GlobalDecl GD) { + return GetOrCreateLLVMFunction(Name, FnTy, GD, /*ForVTable=*/true, /*DontDefer=*/true, /*IsThunk=*/true); } -static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD, - const ThunkInfo &Thunk, llvm::Function *Fn) { - CGM.setGlobalVisibility(Fn, MD, ForDefinition); -} - static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk, llvm::Function *ThunkFn, bool ForVTable, GlobalDecl GD) { @@ -62,8 +45,12 @@ static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk, !Thunk.Return.isEmpty()); // Set the right visibility. - const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - setThunkVisibility(CGM, MD, Thunk, ThunkFn); + CGM.setGVProperties(ThunkFn, GD); + + if (!CGM.getCXXABI().exportThunk()) { + ThunkFn->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + ThunkFn->setDSOLocal(true); + } if (CGM.supportsCOMDAT() && ThunkFn->isWeakForLinker()) ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName())); @@ -236,7 +223,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, } void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, - const CGFunctionInfo &FnInfo) { + const CGFunctionInfo &FnInfo, + bool IsUnprototyped) { assert(!CurGD.getDecl() && "CurGD was already set!"); CurGD = GD; CurFuncIsThunk = true; @@ -245,21 +233,28 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); QualType ThisType = MD->getThisType(getContext()); const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); - QualType ResultType = CGM.getCXXABI().HasThisReturn(GD) - ? ThisType - : CGM.getCXXABI().hasMostDerivedReturn(GD) - ? CGM.getContext().VoidPtrTy - : FPT->getReturnType(); + QualType ResultType; + if (IsUnprototyped) + ResultType = CGM.getContext().VoidTy; + else if (CGM.getCXXABI().HasThisReturn(GD)) + ResultType = ThisType; + else if (CGM.getCXXABI().hasMostDerivedReturn(GD)) + ResultType = CGM.getContext().VoidPtrTy; + else + ResultType = FPT->getReturnType(); FunctionArgList FunctionArgs; // Create the implicit 'this' parameter declaration. CGM.getCXXABI().buildThisParam(*this, FunctionArgs); - // Add the rest of the parameters. 
- FunctionArgs.append(MD->param_begin(), MD->param_end()); + // Add the rest of the parameters, if we have a prototype to work with. + if (!IsUnprototyped) { + FunctionArgs.append(MD->param_begin(), MD->param_end()); - if (isa<CXXDestructorDecl>(MD)) - CGM.getCXXABI().addImplicitStructorParams(*this, ResultType, FunctionArgs); + if (isa<CXXDestructorDecl>(MD)) + CGM.getCXXABI().addImplicitStructorParams(*this, ResultType, + FunctionArgs); + } // Start defining the function. auto NL = ApplyDebugLocation::CreateEmpty(*this); @@ -285,7 +280,8 @@ void CodeGenFunction::FinishThunk() { } void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, - const ThunkInfo *Thunk) { + const ThunkInfo *Thunk, + bool IsUnprototyped) { assert(isa<CXXMethodDecl>(CurGD.getDecl()) && "Please use a new CGF for this thunk"); const CXXMethodDecl *MD = cast<CXXMethodDecl>(CurGD.getDecl()); @@ -296,13 +292,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, *this, LoadCXXThisAddress(), Thunk->This) : LoadCXXThis(); - if (CurFnInfo->usesInAlloca()) { + if (CurFnInfo->usesInAlloca() || IsUnprototyped) { // We don't handle return adjusting thunks, because they require us to call // the copy constructor. For now, fall through and pretend the return // adjustment was empty so we don't crash. if (Thunk && !Thunk->Return.isEmpty()) { - CGM.ErrorUnsupported( - MD, "non-trivial argument copy for return-adjusting thunk"); + if (IsUnprototyped) + CGM.ErrorUnsupported( + MD, "return-adjusting thunk with incomplete parameter type"); + else + CGM.ErrorUnsupported( + MD, "non-trivial argument copy for return-adjusting thunk"); } EmitMustTailThunk(MD, AdjustedThisPtr, CalleePtr); return; @@ -429,55 +429,98 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, } void CodeGenFunction::generateThunk(llvm::Function *Fn, - const CGFunctionInfo &FnInfo, - GlobalDecl GD, const ThunkInfo &Thunk) { - StartThunk(Fn, GD, FnInfo); + const CGFunctionInfo &FnInfo, GlobalDecl GD, + const ThunkInfo &Thunk, + bool IsUnprototyped) { + StartThunk(Fn, GD, FnInfo, IsUnprototyped); // Create a scope with an artificial location for the body of this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); - // Get our callee. - llvm::Type *Ty = - CGM.getTypes().GetFunctionType(CGM.getTypes().arrangeGlobalDeclaration(GD)); + // Get our callee. Use a placeholder type if this method is unprototyped so + // that CodeGenModule doesn't try to set attributes. + llvm::Type *Ty; + if (IsUnprototyped) + Ty = llvm::StructType::get(getLLVMContext()); + else + Ty = CGM.getTypes().GetFunctionType(FnInfo); + llvm::Constant *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true); + // Fix up the function type for an unprototyped musttail call. + if (IsUnprototyped) + Callee = llvm::ConstantExpr::getBitCast(Callee, Fn->getType()); + // Make the call and return the result. - EmitCallAndReturnForThunk(Callee, &Thunk); + EmitCallAndReturnForThunk(Callee, &Thunk, IsUnprototyped); } -void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk, - bool ForVTable) { - const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeGlobalDeclaration(GD); +static bool shouldEmitVTableThunk(CodeGenModule &CGM, const CXXMethodDecl *MD, + bool IsUnprototyped, bool ForVTable) { + // Always emit thunks in the MS C++ ABI. We cannot rely on other TUs to + // provide thunks for us. + if (CGM.getTarget().getCXXABI().isMicrosoft()) + return true; - // FIXME: re-use FnInfo in this computation. 
- llvm::Constant *C = CGM.GetAddrOfThunk(GD, Thunk); - llvm::GlobalValue *Entry; + // In the Itanium C++ ABI, vtable thunks are provided by TUs that provide + // definitions of the main method. Therefore, emitting thunks with the vtable + // is purely an optimization. Emit the thunk if optimizations are enabled and + // all of the parameter types are complete. + if (ForVTable) + return CGM.getCodeGenOpts().OptimizationLevel && !IsUnprototyped; - // Strip off a bitcast if we got one back. - if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(C)) { - assert(CE->getOpcode() == llvm::Instruction::BitCast); - Entry = cast<llvm::GlobalValue>(CE->getOperand(0)); - } else { - Entry = cast<llvm::GlobalValue>(C); - } + // Always emit thunks along with the method definition. + return true; +} - // There's already a declaration with the same name, check if it has the same - // type or if we need to replace it. - if (Entry->getType()->getElementType() != - CGM.getTypes().GetFunctionTypeForVTable(GD)) { - llvm::GlobalValue *OldThunkFn = Entry; +llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD, + const ThunkInfo &TI, + bool ForVTable) { + const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - // If the types mismatch then we have to rewrite the definition. - assert(OldThunkFn->isDeclaration() && - "Shouldn't replace non-declaration"); + // First, get a declaration. Compute the mangled name. Don't worry about + // getting the function prototype right, since we may only need this + // declaration to fill in a vtable slot. + SmallString<256> Name; + MangleContext &MCtx = CGM.getCXXABI().getMangleContext(); + llvm::raw_svector_ostream Out(Name); + if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) + MCtx.mangleCXXDtorThunk(DD, GD.getDtorType(), TI.This, Out); + else + MCtx.mangleThunk(MD, TI, Out); + llvm::Type *ThunkVTableTy = CGM.getTypes().GetFunctionTypeForVTable(GD); + llvm::Constant *Thunk = CGM.GetAddrOfThunk(Name, ThunkVTableTy, GD); + + // If we don't need to emit a definition, return this declaration as is. + bool IsUnprototyped = !CGM.getTypes().isFuncTypeConvertible( + MD->getType()->castAs<FunctionType>()); + if (!shouldEmitVTableThunk(CGM, MD, IsUnprototyped, ForVTable)) + return Thunk; + + // Arrange a function prototype appropriate for a function definition. In some + // cases in the MS ABI, we may need to build an unprototyped musttail thunk. + const CGFunctionInfo &FnInfo = + IsUnprototyped ? CGM.getTypes().arrangeUnprototypedMustTailThunk(MD) + : CGM.getTypes().arrangeGlobalDeclaration(GD); + llvm::FunctionType *ThunkFnTy = CGM.getTypes().GetFunctionType(FnInfo); + + // If the type of the underlying GlobalValue is wrong, we'll have to replace + // it. It should be a declaration. + llvm::Function *ThunkFn = cast<llvm::Function>(Thunk->stripPointerCasts()); + if (ThunkFn->getFunctionType() != ThunkFnTy) { + llvm::GlobalValue *OldThunkFn = ThunkFn; + + assert(OldThunkFn->isDeclaration() && "Shouldn't replace non-declaration"); // Remove the name from the old thunk function and get a new thunk. OldThunkFn->setName(StringRef()); - Entry = cast<llvm::GlobalValue>(CGM.GetAddrOfThunk(GD, Thunk)); + ThunkFn = llvm::Function::Create(ThunkFnTy, llvm::Function::ExternalLinkage, + Name.str(), &CGM.getModule()); + CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn); // If needed, replace the old thunk with a bitcast. 
if (!OldThunkFn->use_empty()) { llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(Entry, OldThunkFn->getType()); + llvm::ConstantExpr::getBitCast(ThunkFn, OldThunkFn->getType()); OldThunkFn->replaceAllUsesWith(NewPtrForOldDecl); } @@ -485,61 +528,48 @@ void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk, OldThunkFn->eraseFromParent(); } - llvm::Function *ThunkFn = cast<llvm::Function>(Entry); bool ABIHasKeyFunctions = CGM.getTarget().getCXXABI().hasKeyFunctions(); bool UseAvailableExternallyLinkage = ForVTable && ABIHasKeyFunctions; if (!ThunkFn->isDeclaration()) { if (!ABIHasKeyFunctions || UseAvailableExternallyLinkage) { // There is already a thunk emitted for this function, do nothing. - return; + return ThunkFn; } - setThunkProperties(CGM, Thunk, ThunkFn, ForVTable, GD); - return; + setThunkProperties(CGM, TI, ThunkFn, ForVTable, GD); + return ThunkFn; } + // If this will be unprototyped, add the "thunk" attribute so that LLVM knows + // that the return type is meaningless. These thunks can be used to call + // functions with differing return types, and the caller is required to cast + // the prototype appropriately to extract the correct value. + if (IsUnprototyped) + ThunkFn->addFnAttr("thunk"); + CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn); - if (ThunkFn->isVarArg()) { + if (!IsUnprototyped && ThunkFn->isVarArg()) { // Varargs thunks are special; we can't just generate a call because // we can't copy the varargs. Our implementation is rather // expensive/sucky at the moment, so don't generate the thunk unless // we have to. // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly. if (UseAvailableExternallyLinkage) - return; - ThunkFn = - CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, Thunk); + return ThunkFn; + ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, + TI); } else { // Normal thunk body generation. - CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, Thunk); + CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped); } - setThunkProperties(CGM, Thunk, ThunkFn, ForVTable, GD); -} - -void CodeGenVTables::maybeEmitThunkForVTable(GlobalDecl GD, - const ThunkInfo &Thunk) { - // If the ABI has key functions, only the TU with the key function should emit - // the thunk. However, we can allow inlining of thunks if we emit them with - // available_externally linkage together with vtables when optimizations are - // enabled. - if (CGM.getTarget().getCXXABI().hasKeyFunctions() && - !CGM.getCodeGenOpts().OptimizationLevel) - return; - - // We can't emit thunks for member functions with incomplete types. 
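(Editorial aside, not part of the patch.) The thunks consolidated into maybeEmitThunk above are the usual this-adjusting vtable thunks; a minimal hypothetical hierarchy that produces one, which also shows why the Itanium path can treat vtable-time emission as a pure optimization — the TU that defines C::g must emit the thunk anyway:

    // Hypothetical classes: B is a non-primary base of C, so the B-in-C
    // vtable slot for g() needs a thunk that subtracts the offset of B
    // within C from 'this' before jumping to C::g.
    struct A { virtual void f(); };
    struct B { virtual void g(); };
    struct C : A, B { void g() override; };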
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - if (!CGM.getTypes().isFuncTypeConvertible( - MD->getType()->castAs<FunctionType>())) - return; - - emitThunk(GD, Thunk, /*ForVTable=*/true); + setThunkProperties(CGM, TI, ThunkFn, ForVTable, GD); + return ThunkFn; } -void CodeGenVTables::EmitThunks(GlobalDecl GD) -{ +void CodeGenVTables::EmitThunks(GlobalDecl GD) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl())->getCanonicalDecl(); @@ -554,7 +584,7 @@ void CodeGenVTables::EmitThunks(GlobalDecl GD) return; for (const ThunkInfo& Thunk : *ThunkInfoVector) - emitThunk(GD, Thunk, /*ForVTable=*/false); + maybeEmitThunk(GD, Thunk, /*ForVTable=*/false); } void CodeGenVTables::addVTableComponent( @@ -647,9 +677,8 @@ void CodeGenVTables::addVTableComponent( layout.vtable_thunks()[nextVTableThunkIndex].first == idx) { auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second; - maybeEmitThunkForVTable(GD, thunkInfo); nextVTableThunkIndex++; - fnPtr = CGM.GetAddrOfThunk(GD, thunkInfo); + fnPtr = maybeEmitThunk(GD, thunkInfo, /*ForVTable=*/true); // Otherwise we can use the method definition directly. } else { @@ -730,7 +759,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage); - CGM.setGlobalVisibility(VTable, RD, ForDefinition); + CGM.setGVProperties(VTable, RD); // V-tables are always unnamed_addr. VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); @@ -845,7 +874,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { llvm_unreachable("Invalid TemplateSpecializationKind!"); } -/// This is a callback from Sema to tell us that that a particular vtable is +/// This is a callback from Sema to tell us that a particular vtable is /// required to be emitted in this translation unit. /// /// This is only called for vtables that _must_ be emitted (mainly due to key @@ -983,31 +1012,29 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits PointerWidth = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0)); - typedef std::pair<const CXXRecordDecl *, unsigned> BSEntry; - std::vector<BSEntry> BitsetEntries; - // Create a bit set entry for each address point. + typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint; + std::vector<AddressPoint> AddressPoints; for (auto &&AP : VTLayout.getAddressPoints()) - BitsetEntries.push_back( - std::make_pair(AP.first.getBase(), - VTLayout.getVTableOffset(AP.second.VTableIndex) + - AP.second.AddressPointIndex)); - - // Sort the bit set entries for determinism. - std::sort(BitsetEntries.begin(), BitsetEntries.end(), - [this](const BSEntry &E1, const BSEntry &E2) { - if (&E1 == &E2) + AddressPoints.push_back(std::make_pair( + AP.first.getBase(), VTLayout.getVTableOffset(AP.second.VTableIndex) + + AP.second.AddressPointIndex)); + + // Sort the address points for determinism. 
+ llvm::sort(AddressPoints.begin(), AddressPoints.end(), + [this](const AddressPoint &AP1, const AddressPoint &AP2) { + if (&AP1 == &AP2) return false; std::string S1; llvm::raw_string_ostream O1(S1); getCXXABI().getMangleContext().mangleTypeName( - QualType(E1.first->getTypeForDecl(), 0), O1); + QualType(AP1.first->getTypeForDecl(), 0), O1); O1.flush(); std::string S2; llvm::raw_string_ostream O2(S2); getCXXABI().getMangleContext().mangleTypeName( - QualType(E2.first->getTypeForDecl(), 0), O2); + QualType(AP2.first->getTypeForDecl(), 0), O2); O2.flush(); if (S1 < S2) @@ -1015,10 +1042,26 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, if (S1 != S2) return false; - return E1.second < E2.second; + return AP1.second < AP2.second; }); - for (auto BitsetEntry : BitsetEntries) - AddVTableTypeMetadata(VTable, PointerWidth * BitsetEntry.second, - BitsetEntry.first); + ArrayRef<VTableComponent> Comps = VTLayout.vtable_components(); + for (auto AP : AddressPoints) { + // Create type metadata for the address point. + AddVTableTypeMetadata(VTable, PointerWidth * AP.second, AP.first); + + // The class associated with each address point could also potentially be + // used for indirect calls via a member function pointer, so we need to + // annotate the address of each function pointer with the appropriate member + // function pointer type. + for (unsigned I = 0; I != Comps.size(); ++I) { + if (Comps[I].getKind() != VTableComponent::CK_FunctionPointer) + continue; + llvm::Metadata *MD = CreateMetadataIdentifierForVirtualMemPtrType( + Context.getMemberPointerType( + Comps[I].getFunctionDecl()->getType(), + Context.getRecordType(AP.first).getTypePtr())); + VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD); + } + } } diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h index b92212c368a9..a11474a15ea4 100644 --- a/lib/CodeGen/CGVTables.h +++ b/lib/CodeGen/CGVTables.h @@ -57,12 +57,10 @@ class CodeGenVTables { /// Cache for the deleted virtual member call function. llvm::Constant *DeletedVirtualFn = nullptr; - /// emitThunk - Emit a single thunk. - void emitThunk(GlobalDecl GD, const ThunkInfo &Thunk, bool ForVTable); - - /// maybeEmitThunkForVTable - Emit the given thunk for the vtable if needed by - /// the ABI. - void maybeEmitThunkForVTable(GlobalDecl GD, const ThunkInfo &Thunk); + /// Get the address of a thunk and emit it if necessary. + llvm::Constant *maybeEmitThunk(GlobalDecl GD, + const ThunkInfo &ThunkAdjustments, + bool ForVTable); void addVTableComponent(ConstantArrayBuilder &builder, const VTableLayout &layout, unsigned idx, diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index 7d07ea4516c9..418bda1f41bb 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -193,7 +193,7 @@ class LValue { // The alignment to use when accessing this lvalue. (For vector elements, // this is the alignment of the whole vector.) - int64_t Alignment; + unsigned Alignment; // objective-c's ivar bool Ivar:1; @@ -215,13 +215,13 @@ class LValue { // to make the default bitfield pattern all-zeroes. bool ImpreciseLifetime : 1; - LValueBaseInfo BaseInfo; - TBAAAccessInfo TBAAInfo; - // This flag shows if a nontemporal load/stores should be used when accessing // this lvalue. 
bool Nontemporal : 1; + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Expr *BaseIvarExp; private: @@ -231,7 +231,10 @@ private: "initializing l-value with zero alignment!"); this->Type = Type; this->Quals = Quals; - this->Alignment = Alignment.getQuantity(); + const unsigned MaxAlign = 1U << 31; + this->Alignment = Alignment.getQuantity() <= MaxAlign + ? Alignment.getQuantity() + : MaxAlign; assert(this->Alignment == Alignment.getQuantity() && "Alignment exceeds allowed max!"); this->BaseInfo = BaseInfo; @@ -398,7 +401,7 @@ public: return R; } - /// \brief Create a new object to represent a bit-field access. + /// Create a new object to represent a bit-field access. /// /// \param Addr - The base address of the bit-field sequence this /// bit-field refers to. @@ -469,17 +472,25 @@ class AggValueSlot { /// evaluating an expression which constructs such an object. bool AliasedFlag : 1; + /// This is set to true if the tail padding of this slot might overlap + /// another object that may have already been initialized (and whose + /// value must be preserved by this initialization). If so, we may only + /// store up to the dsize of the type. Otherwise we can widen stores to + /// the size of the type. + bool OverlapFlag : 1; + public: enum IsAliased_t { IsNotAliased, IsAliased }; enum IsDestructed_t { IsNotDestructed, IsDestructed }; enum IsZeroed_t { IsNotZeroed, IsZeroed }; + enum Overlap_t { DoesNotOverlap, MayOverlap }; enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers }; /// ignored - Returns an aggregate value slot indicating that the /// aggregate value is being ignored. static AggValueSlot ignored() { return forAddr(Address::invalid(), Qualifiers(), IsNotDestructed, - DoesNotNeedGCBarriers, IsNotAliased); + DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap); } /// forAddr - Make a slot for an aggregate value. @@ -497,6 +508,7 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed) { AggValueSlot AV; if (addr.isValid()) { @@ -511,6 +523,7 @@ public: AV.ObjCGCFlag = needsGC; AV.ZeroedFlag = isZeroed; AV.AliasedFlag = isAliased; + AV.OverlapFlag = mayOverlap; return AV; } @@ -518,9 +531,10 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed) { - return forAddr(LV.getAddress(), - LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed); + return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC, + isAliased, mayOverlap, isZeroed); } IsDestructed_t isExternallyDestructed() const { @@ -568,6 +582,10 @@ public: return IsAliased_t(AliasedFlag); } + Overlap_t mayOverlap() const { + return Overlap_t(OverlapFlag); + } + RValue asRValue() const { if (isIgnored()) { return RValue::getIgnored(); @@ -580,6 +598,14 @@ public: IsZeroed_t isZeroed() const { return IsZeroed_t(ZeroedFlag); } + + /// Get the preferred size to use when storing a value to this slot. This + /// is the type size unless that might overlap another object, in which + /// case it's the dsize. + CharUnits getPreferredSize(ASTContext &Ctx, QualType Type) const { + return mayOverlap() ? 
Ctx.getTypeInfoDataSizeInChars(Type).first + : Ctx.getTypeSizeInChars(Type); + } }; } // end namespace CodeGen diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 84248cc64719..2a0f4f0e83ec 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS Coverage IPO IRReader + AggressiveInstCombine InstCombine Instrumentation LTO @@ -31,6 +32,10 @@ if (CLANG_BUILT_STANDALONE) set(codegen_deps) endif() +if (MSVC) + set_source_files_properties(CodeGenModule.cpp PROPERTIES COMPILE_FLAGS /bigobj) +endif() + add_clang_library(clangCodeGen BackendUtil.cpp CGAtomic.cpp @@ -56,6 +61,7 @@ add_clang_library(clangCodeGen CGExprScalar.cpp CGGPUBuiltin.cpp CGLoopInfo.cpp + CGNonTrivialStruct.cpp CGObjC.cpp CGObjCGNU.cpp CGObjCMac.cpp @@ -93,7 +99,6 @@ add_clang_library(clangCodeGen LINK_LIBS clangAnalysis clangAST - clangAnalysis clangBasic clangFrontend clangLex diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 6ca69d63cdce..7ca55070d4a0 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -126,7 +126,7 @@ namespace clang { Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { - llvm::TimePassesIsEnabled = TimePasses; + FrontendTimesIsEnabled = TimePasses; } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr<llvm::Module> takeModule() { @@ -144,12 +144,12 @@ namespace clang { Context = &Ctx; - if (llvm::TimePassesIsEnabled) + if (FrontendTimesIsEnabled) LLVMIRGeneration.startTimer(); Gen->Initialize(Ctx); - if (llvm::TimePassesIsEnabled) + if (FrontendTimesIsEnabled) LLVMIRGeneration.stopTimer(); } @@ -159,7 +159,7 @@ namespace clang { "LLVM IR generation of declaration"); // Recurse. - if (llvm::TimePassesIsEnabled) { + if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount += 1; if (LLVMIRGenerationRefCount == 1) LLVMIRGeneration.startTimer(); @@ -167,7 +167,7 @@ namespace clang { Gen->HandleTopLevelDecl(D); - if (llvm::TimePassesIsEnabled) { + if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount -= 1; if (LLVMIRGenerationRefCount == 0) LLVMIRGeneration.stopTimer(); @@ -180,12 +180,12 @@ namespace clang { PrettyStackTraceDecl CrashInfo(D, SourceLocation(), Context->getSourceManager(), "LLVM IR generation of inline function"); - if (llvm::TimePassesIsEnabled) + if (FrontendTimesIsEnabled) LLVMIRGeneration.startTimer(); Gen->HandleInlineFunctionDefinition(D); - if (llvm::TimePassesIsEnabled) + if (FrontendTimesIsEnabled) LLVMIRGeneration.stopTimer(); } @@ -227,7 +227,7 @@ namespace clang { void HandleTranslationUnit(ASTContext &C) override { { PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); - if (llvm::TimePassesIsEnabled) { + if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount += 1; if (LLVMIRGenerationRefCount == 1) LLVMIRGeneration.startTimer(); @@ -235,13 +235,13 @@ namespace clang { Gen->HandleTranslationUnit(C); - if (llvm::TimePassesIsEnabled) { + if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount -= 1; if (LLVMIRGenerationRefCount == 0) LLVMIRGeneration.stopTimer(); } - IRGenFinished = true; + IRGenFinished = true; } // Silently ignore if we weren't initialized for some reason. @@ -341,17 +341,17 @@ namespace clang { SourceLocation LocCookie); void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); - /// \brief Specialized handler for InlineAsm diagnostic. + /// Specialized handler for InlineAsm diagnostic. 
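(Editorial aside, not part of the patch.) The AggValueSlot::MayOverlap flag and getPreferredSize helper added to CGValue.h above distinguish a type's data size (dsize) from its full size; a minimal hypothetical example of the layout that makes the distinction necessary:

    // Base is non-POD, so the Itanium layout may reuse its tail padding.
    struct Base { Base(); int i; char c; };   // sizeof(Base) == 8, dsize == 5
    struct Derived : Base { char d; };        // d lands in Base's padding byte
    // Stores into the Base subobject of a Derived (MayOverlap) must be
    // limited to dsize(Base) so the byte holding Derived::d is never
    // clobbered; a standalone Base (DoesNotOverlap) may be written with the
    // full sizeof(Base).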
/// \return True if the diagnostic has been successfully reported, false /// otherwise. bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D); - /// \brief Specialized handler for StackSize diagnostic. + /// Specialized handler for StackSize diagnostic. /// \return True if the diagnostic has been successfully reported, false /// otherwise. bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); - /// \brief Specialized handler for unsupported backend feature diagnostic. + /// Specialized handler for unsupported backend feature diagnostic. void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); - /// \brief Specialized handlers for optimization remarks. + /// Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, @@ -697,7 +697,7 @@ void BackendConsumer::OptimizationFailureHandler( EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure); } -/// \brief This function is invoked when the backend needs +/// This function is invoked when the backend needs /// to report something to the user. void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { unsigned DiagID = diag::err_fe_inline_asm; @@ -846,7 +846,10 @@ GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) { std::unique_ptr<ASTConsumer> CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BackendAction BA = static_cast<BackendAction>(Act); - std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, InFile, BA); + std::unique_ptr<raw_pwrite_stream> OS = CI.takeOutputStream(); + if (!OS) + OS = GetOutputStream(CI, InFile, BA); + if (BA != Backend_EmitNothing && !OS) return nullptr; @@ -947,12 +950,21 @@ std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) { return {}; }; - Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef); - if (!BMOrErr) - return DiagErrors(BMOrErr.takeError()); - + Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef); + if (!BMsOrErr) + return DiagErrors(BMsOrErr.takeError()); + BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr); + // We have nothing to do if the file contains no ThinLTO module. This is + // possible if ThinLTO compilation was not able to split module. Content of + // the file was already processed by indexing and will be passed to the + // linker using merged object file. 
+ if (!Bm) { + auto M = llvm::make_unique<llvm::Module>("empty", *VMContext); + M->setTargetTriple(CI.getTargetOpts().Triple); + return M; + } Expected<std::unique_ptr<llvm::Module>> MOrErr = - BMOrErr->parseModule(*VMContext); + Bm->parseModule(*VMContext); if (!MOrErr) return DiagErrors(MOrErr.takeError()); return std::move(*MOrErr); diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 9dbd7cc3fcbf..3c582688e91e 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -65,25 +65,9 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()), Builder(cgm, cgm.getModule().getContext(), llvm::ConstantFolder(), CGBuilderInserterTy(this)), - CurFn(nullptr), ReturnValue(Address::invalid()), - CapturedStmtInfo(nullptr), SanOpts(CGM.getLangOpts().Sanitize), - IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false), - SawAsmBlock(false), IsOutlinedSEHHelper(false), BlockInfo(nullptr), - BlockPointer(nullptr), LambdaThisCaptureField(nullptr), - NormalCleanupDest(nullptr), NextCleanupDestIndex(1), - FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr), - EHSelectorSlot(nullptr), DebugInfo(CGM.getModuleDebugInfo()), - DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(nullptr), - PGO(cgm), SwitchInsn(nullptr), SwitchWeights(nullptr), - CaseRangeBlock(nullptr), UnreachableBlock(nullptr), NumReturnExprs(0), - NumSimpleReturnExprs(0), CXXABIThisDecl(nullptr), - CXXABIThisValue(nullptr), CXXThisValue(nullptr), - CXXStructorImplicitParamDecl(nullptr), - CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr), - CurLexicalScope(nullptr), TerminateLandingPad(nullptr), - TerminateHandler(nullptr), TrapBB(nullptr), - ShouldEmitLifetimeMarkers( - shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) { + SanOpts(CGM.getLangOpts().Sanitize), DebugInfo(CGM.getModuleDebugInfo()), + PGO(cgm), ShouldEmitLifetimeMarkers(shouldEmitLifetimeMarkers( + CGM.getCodeGenOpts(), CGM.getLangOpts())) { if (!suppressNewContext) CGM.getCXXABI().getMangleContext().startNewFunction(); @@ -419,6 +403,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { EmitIfUsed(*this, TerminateHandler); EmitIfUsed(*this, UnreachableBlock); + for (const auto &FuncletAndParent : TerminateFunclets) + EmitIfUsed(*this, FuncletAndParent.second); + if (CGM.getCodeGenOpts().EmitDeclMetadata) EmitDeclMetadata(); @@ -436,11 +423,17 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // if compiled with no optimizations. We do it for coroutine as the lifetime // of CleanupDestSlot alloca make correct coroutine frame building very // difficult. - if (NormalCleanupDest && isCoroutine()) { + if (NormalCleanupDest.isValid() && isCoroutine()) { llvm::DominatorTree DT(*CurFn); - llvm::PromoteMemToReg(NormalCleanupDest, DT); - NormalCleanupDest = nullptr; + llvm::PromoteMemToReg( + cast<llvm::AllocaInst>(NormalCleanupDest.getPointer()), DT); + NormalCleanupDest = Address::invalid(); } + + // Add the required-vector-width attribute. + if (LargestVectorWidth != 0) + CurFn->addFnAttr("min-legal-vector-width", + llvm::utostr(LargestVectorWidth)); } /// ShouldInstrumentFunction - Return true if the current function should be @@ -462,9 +455,19 @@ bool CodeGenFunction::ShouldXRayInstrumentFunction() const { } /// AlwaysEmitXRayCustomEvents - Return true if we should emit IR for calls to -/// the __xray_customevent(...) 
builin calls, when doing XRay instrumentation. +/// the __xray_customevent(...) builtin calls, when doing XRay instrumentation. bool CodeGenFunction::AlwaysEmitXRayCustomEvents() const { - return CGM.getCodeGenOpts().XRayAlwaysEmitCustomEvents; + return CGM.getCodeGenOpts().XRayInstrumentFunctions && + (CGM.getCodeGenOpts().XRayAlwaysEmitCustomEvents || + CGM.getCodeGenOpts().XRayInstrumentationBundle.Mask == + XRayInstrKind::Custom); +} + +bool CodeGenFunction::AlwaysEmitXRayTypedEvents() const { + return CGM.getCodeGenOpts().XRayInstrumentFunctions && + (CGM.getCodeGenOpts().XRayAlwaysEmitTypedEvents || + CGM.getCodeGenOpts().XRayInstrumentationBundle.Mask == + XRayInstrKind::Typed); } llvm::Constant * @@ -842,14 +845,24 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (D) { // Apply the no_sanitize* attributes to SanOpts. - for (auto Attr : D->specific_attrs<NoSanitizeAttr>()) - SanOpts.Mask &= ~Attr->getMask(); + for (auto Attr : D->specific_attrs<NoSanitizeAttr>()) { + SanitizerMask mask = Attr->getMask(); + SanOpts.Mask &= ~mask; + if (mask & SanitizerKind::Address) + SanOpts.set(SanitizerKind::KernelAddress, false); + if (mask & SanitizerKind::KernelAddress) + SanOpts.set(SanitizerKind::Address, false); + if (mask & SanitizerKind::HWAddress) + SanOpts.set(SanitizerKind::KernelHWAddress, false); + if (mask & SanitizerKind::KernelHWAddress) + SanOpts.set(SanitizerKind::HWAddress, false); + } } // Apply sanitizer attributes to the function. if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress)) Fn->addFnAttr(llvm::Attribute::SanitizeAddress); - if (SanOpts.hasOneOf(SanitizerKind::HWAddress)) + if (SanOpts.hasOneOf(SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress)) Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); if (SanOpts.has(SanitizerKind::Thread)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); @@ -857,6 +870,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr(llvm::Attribute::SanitizeMemory); if (SanOpts.has(SanitizerKind::SafeStack)) Fn->addFnAttr(llvm::Attribute::SafeStack); + if (SanOpts.has(SanitizerKind::ShadowCallStack)) + Fn->addFnAttr(llvm::Attribute::ShadowCallStack); + + // Apply fuzzing attribute to the function. + if (SanOpts.hasOneOf(SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink)) + Fn->addFnAttr(llvm::Attribute::OptForFuzzing); // Ignore TSan memory acesses from within ObjC/ObjC++ dealloc, initialize, // .cxx_destruct, __destroy_helper_block_ and all of their calees at run time. @@ -884,7 +903,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, } // Apply xray attributes to the function (as a string, for now) - if (D && ShouldXRayInstrumentFunction()) { + bool InstrumentXray = ShouldXRayInstrumentFunction() && + CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::Function); + if (D && InstrumentXray) { if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { if (XRayAttr->alwaysXRayInstrument()) Fn->addFnAttr("function-instrument", "xray-always"); @@ -921,8 +943,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) { if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) { + // Remove any (C++17) exception specifications, to allow calling e.g. a + // noexcept function through a non-noexcept pointer. 
+ auto ProtoTy = + getContext().getFunctionTypeWithExceptionSpec(FD->getType(), + EST_None); llvm::Constant *FTRTTIConst = - CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true); + CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true); llvm::Constant *FTRTTIConstEncoded = EncodeAddrForUseInPrologue(Fn, FTRTTIConst); llvm::Constant *PrologueStructElems[] = {PrologueSig, @@ -987,7 +1014,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, ArgTypes.push_back(VD->getType()); QualType FnType = getContext().getFunctionType( RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); - DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder); + DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, CurFuncIsThunk, + Builder); } if (ShouldInstrumentFunction()) { @@ -1006,10 +1034,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // The attribute "counting-function" is set to mcount function name which is // architecture dependent. if (CGM.getCodeGenOpts().InstrumentForProfiling) { - if (CGM.getCodeGenOpts().CallFEntry) - Fn->addFnAttr("fentry-call", "true"); - else { - if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) { + // Calls to fentry/mcount should not be generated if function has + // the no_instrument_function attribute. + if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) { + if (CGM.getCodeGenOpts().CallFEntry) + Fn->addFnAttr("fentry-call", "true"); + else { Fn->addFnAttr("instrument-function-entry-inlined", getTarget().getMCountName()); } @@ -1055,6 +1085,11 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, EmitStartEHSpec(CurCodeDecl); PrologueCleanupDepth = EHStack.stable_begin(); + + // Emit OpenMP specific initialization of the device functions. + if (getLangOpts().OpenMP && CurCodeDecl) + CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl); + EmitFunctionProlog(*CurFnInfo, CurFn, Args); if (D && isa<CXXMethodDecl>(D) && cast<CXXMethodDecl>(D)->isInstance()) { @@ -1108,8 +1143,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // may have a static invoker function, which may call this operator with // a null 'this' pointer. if (isLambdaCallOperator(MD) && - cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() == - LCD_None) + MD->getParent()->getLambdaCaptureDefault() == LCD_None) SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall @@ -1141,6 +1175,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // Emit a location at the end of the prologue. if (CGDebugInfo *DI = getDebugInfo()) DI->EmitLocation(Builder, StartLoc); + + // TODO: Do we need to handle this in two places like we do with + // target-features/target-cpu? 
+ if (CurFuncDecl) + if (const auto *VecWidth = CurFuncDecl->getAttr<MinVectorWidthAttr>()) + LargestVectorWidth = VecWidth->getVectorWidth(); } void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, @@ -1748,12 +1788,9 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { if (const VariableArrayType *vlaType = dyn_cast_or_null<VariableArrayType>( getContext().getAsArrayType(Ty))) { - QualType eltType; - llvm::Value *numElts; - std::tie(numElts, eltType) = getVLASize(vlaType); - - SizeVal = numElts; - CharUnits eltSize = getContext().getTypeSizeInChars(eltType); + auto VlaSize = getVLASize(vlaType); + SizeVal = VlaSize.NumElts; + CharUnits eltSize = getContext().getTypeSizeInChars(VlaSize.Type); if (!eltSize.isOne()) SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(eltSize)); vla = vlaType; @@ -1836,7 +1873,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, // this is the size of the VLA in bytes, not its size in elements. llvm::Value *numVLAElements = nullptr; if (isa<VariableArrayType>(arrayType)) { - numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).first; + numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).NumElts; // Walk into all VLAs. This doesn't require changes to addr, // which has type T* where T is the first non-VLA element type. @@ -1917,14 +1954,13 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, return numElements; } -std::pair<llvm::Value*, QualType> -CodeGenFunction::getVLASize(QualType type) { +CodeGenFunction::VlaSizePair CodeGenFunction::getVLASize(QualType type) { const VariableArrayType *vla = getContext().getAsVariableArrayType(type); assert(vla && "type was not a variable array type!"); return getVLASize(vla); } -std::pair<llvm::Value*, QualType> +CodeGenFunction::VlaSizePair CodeGenFunction::getVLASize(const VariableArrayType *type) { // The number of elements so far; always size_t. llvm::Value *numElements = nullptr; @@ -1945,7 +1981,22 @@ CodeGenFunction::getVLASize(const VariableArrayType *type) { } } while ((type = getContext().getAsVariableArrayType(elementType))); - return std::pair<llvm::Value*,QualType>(numElements, elementType); + return { numElements, elementType }; +} + +CodeGenFunction::VlaSizePair +CodeGenFunction::getVLAElements1D(QualType type) { + const VariableArrayType *vla = getContext().getAsVariableArrayType(type); + assert(vla && "type was not a variable array type!"); + return getVLAElements1D(vla); +} + +CodeGenFunction::VlaSizePair +CodeGenFunction::getVLAElements1D(const VariableArrayType *Vla) { + llvm::Value *VlaSize = VLASizeMap[Vla->getSizeExpr()]; + assert(VlaSize && "no size for VLA!"); + assert(VlaSize->getType() == SizeTy); + return { VlaSize, Vla->getElementType() }; } void CodeGenFunction::EmitVariablyModifiedType(QualType type) { @@ -2228,7 +2279,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, return std::all_of( ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) { SmallVector<StringRef, 1> OrFeatures; - Feature.split(OrFeatures, "|"); + Feature.split(OrFeatures, '|'); return std::any_of(OrFeatures.begin(), OrFeatures.end(), [&](StringRef Feature) { if (!CallerFeatureMap.lookup(Feature)) { @@ -2266,17 +2317,28 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, // Return if the builtin doesn't have any required features. 
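(Editorial aside, not part of the patch.) checkTargetFeatures, extended above to also consult CPUSpecificAttr and the parsed target attribute, verifies that the caller provides every feature its callee requires and otherwise reports diagnostics such as err_builtin_needs_feature; a hypothetical x86 example of the situation being diagnosed:

    // Hypothetical translation unit compiled without -mavx2: the AVX2
    // builtin is fine inside the target("avx2") function but is rejected in
    // a caller that does not provide the feature.
    #include <immintrin.h>
    __attribute__((target("avx2")))
    __m256i ok(__m256i a, __m256i b)  { return _mm256_add_epi32(a, b); }
    __m256i bad(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); } // error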
if (!FeatureList || StringRef(FeatureList) == "") return; - StringRef(FeatureList).split(ReqFeatures, ","); + StringRef(FeatureList).split(ReqFeatures, ','); if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) CGM.getDiags().Report(E->getLocStart(), diag::err_builtin_needs_feature) << TargetDecl->getDeclName() << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); - } else if (TargetDecl->hasAttr<TargetAttr>()) { + } else if (TargetDecl->hasAttr<TargetAttr>() || + TargetDecl->hasAttr<CPUSpecificAttr>()) { // Get the required features for the callee. + + const TargetAttr *TD = TargetDecl->getAttr<TargetAttr>(); + TargetAttr::ParsedTargetAttr ParsedAttr = CGM.filterFunctionTargetAttrs(TD); + SmallVector<StringRef, 1> ReqFeatures; llvm::StringMap<bool> CalleeFeatureMap; CGM.getFunctionFeatureMap(CalleeFeatureMap, TargetDecl); + + for (const auto &F : ParsedAttr.Features) { + if (F[0] == '+' && CalleeFeatureMap.lookup(F.substr(1))) + ReqFeatures.push_back(StringRef(F).substr(1)); + } + for (const auto &F : CalleeFeatureMap) { // Only positive features are "required". if (F.getValue()) @@ -2297,6 +2359,99 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { CGM.getSanStats().create(IRB, SSK); } +llvm::Value *CodeGenFunction::FormResolverCondition( + const TargetMultiVersionResolverOption &RO) { + llvm::Value *TrueCondition = nullptr; + if (!RO.ParsedAttribute.Architecture.empty()) + TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture); + + if (!RO.ParsedAttribute.Features.empty()) { + SmallVector<StringRef, 8> FeatureList; + llvm::for_each(RO.ParsedAttribute.Features, + [&FeatureList](const std::string &Feature) { + FeatureList.push_back(StringRef{Feature}.substr(1)); + }); + llvm::Value *FeatureCmp = EmitX86CpuSupports(FeatureList); + TrueCondition = TrueCondition ? Builder.CreateAnd(TrueCondition, FeatureCmp) + : FeatureCmp; + } + return TrueCondition; +} + +void CodeGenFunction::EmitTargetMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<TargetMultiVersionResolverOption> Options) { + assert((getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86 || + getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86_64) && + "Only implemented for x86 targets"); + + // Main function's basic block. + llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver); + Builder.SetInsertPoint(CurBlock); + EmitX86CpuInit(); + + llvm::Function *DefaultFunc = nullptr; + for (const TargetMultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + llvm::Value *TrueCondition = FormResolverCondition(RO); + + if (!TrueCondition) { + DefaultFunc = RO.Function; + } else { + llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver); + llvm::IRBuilder<> RetBuilder(RetBlock); + RetBuilder.CreateRet(RO.Function); + CurBlock = createBasicBlock("ro_else", Resolver); + Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); + } + } + + assert(DefaultFunc && "No default version?"); + // Emit return from the 'else-ist' block. + Builder.SetInsertPoint(CurBlock); + Builder.CreateRet(DefaultFunc); +} + +void CodeGenFunction::EmitCPUDispatchMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<CPUDispatchMultiVersionResolverOption> Options) { + assert((getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86 || + getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86_64) && + "Only implemented for x86 targets"); + + // Main function's basic block. 
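Editorial aside: the resolver emitted by EmitTargetMultiVersionResolver above corresponds to x86 function multiversioning at the source level; a minimal example of the construct (function name and bodies are illustrative):

    __attribute__((target("default")))      int dispatch_demo() { return 0; }
    __attribute__((target("avx2")))         int dispatch_demo() { return 2; }
    __attribute__((target("arch=skylake"))) int dispatch_demo() { return 1; }

    // Calls to dispatch_demo() go through the generated resolver: after
    // EmitX86CpuInit(), each option is tested with __builtin_cpu_is /
    // __builtin_cpu_supports-style conditions and the matching version is
    // returned, with the "default" definition as the fall-through.
    int call_it() { return dispatch_demo(); }
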
+ llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); + Builder.SetInsertPoint(CurBlock); + EmitX86CpuInit(); + + for (const CPUDispatchMultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + + // "generic" case should catch-all. + if (RO.FeatureMask == 0) { + Builder.CreateRet(RO.Function); + return; + } + llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); + llvm::IRBuilder<> RetBuilder(RetBlock); + RetBuilder.CreateRet(RO.Function); + CurBlock = createBasicBlock("resolver_else", Resolver); + llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask); + Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); + } + + Builder.SetInsertPoint(CurBlock); + llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); +} + llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) { if (CGDebugInfo *DI = getDebugInfo()) return DI->SourceLocToDebugLoc(Location); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index dd4c2e43ef64..79870ed59c96 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -34,6 +34,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -137,6 +138,88 @@ enum SanitizerHandler { #undef SANITIZER_CHECK }; +/// Helper class with most of the code for saving a value for a +/// conditional expression cleanup. +struct DominatingLLVMValue { + typedef llvm::PointerIntPair<llvm::Value*, 1, bool> saved_type; + + /// Answer whether the given value needs extra work to be saved. + static bool needsSaving(llvm::Value *value) { + // If it's not an instruction, we don't need to save. + if (!isa<llvm::Instruction>(value)) return false; + + // If it's an instruction in the entry block, we don't need to save. + llvm::BasicBlock *block = cast<llvm::Instruction>(value)->getParent(); + return (block != &block->getParent()->getEntryBlock()); + } + + static saved_type save(CodeGenFunction &CGF, llvm::Value *value); + static llvm::Value *restore(CodeGenFunction &CGF, saved_type value); +}; + +/// A partial specialization of DominatingValue for llvm::Values that +/// might be llvm::Instructions. +template <class T> struct DominatingPointer<T,true> : DominatingLLVMValue { + typedef T *type; + static type restore(CodeGenFunction &CGF, saved_type value) { + return static_cast<T*>(DominatingLLVMValue::restore(CGF, value)); + } +}; + +/// A specialization of DominatingValue for Address. +template <> struct DominatingValue<Address> { + typedef Address type; + + struct saved_type { + DominatingLLVMValue::saved_type SavedValue; + CharUnits Alignment; + }; + + static bool needsSaving(type value) { + return DominatingLLVMValue::needsSaving(value.getPointer()); + } + static saved_type save(CodeGenFunction &CGF, type value) { + return { DominatingLLVMValue::save(CGF, value.getPointer()), + value.getAlignment() }; + } + static type restore(CodeGenFunction &CGF, saved_type value) { + return Address(DominatingLLVMValue::restore(CGF, value.SavedValue), + value.Alignment); + } +}; + +/// A specialization of DominatingValue for RValue. 
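Editorial aside: the RValue specialization that follows completes the same save/restore protocol the Address specialization above uses; a much-simplified sketch of that protocol, with stand-in types rather than the real interfaces:

    // The idea: a value produced on one branch of a conditional may not dominate
    // the join point where a cleanup runs, so it is spilled to a "saved" form
    // and re-materialized when the cleanup is finally emitted.
    template <class T> struct SavedValue {
      using saved_type = T;                        // trivially copyable stand-in
      static bool needsSaving(const T &) { return false; }
      static saved_type save(const T &V) { return V; }
      static T restore(const saved_type &V) { return V; }
    };
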
+template <> struct DominatingValue<RValue> { + typedef RValue type; + class saved_type { + enum Kind { ScalarLiteral, ScalarAddress, AggregateLiteral, + AggregateAddress, ComplexAddress }; + + llvm::Value *Value; + unsigned K : 3; + unsigned Align : 29; + saved_type(llvm::Value *v, Kind k, unsigned a = 0) + : Value(v), K(k), Align(a) {} + + public: + static bool needsSaving(RValue value); + static saved_type save(CodeGenFunction &CGF, RValue value); + RValue restore(CodeGenFunction &CGF); + + // implementations in CGCleanup.cpp + }; + + static bool needsSaving(type value) { + return saved_type::needsSaving(value); + } + static saved_type save(CodeGenFunction &CGF, type value) { + return saved_type::save(CGF, value); + } + static type restore(CodeGenFunction &CGF, saved_type value) { + return value.restore(CGF); + } +}; + /// CodeGenFunction - This class organizes the per-function state that is used /// while generating LLVM code. class CodeGenFunction : public CodeGenTypeCache { @@ -200,7 +283,7 @@ public: Address UB)> CodeGenDispatchBoundsTy; - /// \brief CGBuilder insert helper. This function is called after an + /// CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, @@ -213,7 +296,7 @@ public: const Decl *CurCodeDecl; const CGFunctionInfo *CurFnInfo; QualType FnRetTy; - llvm::Function *CurFn; + llvm::Function *CurFn = nullptr; // Holds coroutine data if the current function is a coroutine. We use a // wrapper to manage its lifetime, so that we don't have to define CGCoroData @@ -241,7 +324,7 @@ public: /// ReturnValue - The temporary alloca to hold the return /// value. This is invalid iff the function has no return value. - Address ReturnValue; + Address ReturnValue = Address::invalid(); /// Return true if a label was seen in the current scope. bool hasLabelBeenSeenInCurrentScope() const { @@ -254,7 +337,7 @@ public: /// we prefer to insert allocas. llvm::AssertingVH<llvm::Instruction> AllocaInsertPt; - /// \brief API for captured statement code generation. + /// API for captured statement code generation. class CGCapturedStmtInfo { public: explicit CGCapturedStmtInfo(CapturedRegionKind K = CR_Default) @@ -282,10 +365,10 @@ public: CapturedRegionKind getKind() const { return Kind; } virtual void setContextValue(llvm::Value *V) { ThisValue = V; } - // \brief Retrieve the value of the context parameter. + // Retrieve the value of the context parameter. virtual llvm::Value *getContextValue() const { return ThisValue; } - /// \brief Lookup the captured field decl for a variable. + /// Lookup the captured field decl for a variable. virtual const FieldDecl *lookup(const VarDecl *VD) const { return CaptureFields.lookup(VD->getCanonicalDecl()); } @@ -297,32 +380,32 @@ public: return true; } - /// \brief Emit the captured statement body. + /// Emit the captured statement body. virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) { CGF.incrementProfileCounter(S); CGF.EmitStmt(S); } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. virtual StringRef getHelperName() const { return "__captured_stmt"; } private: - /// \brief The kind of captured statement being generated. + /// The kind of captured statement being generated. CapturedRegionKind Kind; - /// \brief Keep the map between VarDecl and FieldDecl. + /// Keep the map between VarDecl and FieldDecl. 
llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields; - /// \brief The base address of the captured record, passed in as the first + /// The base address of the captured record, passed in as the first /// argument of the parallel region function. llvm::Value *ThisValue; - /// \brief Captured 'this' type. + /// Captured 'this' type. FieldDecl *CXXThisFieldDecl; }; - CGCapturedStmtInfo *CapturedStmtInfo; + CGCapturedStmtInfo *CapturedStmtInfo = nullptr; - /// \brief RAII for correct setting/restoring of CapturedStmtInfo. + /// RAII for correct setting/restoring of CapturedStmtInfo. class CGCapturedStmtRAII { private: CodeGenFunction &CGF; @@ -361,13 +444,13 @@ public: } }; - /// \brief Sanitizers enabled for this function. + /// Sanitizers enabled for this function. SanitizerSet SanOpts; - /// \brief True if CodeGen currently emits code implementing sanitizer checks. - bool IsSanitizerScope; + /// True if CodeGen currently emits code implementing sanitizer checks. + bool IsSanitizerScope = false; - /// \brief RAII object to set/unset CodeGenFunction::IsSanitizerScope. + /// RAII object to set/unset CodeGenFunction::IsSanitizerScope. class SanitizerScope { CodeGenFunction *CGF; public: @@ -377,28 +460,28 @@ public: /// In C++, whether we are code generating a thunk. This controls whether we /// should emit cleanups. - bool CurFuncIsThunk; + bool CurFuncIsThunk = false; /// In ARC, whether we should autorelease the return value. - bool AutoreleaseResult; + bool AutoreleaseResult = false; /// Whether we processed a Microsoft-style asm block during CodeGen. These can /// potentially set the return value. - bool SawAsmBlock; + bool SawAsmBlock = false; const FunctionDecl *CurSEHParent = nullptr; /// True if the current function is an outlined SEH helper. This can be a /// finally block or filter expression. - bool IsOutlinedSEHHelper; + bool IsOutlinedSEHHelper = false; - const CodeGen::CGBlockInfo *BlockInfo; - llvm::Value *BlockPointer; + const CodeGen::CGBlockInfo *BlockInfo = nullptr; + llvm::Value *BlockPointer = nullptr; llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; - FieldDecl *LambdaThisCaptureField; + FieldDecl *LambdaThisCaptureField = nullptr; - /// \brief A mapping from NRVO variables to the flags used to indicate + /// A mapping from NRVO variables to the flags used to indicate /// when the NRVO has been applied to this variable. llvm::DenseMap<const VarDecl *, llvm::Value *> NRVOFlags; @@ -426,30 +509,33 @@ public: /// The size of the following cleanup object. unsigned Size; /// The kind of cleanup to push: a value from the CleanupKind enumeration. - CleanupKind Kind; + unsigned Kind : 31; + /// Whether this is a conditional cleanup. + unsigned IsConditional : 1; size_t getSize() const { return Size; } - CleanupKind getKind() const { return Kind; } + CleanupKind getKind() const { return (CleanupKind)Kind; } + bool isConditional() const { return IsConditional; } }; /// i32s containing the indexes of the cleanup destinations. - llvm::AllocaInst *NormalCleanupDest; + Address NormalCleanupDest = Address::invalid(); - unsigned NextCleanupDestIndex; + unsigned NextCleanupDestIndex = 1; /// FirstBlockInfo - The head of a singly-linked-list of block layouts. - CGBlockInfo *FirstBlockInfo; + CGBlockInfo *FirstBlockInfo = nullptr; /// EHResumeBlock - Unified block containing a call to llvm.eh.resume. - llvm::BasicBlock *EHResumeBlock; + llvm::BasicBlock *EHResumeBlock = nullptr; /// The exception slot. 
All landing pads write the current exception pointer /// into this alloca. - llvm::Value *ExceptionSlot; + llvm::Value *ExceptionSlot = nullptr; /// The selector slot. Under the MandatoryCleanup model, all landing pads /// write the current selector value into this alloca. - llvm::AllocaInst *EHSelectorSlot; + llvm::AllocaInst *EHSelectorSlot = nullptr; /// A stack of exception code slots. Entering an __except block pushes a slot /// on the stack and leaving pops one. The __exception_code() intrinsic loads @@ -524,28 +610,52 @@ public: initFullExprCleanup(); } - /// \brief Queue a cleanup to be pushed after finishing the current + /// Queue a cleanup to be pushed after finishing the current /// full-expression. template <class T, class... As> void pushCleanupAfterFullExpr(CleanupKind Kind, As... A) { - assert(!isInConditionalBranch() && "can't defer conditional cleanup"); + if (!isInConditionalBranch()) + return pushCleanupAfterFullExprImpl<T>(Kind, Address::invalid(), A...); + + Address ActiveFlag = createCleanupActiveFlag(); + assert(!DominatingValue<Address>::needsSaving(ActiveFlag) && + "cleanup active flag should never need saving"); - LifetimeExtendedCleanupHeader Header = { sizeof(T), Kind }; + typedef std::tuple<typename DominatingValue<As>::saved_type...> SavedTuple; + SavedTuple Saved{saveValueInCond(A)...}; + + typedef EHScopeStack::ConditionalCleanup<T, As...> CleanupType; + pushCleanupAfterFullExprImpl<CleanupType>(Kind, ActiveFlag, Saved); + } + + template <class T, class... As> + void pushCleanupAfterFullExprImpl(CleanupKind Kind, Address ActiveFlag, + As... A) { + LifetimeExtendedCleanupHeader Header = {sizeof(T), Kind, + ActiveFlag.isValid()}; size_t OldSize = LifetimeExtendedCleanupStack.size(); LifetimeExtendedCleanupStack.resize( - LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size); + LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size + + (Header.IsConditional ? sizeof(ActiveFlag) : 0)); static_assert(sizeof(Header) % alignof(T) == 0, "Cleanup will be allocated on misaligned address"); char *Buffer = &LifetimeExtendedCleanupStack[OldSize]; new (Buffer) LifetimeExtendedCleanupHeader(Header); new (Buffer + sizeof(Header)) T(A...); + if (Header.IsConditional) + new (Buffer + sizeof(Header) + sizeof(T)) Address(ActiveFlag); } - /// Set up the last cleaup that was pushed as a conditional + /// Set up the last cleanup that was pushed as a conditional /// full-expression cleanup. - void initFullExprCleanup(); + void initFullExprCleanup() { + initFullExprCleanupWithFlag(createCleanupActiveFlag()); + } + + void initFullExprCleanupWithFlag(Address ActiveFlag); + Address createCleanupActiveFlag(); /// PushDestructorCleanup - Push a cleanup to call the /// complete-object destructor of an object of the given type at the @@ -583,10 +693,10 @@ public: void ActivateCleanupBlock(EHScopeStack::stable_iterator Cleanup, llvm::Instruction *DominatingIP); - /// \brief Enters a new scope for capturing cleanups, all of which + /// Enters a new scope for capturing cleanups, all of which /// will be executed once the scope is exited. class RunCleanupsScope { - EHScopeStack::stable_iterator CleanupStackDepth; + EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth; size_t LifetimeExtendedCleanupStackSize; bool OldDidCallStackSave; protected: @@ -600,7 +710,7 @@ public: CodeGenFunction& CGF; public: - /// \brief Enter a new cleanup scope. + /// Enter a new cleanup scope. 
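Editorial aside: the RunCleanupsScope class below follows a common depth-recording RAII pattern; a self-contained sketch of the idea (deliberately simplified, not the real class):

    #include <cstdio>
    #include <functional>
    #include <vector>

    struct CleanupStack {
      std::vector<std::function<void()>> Stack;

      // RAII scope: remember the depth on entry, run everything pushed since
      // then on exit (unless cleanup was already forced).
      struct Scope {
        CleanupStack &S;
        size_t Depth;
        bool Done = false;
        explicit Scope(CleanupStack &CS) : S(CS), Depth(CS.Stack.size()) {}
        void force() {
          while (S.Stack.size() > Depth) { S.Stack.back()(); S.Stack.pop_back(); }
          Done = true;
        }
        ~Scope() { if (!Done) force(); }
      };
    };

    int main() {
      CleanupStack CS;
      {
        CleanupStack::Scope Scope(CS);
        CS.Stack.push_back([] { std::puts("cleanup runs at scope exit"); });
      } // prints here
    }
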
explicit RunCleanupsScope(CodeGenFunction &CGF) : PerformCleanup(true), CGF(CGF) { @@ -609,20 +719,22 @@ public: CGF.LifetimeExtendedCleanupStack.size(); OldDidCallStackSave = CGF.DidCallStackSave; CGF.DidCallStackSave = false; + OldCleanupScopeDepth = CGF.CurrentCleanupScopeDepth; + CGF.CurrentCleanupScopeDepth = CleanupStackDepth; } - /// \brief Exit this cleanup scope, emitting any accumulated cleanups. + /// Exit this cleanup scope, emitting any accumulated cleanups. ~RunCleanupsScope() { if (PerformCleanup) ForceCleanup(); } - /// \brief Determine whether this scope requires any cleanups. + /// Determine whether this scope requires any cleanups. bool requiresCleanups() const { return CGF.EHStack.stable_begin() != CleanupStackDepth; } - /// \brief Force the emission of cleanups now, instead of waiting + /// Force the emission of cleanups now, instead of waiting /// until this object is destroyed. /// \param ValuesToReload - A list of values that need to be available at /// the insertion point after cleanup emission. If cleanup emission created @@ -634,9 +746,14 @@ public: CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize, ValuesToReload); PerformCleanup = false; + CGF.CurrentCleanupScopeDepth = OldCleanupScopeDepth; } }; + // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. + EHScopeStack::stable_iterator CurrentCleanupScopeDepth = + EHScopeStack::stable_end(); + class LexicalScope : public RunCleanupsScope { SourceRange Range; SmallVector<const LabelDecl*, 4> Labels; @@ -646,7 +763,7 @@ public: void operator=(const LexicalScope &) = delete; public: - /// \brief Enter a new cleanup scope. + /// Enter a new cleanup scope. explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range) : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) { CGF.CurLexicalScope = this; @@ -659,7 +776,7 @@ public: Labels.push_back(label); } - /// \brief Exit this cleanup scope, emitting any accumulated + /// Exit this cleanup scope, emitting any accumulated /// cleanups. ~LexicalScope() { if (CGDebugInfo *DI = CGF.getDebugInfo()) @@ -673,7 +790,7 @@ public: } } - /// \brief Force the emission of cleanups now, instead of waiting + /// Force the emission of cleanups now, instead of waiting /// until this object is destroyed. void ForceCleanup() { CGF.CurLexicalScope = ParentScope; @@ -692,57 +809,107 @@ public: typedef llvm::DenseMap<const Decl *, Address> DeclMapTy; - /// \brief The scope used to remap some variables as private in the OpenMP - /// loop body (or other captured region emitted without outlining), and to - /// restore old vars back on exit. - class OMPPrivateScope : public RunCleanupsScope { + /// The class used to assign some variables some temporarily addresses. + class OMPMapVars { DeclMapTy SavedLocals; - DeclMapTy SavedPrivates; - - private: - OMPPrivateScope(const OMPPrivateScope &) = delete; - void operator=(const OMPPrivateScope &) = delete; + DeclMapTy SavedTempAddresses; + OMPMapVars(const OMPMapVars &) = delete; + void operator=(const OMPMapVars &) = delete; public: - /// \brief Enter a new OpenMP private scope. - explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {} - - /// \brief Registers \a LocalVD variable as a private and apply \a - /// PrivateGen function for it to generate corresponding private variable. - /// \a PrivateGen returns an address of the generated private variable. - /// \return true if the variable is registered as private, false if it has - /// been privatized already. 
- bool - addPrivate(const VarDecl *LocalVD, - llvm::function_ref<Address()> PrivateGen) { - assert(PerformCleanup && "adding private to dead scope"); + explicit OMPMapVars() = default; + ~OMPMapVars() { + assert(SavedLocals.empty() && "Did not restored original addresses."); + }; + /// Sets the address of the variable \p LocalVD to be \p TempAddr in + /// function \p CGF. + /// \return true if at least one variable was set already, false otherwise. + bool setVarAddr(CodeGenFunction &CGF, const VarDecl *LocalVD, + Address TempAddr) { LocalVD = LocalVD->getCanonicalDecl(); // Only save it once. if (SavedLocals.count(LocalVD)) return false; // Copy the existing local entry to SavedLocals. auto it = CGF.LocalDeclMap.find(LocalVD); - if (it != CGF.LocalDeclMap.end()) { - SavedLocals.insert({LocalVD, it->second}); - } else { - SavedLocals.insert({LocalVD, Address::invalid()}); - } + if (it != CGF.LocalDeclMap.end()) + SavedLocals.try_emplace(LocalVD, it->second); + else + SavedLocals.try_emplace(LocalVD, Address::invalid()); // Generate the private entry. - Address Addr = PrivateGen(); QualType VarTy = LocalVD->getType(); if (VarTy->isReferenceType()) { Address Temp = CGF.CreateMemTemp(VarTy); - CGF.Builder.CreateStore(Addr.getPointer(), Temp); - Addr = Temp; + CGF.Builder.CreateStore(TempAddr.getPointer(), Temp); + TempAddr = Temp; } - SavedPrivates.insert({LocalVD, Addr}); + SavedTempAddresses.try_emplace(LocalVD, TempAddr); return true; } - /// \brief Privatizes local variables previously registered as private. + /// Applies new addresses to the list of the variables. + /// \return true if at least one variable is using new address, false + /// otherwise. + bool apply(CodeGenFunction &CGF) { + copyInto(SavedTempAddresses, CGF.LocalDeclMap); + SavedTempAddresses.clear(); + return !SavedLocals.empty(); + } + + /// Restores original addresses of the variables. + void restore(CodeGenFunction &CGF) { + if (!SavedLocals.empty()) { + copyInto(SavedLocals, CGF.LocalDeclMap); + SavedLocals.clear(); + } + } + + private: + /// Copy all the entries in the source map over the corresponding + /// entries in the destination, which must exist. + static void copyInto(const DeclMapTy &Src, DeclMapTy &Dest) { + for (auto &Pair : Src) { + if (!Pair.second.isValid()) { + Dest.erase(Pair.first); + continue; + } + + auto I = Dest.find(Pair.first); + if (I != Dest.end()) + I->second = Pair.second; + else + Dest.insert(Pair); + } + } + }; + + /// The scope used to remap some variables as private in the OpenMP loop body + /// (or other captured region emitted without outlining), and to restore old + /// vars back on exit. + class OMPPrivateScope : public RunCleanupsScope { + OMPMapVars MappedVars; + OMPPrivateScope(const OMPPrivateScope &) = delete; + void operator=(const OMPPrivateScope &) = delete; + + public: + /// Enter a new OpenMP private scope. + explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {} + + /// Registers \p LocalVD variable as a private and apply \p PrivateGen + /// function for it to generate corresponding private variable. \p + /// PrivateGen returns an address of the generated private variable. + /// \return true if the variable is registered as private, false if it has + /// been privatized already. 
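Editorial aside: OMPMapVars above is essentially a remap-then-restore helper over the declaration map; a self-contained sketch of the same copyInto semantics using a plain std::map (types and the -1 sentinel are illustrative stand-ins for Address::invalid()):

    #include <cassert>
    #include <map>
    #include <string>

    using DeclMap = std::map<std::string, int>;

    // Copy all entries of Src into Dest; a sentinel value erases the entry,
    // mirroring how an invalid Address removes a declaration mapping.
    static void copyInto(const DeclMap &Src, DeclMap &Dest) {
      for (const auto &Pair : Src) {
        if (Pair.second == -1) { Dest.erase(Pair.first); continue; }
        Dest[Pair.first] = Pair.second;
      }
    }

    int main() {
      DeclMap Locals{{"x", 1}};          // original addresses
      DeclMap Saved, Temp{{"x", 42}};    // remapped (private) addresses
      Saved["x"] = Locals.count("x") ? Locals["x"] : -1;
      copyInto(Temp, Locals);            // apply(): use the private copies
      assert(Locals["x"] == 42);
      copyInto(Saved, Locals);           // restore(): back to the originals
      assert(Locals["x"] == 1);
    }
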
+ bool addPrivate(const VarDecl *LocalVD, + const llvm::function_ref<Address()> PrivateGen) { + assert(PerformCleanup && "adding private to dead scope"); + return MappedVars.setVarAddr(CGF, LocalVD, PrivateGen()); + } + + /// Privatizes local variables previously registered as private. /// Registration is separate from the actual privatization to allow /// initializers use values of the original variables, not the private one. /// This is important, for example, if the private variable is a class @@ -750,19 +917,14 @@ public: /// variables. But at initialization original variables must be used, not /// private copies. /// \return true if at least one variable was privatized, false otherwise. - bool Privatize() { - copyInto(SavedPrivates, CGF.LocalDeclMap); - SavedPrivates.clear(); - return !SavedLocals.empty(); - } + bool Privatize() { return MappedVars.apply(CGF); } void ForceCleanup() { RunCleanupsScope::ForceCleanup(); - copyInto(SavedLocals, CGF.LocalDeclMap); - SavedLocals.clear(); + MappedVars.restore(CGF); } - /// \brief Exit scope - all the mapped variables are restored. + /// Exit scope - all the mapped variables are restored. ~OMPPrivateScope() { if (PerformCleanup) ForceCleanup(); @@ -773,34 +935,15 @@ public: VD = VD->getCanonicalDecl(); return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0; } - - private: - /// Copy all the entries in the source map over the corresponding - /// entries in the destination, which must exist. - static void copyInto(const DeclMapTy &src, DeclMapTy &dest) { - for (auto &pair : src) { - if (!pair.second.isValid()) { - dest.erase(pair.first); - continue; - } - - auto it = dest.find(pair.first); - if (it != dest.end()) { - it->second = pair.second; - } else { - dest.insert(pair); - } - } - } }; - /// \brief Takes the old cleanup stack size and emits the cleanup blocks + /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, std::initializer_list<llvm::Value **> ValuesToReload = {}); - /// \brief Takes the old cleanup stack size and emits the cleanup blocks + /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added, then adds all lifetime-extended cleanups from /// the given position to the stack. void @@ -843,7 +986,8 @@ public: llvm::BasicBlock *getEHResumeBlock(bool isCleanup); llvm::BasicBlock *getEHDispatchBlock(EHScopeStack::stable_iterator scope); - llvm::BasicBlock *getMSVCDispatchBlock(EHScopeStack::stable_iterator scope); + llvm::BasicBlock * + getFuncletEHDispatchBlock(EHScopeStack::stable_iterator scope); /// An object to manage conditionally-evaluated expressions. class ConditionalEvaluation { @@ -1052,22 +1196,27 @@ public: private: CGDebugInfo *DebugInfo; - bool DisableDebugInfo; + bool DisableDebugInfo = false; /// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid /// calling llvm.stacksave for multiple VLAs in the same scope. - bool DidCallStackSave; + bool DidCallStackSave = false; /// IndirectBranch - The first time an indirect goto is seen we create a block /// with an indirect branch. Every time we see the address of a label taken, /// we add the label to the indirect goto. Every subsequent indirect goto is /// codegen'd as a jump to the IndirectBranch's basic block. - llvm::IndirectBrInst *IndirectBranch; + llvm::IndirectBrInst *IndirectBranch = nullptr; /// LocalDeclMap - This keeps track of the LLVM allocas or globals for local C /// decls. 
DeclMapTy LocalDeclMap; + // Keep track of the cleanups for callee-destructed parameters pushed to the + // cleanup stack so that they can be deactivated later. + llvm::DenseMap<const ParmVarDecl *, EHScopeStack::stable_iterator> + CalleeDestructedParamCleanups; + /// SizeArguments - If a ParmVarDecl had the pass_object_size attribute, this /// will contain a mapping from said ParmVarDecl to its implicit "object_size" /// parameter. @@ -1119,7 +1268,7 @@ private: /// Emits exit block with special codegen procedure specific for the related /// OpenMP construct + emits code for normal construct cleanup. void emitExit(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, - const llvm::function_ref<void(CodeGenFunction &)> &CodeGen) { + const llvm::function_ref<void(CodeGenFunction &)> CodeGen) { if (Stack.back().Kind == Kind && getExitBlock().isValid()) { assert(CGF.getOMPCancelDestination(Kind).isValid()); assert(CGF.HaveInsertPoint()); @@ -1207,13 +1356,13 @@ private: /// SwitchInsn - This is nearest current switch instruction. It is null if /// current context is not in a switch. - llvm::SwitchInst *SwitchInsn; + llvm::SwitchInst *SwitchInsn = nullptr; /// The branch weights of SwitchInsn when doing instrumentation based PGO. - SmallVector<uint64_t, 16> *SwitchWeights; + SmallVector<uint64_t, 16> *SwitchWeights = nullptr; /// CaseRangeBlock - This block holds if condition check for last case /// statement range in current switch instruction. - llvm::BasicBlock *CaseRangeBlock; + llvm::BasicBlock *CaseRangeBlock = nullptr; /// OpaqueLValues - Keeps track of the current set of opaque value /// expressions. @@ -1230,13 +1379,13 @@ private: /// A block containing a single 'unreachable' instruction. Created /// lazily by getUnreachableBlock(). - llvm::BasicBlock *UnreachableBlock; + llvm::BasicBlock *UnreachableBlock = nullptr; /// Counts of the number return expressions in the function. - unsigned NumReturnExprs; + unsigned NumReturnExprs = 0; /// Count the number of simple (constant) return expressions in the function. - unsigned NumSimpleReturnExprs; + unsigned NumSimpleReturnExprs = 0; /// The last regular (non-return) debug location (breakpoint) in the function. SourceLocation LastStopPoint; @@ -1356,9 +1505,9 @@ public: private: /// CXXThisDecl - When generating code for a C++ member function, /// this will hold the implicit 'this' declaration. - ImplicitParamDecl *CXXABIThisDecl; - llvm::Value *CXXABIThisValue; - llvm::Value *CXXThisValue; + ImplicitParamDecl *CXXABIThisDecl = nullptr; + llvm::Value *CXXABIThisValue = nullptr; + llvm::Value *CXXThisValue = nullptr; CharUnits CXXABIThisAlignment; CharUnits CXXThisAlignment; @@ -1376,16 +1525,16 @@ private: /// CXXStructorImplicitParamDecl - When generating code for a constructor or /// destructor, this will hold the implicit argument (e.g. VTT). - ImplicitParamDecl *CXXStructorImplicitParamDecl; - llvm::Value *CXXStructorImplicitParamValue; + ImplicitParamDecl *CXXStructorImplicitParamDecl = nullptr; + llvm::Value *CXXStructorImplicitParamValue = nullptr; /// OutermostConditional - Points to the outermost active /// conditional control. This is used so that we know if a /// temporary should be destroyed conditionally. - ConditionalEvaluation *OutermostConditional; + ConditionalEvaluation *OutermostConditional = nullptr; /// The current lexical scope. - LexicalScope *CurLexicalScope; + LexicalScope *CurLexicalScope = nullptr; /// The current source location that should be used for exception /// handling code. 
@@ -1416,14 +1565,21 @@ private: CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()); } - llvm::BasicBlock *TerminateLandingPad; - llvm::BasicBlock *TerminateHandler; - llvm::BasicBlock *TrapBB; + llvm::BasicBlock *TerminateLandingPad = nullptr; + llvm::BasicBlock *TerminateHandler = nullptr; + llvm::BasicBlock *TrapBB = nullptr; + + /// Terminate funclets keyed by parent funclet pad. + llvm::MapVector<llvm::Value *, llvm::BasicBlock *> TerminateFunclets; + + /// Largest vector width used in ths function. Will be used to create a + /// function attribute. + unsigned LargestVectorWidth = 0; /// True if we need emit the life-time markers. const bool ShouldEmitLifetimeMarkers; - /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to + /// Add OpenCL kernel arg metadata and the kernel attribute metadata to /// the function metadata. void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); @@ -1532,6 +1688,7 @@ public: return false; case QualType::DK_cxx_destructor: case QualType::DK_objc_weak_lifetime: + case QualType::DK_nontrivial_c_struct: return getLangOpts().Exceptions; case QualType::DK_objc_strong_lifetime: return getLangOpts().Exceptions && @@ -1579,10 +1736,7 @@ public: /// \return an LLVM value which is a pointer to a struct which contains /// information about the block, including the block invoke function, the /// captured variables, etc. - /// \param InvokeF will contain the block invoke function if it is not - /// nullptr. - llvm::Value *EmitBlockLiteral(const BlockExpr *, - llvm::Function **InvokeF = nullptr); + llvm::Value *EmitBlockLiteral(const BlockExpr *); static void destroyBlockInfos(CGBlockInfo *info); llvm::Function *GenerateBlockFunction(GlobalDecl GD, @@ -1604,7 +1758,25 @@ public: class AutoVarEmission; void emitByrefStructureInit(const AutoVarEmission &emission); - void enterByrefCleanup(const AutoVarEmission &emission); + + /// Enter a cleanup to destroy a __block variable. Note that this + /// cleanup should be a no-op if the variable hasn't left the stack + /// yet; if a cleanup is required for the variable itself, that needs + /// to be done externally. + /// + /// \param Kind Cleanup kind. + /// + /// \param Addr When \p LoadBlockVarAddr is false, the address of the __block + /// structure that will be passed to _Block_object_dispose. When + /// \p LoadBlockVarAddr is true, the address of the field of the block + /// structure that holds the address of the __block structure. + /// + /// \param Flags The flag that will be passed to _Block_object_dispose. + /// + /// \param LoadBlockVarAddr Indicates whether we need to emit a load from + /// \p Addr to get the address of the __block structure. + void enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags, + bool LoadBlockVarAddr); void setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum, llvm::Value *ptr); @@ -1627,7 +1799,7 @@ public: void GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo); - /// \brief Emit code for the start of a function. + /// Emit code for the start of a function. /// \param Loc The location to be associated with the function. /// \param StartLoc The location of the function body. 
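Editorial aside: returning to the enterByrefCleanup overload documented above, the source construct whose storage it disposes looks like this (illustrative; requires the blocks extension, -fblocks):

    void byref_demo(void) {
      __block int counter = 0;                 // stored in a heap-copyable byref structure
      void (^bump)(void) = ^{ ++counter; };    // the block captures that structure by reference
      bump();
    }  // scope exit: the emitted cleanup passes the byref structure to _Block_object_dispose
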
void StartFunction(GlobalDecl GD, @@ -1653,7 +1825,7 @@ public: void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD); void EmitAsanPrologueOrEpilogue(bool Prologue); - /// \brief Emit the unified return block, trying to avoid its emission when + /// Emit the unified return block, trying to avoid its emission when /// possible. /// \return The debug location of the user written return statement if the /// return block is is avoided. @@ -1664,10 +1836,10 @@ public: void FinishFunction(SourceLocation EndLoc=SourceLocation()); void StartThunk(llvm::Function *Fn, GlobalDecl GD, - const CGFunctionInfo &FnInfo); + const CGFunctionInfo &FnInfo, bool IsUnprototyped); - void EmitCallAndReturnForThunk(llvm::Constant *Callee, - const ThunkInfo *Thunk); + void EmitCallAndReturnForThunk(llvm::Constant *Callee, const ThunkInfo *Thunk, + bool IsUnprototyped); void FinishThunk(); @@ -1677,7 +1849,8 @@ public: /// Generate a thunk for the given method. void generateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, - GlobalDecl GD, const ThunkInfo &Thunk); + GlobalDecl GD, const ThunkInfo &Thunk, + bool IsUnprototyped); llvm::Function *GenerateVarArgsThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, @@ -1688,7 +1861,7 @@ public: void EmitInitializerForField(FieldDecl *Field, LValue LHS, Expr *Init); - /// Struct with all informations about dynamic [sub]class needed to set vptr. + /// Struct with all information about dynamic [sub]class needed to set vptr. struct VPtr { BaseSubobject Base; const CXXRecordDecl *NearestVBase; @@ -1723,9 +1896,11 @@ public: CFITCK_DerivedCast, CFITCK_UnrelatedCast, CFITCK_ICall, + CFITCK_NVMFCall, + CFITCK_VMFCall, }; - /// \brief Derived is the presumed address of an object of type T after a + /// Derived is the presumed address of an object of type T after a /// cast. If T is a polymorphic class type, emit a check that the virtual /// table for Derived belongs to a class derived from T. void EmitVTablePtrCheckForCast(QualType T, llvm::Value *Derived, @@ -1775,6 +1950,10 @@ public: /// XRay custom event handling calls. bool AlwaysEmitXRayCustomEvents() const; + /// AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit + /// XRay typed event handling calls. + bool AlwaysEmitXRayTypedEvents() const; + /// Encode an address into a form suitable for use in a function prologue. llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F, llvm::Constant *Addr); @@ -1808,6 +1987,10 @@ public: /// getTerminateLandingPad - Return a landing pad that just calls terminate. llvm::BasicBlock *getTerminateLandingPad(); + /// getTerminateLandingPad - Return a cleanup funclet that just calls + /// terminate. + llvm::BasicBlock *getTerminateFunclet(); + /// getTerminateHandler - Return a handler (not a landing pad, just /// a catch handler) that just calls terminate. This is used when /// a terminate scope encloses a try. @@ -1841,11 +2024,7 @@ public: llvm::BasicBlock *createBasicBlock(const Twine &name = "", llvm::Function *parent = nullptr, llvm::BasicBlock *before = nullptr) { -#ifdef NDEBUG - return llvm::BasicBlock::Create(getLLVMContext(), "", parent, before); -#else return llvm::BasicBlock::Create(getLLVMContext(), name, parent, before); -#endif } /// getBasicBlockForLabel - Return the LLVM basicblock that the specified @@ -1975,15 +2154,20 @@ public: /// to the stack. /// /// Because the address of a temporary is often exposed to the program in - /// various ways, this function will perform the cast by default. 
The cast - /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is + /// various ways, this function will perform the cast. The original alloca + /// instruction is returned through \p Alloca if it is not nullptr. + /// + /// The cast is not performaed in CreateTempAllocaWithoutCast. This is /// more efficient if the caller knows that the address will not be exposed. llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp", llvm::Value *ArraySize = nullptr); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, const Twine &Name = "tmp", llvm::Value *ArraySize = nullptr, - bool CastToDefaultAddrSpace = true); + Address *Alloca = nullptr); + Address CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align, + const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr); /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. @@ -2018,12 +2202,18 @@ public: Address CreateIRTemp(QualType T, const Twine &Name = "tmp"); /// CreateMemTemp - Create a temporary memory object of the given type, with - /// appropriate alignment. Cast it to the default address space if - /// \p CastToDefaultAddrSpace is true. + /// appropriate alignmen and cast it to the default address space. Returns + /// the original alloca instruction by \p Alloca if it is not nullptr. Address CreateMemTemp(QualType T, const Twine &Name = "tmp", - bool CastToDefaultAddrSpace = true); + Address *Alloca = nullptr); Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp", - bool CastToDefaultAddrSpace = true); + Address *Alloca = nullptr); + + /// CreateMemTemp - Create a temporary memory object of the given type, with + /// appropriate alignmen without casting it to the default address space. + Address CreateMemTempWithoutCast(QualType T, const Twine &Name = "tmp"); + Address CreateMemTempWithoutCast(QualType T, CharUnits Align, + const Twine &Name = "tmp"); /// CreateAggTemp - Create a temporary memory object for the given /// aggregate type. @@ -2032,7 +2222,8 @@ public: T.getQualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); } /// Emit a cast to void* in the appropriate address space. @@ -2089,31 +2280,52 @@ public: } return false; } - /// EmitAggregateCopy - Emit an aggregate assignment. - /// - /// The difference to EmitAggregateCopy is that tail padding is not copied. - /// This is required for correctness when assigning non-POD structures in C++. - void EmitAggregateAssign(Address DestPtr, Address SrcPtr, - QualType EltTy) { + + /// Determine whether a return value slot may overlap some other object. + AggValueSlot::Overlap_t overlapForReturnValue() { + // FIXME: Assuming no overlap here breaks guaranteed copy elision for base + // class subobjects. These cases may need to be revisited depending on the + // resolution of the relevant core issue. + return AggValueSlot::DoesNotOverlap; + } + + /// Determine whether a field initialization may overlap some other object. + AggValueSlot::Overlap_t overlapForFieldInit(const FieldDecl *FD) { + // FIXME: These cases can result in overlap as a result of P0840R0's + // [[no_unique_address]] attribute. We can still infer NoOverlap in the + // presence of that attribute if the field is within the nvsize of its + // containing class, because non-virtual subobjects are initialized in + // address order. 
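Editorial aside: the Overlap_t plumbing being threaded through here exists because of tail-padding reuse; a short illustration of why an aggregate copy sometimes must not touch the full sizeof of the destination:

    struct Base { int i; char c; Base() {} };   // non-POD: its tail padding is reusable
    struct Derived : Base { char d; };          // 'd' may be placed inside Base's padding
    // Assigning to the Base subobject of a Derived must therefore be emitted
    // with MayOverlap, so the copy does not clobber the byte holding 'd'.
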
+ return AggValueSlot::DoesNotOverlap; + } + + /// Determine whether a base class initialization may overlap some other + /// object. + AggValueSlot::Overlap_t overlapForBaseInit(const CXXRecordDecl *RD, + const CXXRecordDecl *BaseRD, + bool IsVirtual); + + /// Emit an aggregate assignment. + void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) { bool IsVolatile = hasVolatileMember(EltTy); - EmitAggregateCopy(DestPtr, SrcPtr, EltTy, IsVolatile, true); + EmitAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile); } - void EmitAggregateCopyCtor(Address DestPtr, Address SrcPtr, - QualType DestTy, QualType SrcTy) { - EmitAggregateCopy(DestPtr, SrcPtr, SrcTy, /*IsVolatile=*/false, - /*IsAssignment=*/false); + void EmitAggregateCopyCtor(LValue Dest, LValue Src, + AggValueSlot::Overlap_t MayOverlap) { + EmitAggregateCopy(Dest, Src, Src.getType(), MayOverlap); } /// EmitAggregateCopy - Emit an aggregate copy. /// - /// \param isVolatile - True iff either the source or the destination is - /// volatile. - /// \param isAssignment - If false, allow padding to be copied. This often - /// yields more efficient. - void EmitAggregateCopy(Address DestPtr, Address SrcPtr, - QualType EltTy, bool isVolatile=false, - bool isAssignment = false); + /// \param isVolatile \c true iff either the source or the destination is + /// volatile. + /// \param MayOverlap Whether the tail padding of the destination might be + /// occupied by some other object. More efficient code can often be + /// generated if not. + void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, + AggValueSlot::Overlap_t MayOverlap, + bool isVolatile = false); /// GetAddrOfLocalVar - Return the address of a local variable. Address GetAddrOfLocalVar(const VarDecl *VD) { @@ -2123,27 +2335,13 @@ public: return it->second; } - /// getOpaqueLValueMapping - Given an opaque value expression (which - /// must be mapped to an l-value), return its mapping. - const LValue &getOpaqueLValueMapping(const OpaqueValueExpr *e) { - assert(OpaqueValueMapping::shouldBindAsLValue(e)); + /// Given an opaque value expression, return its LValue mapping if it exists, + /// otherwise create one. + LValue getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e); - llvm::DenseMap<const OpaqueValueExpr*,LValue>::iterator - it = OpaqueLValues.find(e); - assert(it != OpaqueLValues.end() && "no mapping for opaque value!"); - return it->second; - } - - /// getOpaqueRValueMapping - Given an opaque value expression (which - /// must be mapped to an r-value), return its mapping. - const RValue &getOpaqueRValueMapping(const OpaqueValueExpr *e) { - assert(!OpaqueValueMapping::shouldBindAsLValue(e)); - - llvm::DenseMap<const OpaqueValueExpr*,RValue>::iterator - it = OpaqueRValues.find(e); - assert(it != OpaqueRValues.end() && "no mapping for opaque value!"); - return it->second; - } + /// Given an opaque value expression, return its RValue mapping if it exists, + /// otherwise create one. + RValue getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e); /// Get the index of the current ArrayInitLoopExpr, if any. llvm::Value *getArrayInitIndex() { return ArrayInitIndex; } @@ -2193,12 +2391,24 @@ public: /// This function can be called with a null (unreachable) insert point. 
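Editorial aside: before the VlaSizePair helpers that follow, a concrete picture of what they return for a C variable-length array, assuming the usual reading of their doc comments (the function name is illustrative):

    void vla_demo(int n, int m) {
      int a[n][m][8];
      // getVLASize(typeof(a))       -> { NumElts = n*m, Type = int[8] }
      //   the count of non-variably-sized elements plus that element type
      // getVLAElements1D(typeof(a)) -> { NumElts = n,   Type = int[m][8] }
      //   only the outermost dimension and its (still variably modified) element type
      (void)a;
    }
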
void EmitVariablyModifiedType(QualType Ty); - /// getVLASize - Returns an LLVM value that corresponds to the size, + struct VlaSizePair { + llvm::Value *NumElts; + QualType Type; + + VlaSizePair(llvm::Value *NE, QualType T) : NumElts(NE), Type(T) {} + }; + + /// Return the number of elements for a single dimension + /// for the given array type. + VlaSizePair getVLAElements1D(const VariableArrayType *vla); + VlaSizePair getVLAElements1D(QualType vla); + + /// Returns an LLVM value that corresponds to the size, /// in non-variably-sized elements, of a variable length array type, /// plus that largest non-variably-sized element type. Assumes that /// the type has already been emitted with EmitVariablyModifiedType. - std::pair<llvm::Value*,QualType> getVLASize(const VariableArrayType *vla); - std::pair<llvm::Value*,QualType> getVLASize(QualType vla); + VlaSizePair getVLASize(const VariableArrayType *vla); + VlaSizePair getVLASize(QualType vla); /// LoadCXXThis - Load the value of 'this'. This function is only valid while /// generating code for an C++ member function. @@ -2279,11 +2489,14 @@ public: void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, - Address This, const CXXConstructExpr *E); + Address This, const CXXConstructExpr *E, + AggValueSlot::Overlap_t Overlap); void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, - Address This, CallArgList &Args); + Address This, CallArgList &Args, + AggValueSlot::Overlap_t Overlap, + SourceLocation Loc); /// Emit assumption load for all bases. Requires to be be called only on /// most-derived class and not under construction of the object. @@ -2333,13 +2546,13 @@ public: CharUnits CookieSize = CharUnits()); RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, - const Expr *Arg, bool IsDelete); + const CallExpr *TheCallExpr, bool IsDelete); llvm::Value *EmitCXXTypeidExpr(const CXXTypeidExpr *E); llvm::Value *EmitDynamicCast(Address V, const CXXDynamicCastExpr *DCE); Address EmitCXXUuidofExpr(const CXXUuidofExpr *E); - /// \brief Situations in which we might emit a check for the suitability of a + /// Situations in which we might emit a check for the suitability of a /// pointer or glvalue. enum TypeCheckKind { /// Checking the operand of a load. Must be suitably sized and aligned. @@ -2383,17 +2596,17 @@ public: /// Determine whether the pointer type check \p TCK requires a vptr check. static bool isVptrCheckRequired(TypeCheckKind TCK, QualType Ty); - /// \brief Whether any type-checking sanitizers are enabled. If \c false, + /// Whether any type-checking sanitizers are enabled. If \c false, /// calls to EmitTypeCheck can be skipped. bool sanitizePerformTypeCheck() const; - /// \brief Emit a check that \p V is the address of storage of the + /// Emit a check that \p V is the address of storage of the /// appropriate size and alignment for an object of type \p Type. void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment = CharUnits::Zero(), SanitizerSet SkippedChecks = SanitizerSet()); - /// \brief Emit a check that \p Base points into an array object, which + /// Emit a check that \p Base points into an array object, which /// we can access at index \p Index. \p Accessed should be \c false if we /// this expression is used as an lvalue, for instance in "&Arr[Idx]". 
void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, @@ -2434,7 +2647,7 @@ public: typedef void SpecialInitFn(CodeGenFunction &Init, const VarDecl &D, llvm::Value *Address); - /// \brief Determine whether the given initializer is trivial in the sense + /// Determine whether the given initializer is trivial in the sense /// that it requires no code to be generated. bool isTrivialInitializer(const Expr *Init); @@ -2448,7 +2661,9 @@ public: const VarDecl *Variable; - /// The address of the alloca. Invalid if the variable was emitted + /// The address of the alloca for languages with explicit address space + /// (e.g. OpenCL) or alloca casted to generic pointer for address space + /// agnostic languages (e.g. C++). Invalid if the variable was emitted /// as a global constant. Address Addr; @@ -2464,13 +2679,19 @@ public: /// Non-null if we should use lifetime annotations. llvm::Value *SizeForLifetimeMarkers; + /// Address with original alloca instruction. Invalid if the variable was + /// emitted as a global constant. + Address AllocaAddr; + struct Invalid {}; - AutoVarEmission(Invalid) : Variable(nullptr), Addr(Address::invalid()) {} + AutoVarEmission(Invalid) + : Variable(nullptr), Addr(Address::invalid()), + AllocaAddr(Address::invalid()) {} AutoVarEmission(const VarDecl &variable) - : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), - IsByRef(false), IsConstantAggregate(false), - SizeForLifetimeMarkers(nullptr) {} + : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), + IsByRef(false), IsConstantAggregate(false), + SizeForLifetimeMarkers(nullptr), AllocaAddr(Address::invalid()) {} bool wasEmittedAsGlobal() const { return !Addr.isValid(); } @@ -2486,11 +2707,15 @@ public: } /// Returns the raw, allocated address, which is not necessarily - /// the address of the object itself. + /// the address of the object itself. It is casted to default + /// address space for address space agnostic languages. Address getAllocatedAddress() const { return Addr; } + /// Returns the address for the original alloca instruction. + Address getOriginalAllocatedAddress() const { return AllocaAddr; } + /// Returns the address of the object within this declaration. /// Note that this does not chase the forwarding pointer for /// __block decls. @@ -2506,6 +2731,15 @@ public: void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); + /// Emits the alloca and debug information for the size expressions for each + /// dimension of an array. It registers the association of its (1-dimensional) + /// QualTypes and size expression's debug node, so that CGDebugInfo can + /// reference this node when creating the DISubrange object to describe the + /// array types. + void EmitAndRegisterVariableArrayDimensions(CGDebugInfo *DI, + const VarDecl &D, + bool EmitDebugInfo); + void EmitStaticVarDecl(const VarDecl &D, llvm::GlobalValue::LinkageTypes Linkage); @@ -2655,6 +2889,9 @@ public: llvm::Value *EmitSEHExceptionInfo(); llvm::Value *EmitSEHAbnormalTermination(); + /// Emit simple code for OpenMP directives in Simd-only mode. + void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D); + /// Scan the outlined statement for captures from the parent function. For /// each capture, mark the capture as escaped and emit a call to /// llvm.localrecover. Insert the localrecover result into the LocalDeclMap. 
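Editorial aside: for context, the kind of construct those SEH helpers outline (the callees are made up; __try/__finally is the Microsoft extension Clang accepts on Windows targets):

    extern void do_work(void);
    extern void release_resources(void);

    void guarded(void) {
      __try {
        do_work();
      } __finally {
        release_resources();   // outlined into a helper function; captured locals
      }                        // are recovered via llvm.localescape / llvm.localrecover
    }
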
@@ -2697,7 +2934,7 @@ public: SmallVectorImpl<llvm::Value *> &CapturedVars); void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, SourceLocation Loc); - /// \brief Perform element by element copying of arrays with type \a + /// Perform element by element copying of arrays with type \a /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure /// generated by \a CopyGen. /// @@ -2708,8 +2945,8 @@ public: /// to another single array element. void EmitOMPAggregateAssign( Address DestAddr, Address SrcAddr, QualType OriginalType, - const llvm::function_ref<void(Address, Address)> &CopyGen); - /// \brief Emit proper copying of data from one variable to another. + const llvm::function_ref<void(Address, Address)> CopyGen); + /// Emit proper copying of data from one variable to another. /// /// \param OriginalType Original type of the copied variables. /// \param DestAddr Destination address. @@ -2724,7 +2961,7 @@ public: Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy); - /// \brief Emit atomic update code for constructs: \a X = \a X \a BO \a E or + /// Emit atomic update code for constructs: \a X = \a X \a BO \a E or /// \a X = \a E \a BO \a E. /// /// \param X Value to be updated. @@ -2740,7 +2977,7 @@ public: std::pair<bool, RValue> EmitOMPAtomicSimpleUpdateExpr( LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, llvm::AtomicOrdering AO, SourceLocation Loc, - const llvm::function_ref<RValue(RValue)> &CommonGen); + const llvm::function_ref<RValue(RValue)> CommonGen); bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); void EmitOMPPrivateClause(const OMPExecutableDirective &D, @@ -2748,7 +2985,7 @@ public: void EmitOMPUseDevicePtrClause( const OMPClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap); - /// \brief Emit code for copyin clause in \a D directive. The next code is + /// Emit code for copyin clause in \a D directive. The next code is /// generated at the start of outlined functions for directives: /// \code /// threadprivate_var1 = master_threadprivate_var1; @@ -2760,7 +2997,7 @@ public: /// \param D OpenMP directive possibly with 'copyin' clause(s). /// \returns true if at least one copyin variable is found, false otherwise. bool EmitOMPCopyinClause(const OMPExecutableDirective &D); - /// \brief Emit initial code for lastprivate variables. If some variable is + /// Emit initial code for lastprivate variables. If some variable is /// not also firstprivate, then the default initialization is used. Otherwise /// initialization of this variable is performed by EmitOMPFirstprivateClause /// method. @@ -2773,7 +3010,7 @@ public: /// otherwise. bool EmitOMPLastprivateClauseInit(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); - /// \brief Emit final copying of lastprivate values to original variables at + /// Emit final copying of lastprivate values to original variables at /// the end of the worksharing or simd directive. /// /// \param D Directive that has at least one 'lastprivate' directives. @@ -2791,8 +3028,8 @@ public: /// linear clause. void EmitOMPLinearClauseFinal( const OMPLoopDirective &D, - const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); - /// \brief Emit initial code for reduction variables. Creates reduction copies + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen); + /// Emit initial code for reduction variables. 
Creates reduction copies /// and initializes them with the values according to OpenMP standard. /// /// \param D Directive (possibly) with the 'reduction' clause. @@ -2801,14 +3038,14 @@ public: /// void EmitOMPReductionClauseInit(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); - /// \brief Emit final update of reduction values to original variables at + /// Emit final update of reduction values to original variables at /// the end of the directive. /// /// \param D Directive that has at least one 'reduction' directives. /// \param ReductionKind The kind of reduction to perform. void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind); - /// \brief Emit initial code for linear variables. Creates private copies + /// Emit initial code for linear variables. Creates private copies /// and initializes them with the values according to OpenMP standard. /// /// \param D Directive (possibly) with the 'linear' clause. @@ -2821,6 +3058,7 @@ public: const OMPTaskDataTy & /*Data*/)> TaskGenTy; void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data); struct OMPTargetDataInfo { @@ -2930,7 +3168,16 @@ public: static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S); - /// \brief Emit inner loop of the worksharing/simd construct. + /// Emit device code for the target teams distribute parallel for simd + /// directive. + static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeParallelForSimdDirective &S); + + static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeParallelForDirective &S); + /// Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. /// \param RequiresCleanup true, if directive has some associated private @@ -2943,8 +3190,8 @@ public: void EmitOMPInnerLoop( const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, - const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, - const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen); + const llvm::function_ref<void(CodeGenFunction &)> BodyGen, + const llvm::function_ref<void(CodeGenFunction &)> PostIncGen); JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind); /// Emit initial code for loop counters of loop-based directives. @@ -2954,7 +3201,7 @@ public: /// Helper for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); - /// \brief Emit code for the worksharing loop-based directive. + /// Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - /// otherwise. bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, @@ -2969,17 +3216,14 @@ public: void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal( const OMPLoopDirective &D, - const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen); /// Emits the lvalue for the expression with possibly captured variable. LValue EmitOMPSharedLValue(const Expr *E); private: - /// Helpers for blocks. 
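Editorial aside: stepping back to the OpenMP clause codegen declared above, the reduction entry points correspond to source like this (illustrative):

    int sum(int n, const int *a) {
      int s = 0;
    #pragma omp parallel for reduction(+ : s)
      for (int i = 0; i < n; ++i)
        s += a[i];
      // EmitOMPReductionClauseInit creates and initializes the private copies of
      // 's'; EmitOMPReductionClauseFinal merges them back at the end of the region.
      return s;
    }
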
Returns invoke function by \p InvokeF if it is not - /// nullptr. It should be called without \p InvokeF if the caller does not - /// need invoke function to be returned. - llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info, - llvm::Function **InvokeF = nullptr); + /// Helpers for blocks. + llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// struct with the values to be passed to the OpenMP loop-related functions struct OMPLoopArguments { @@ -3030,7 +3274,7 @@ private: OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, const CodeGenLoopTy &CodeGenLoopContent); - /// \brief Emit code for sections directive. + /// Emit code for sections directive. void EmitSections(const OMPExecutableDirective &S); public: @@ -3071,7 +3315,7 @@ public: /// LValue EmitLValue(const Expr *E); - /// \brief Same as EmitLValue but additionally we generate checking code to + /// Same as EmitLValue but additionally we generate checking code to /// guard against undefined behavior. This is only suitable when we know /// that the address will be used to access the object. LValue EmitCheckedLValue(const Expr *E, TypeCheckKind TCK); @@ -3332,6 +3576,9 @@ public: ArrayRef<llvm::Value*> args, const Twine &name = ""); + SmallVector<llvm::OperandBundleDef, 1> + getBundlesForFunclet(llvm::Value *Callee); + llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, ArrayRef<llvm::Value *> Args, const Twine &Name = ""); @@ -3351,6 +3598,16 @@ public: CXXDtorType Type, const CXXRecordDecl *RD); + // These functions emit calls to the special functions of non-trivial C + // structs. + void defaultInitNonTrivialCStructVar(LValue Dst); + void callCStructDefaultConstructor(LValue Dst); + void callCStructDestructor(LValue Dst); + void callCStructCopyConstructor(LValue Dst, LValue Src); + void callCStructMoveConstructor(LValue Dst, LValue Src); + void callCStructCopyAssignmentOperator(LValue Dst, LValue Src); + void callCStructMoveAssignmentOperator(LValue Dst, LValue Src); + RValue EmitCXXMemberOrOperatorCall(const CXXMethodDecl *Method, const CGCallee &Callee, @@ -3424,6 +3681,10 @@ public: SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch); + + llvm::Value *EmitISOVolatileLoad(const CallExpr *E); + llvm::Value *EmitISOVolatileStore(const CallExpr *E); + llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E); @@ -3482,6 +3743,8 @@ public: llvm::Value *EmitARCLoadWeak(Address addr); llvm::Value *EmitARCLoadWeakRetained(Address addr); llvm::Value *EmitARCStoreWeak(Address addr, llvm::Value *value, bool ignored); + void emitARCCopyAssignWeak(QualType Ty, Address DstAddr, Address SrcAddr); + void emitARCMoveAssignWeak(QualType Ty, Address DstAddr, Address SrcAddr); void EmitARCCopyWeak(Address dst, Address src); void EmitARCMoveWeak(Address dst, Address src); llvm::Value *EmitARCRetainAutorelease(QualType type, llvm::Value *value); @@ -3525,6 +3788,7 @@ public: static Destroyer destroyARCStrongPrecise; static Destroyer destroyARCWeak; static Destroyer emitARCIntrinsicUse; + static Destroyer destroyNonTrivialCStruct; void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr); llvm::Value *EmitObjCAutoreleasePoolPush(); @@ -3532,7 +3796,7 @@ public: void EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr); void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr); - /// \brief Emits a reference binding to the passed in expression. + /// Emits a reference binding to the passed in expression. 
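Several hunks above change callback parameters from "const llvm::function_ref<...> &" to plain "llvm::function_ref<...>" passed by value. function_ref is a small, non-owning, trivially copyable view of a callable (roughly a function pointer plus an erased object pointer), so copying it costs the same as passing a reference. A minimal standalone sketch of the idea follows; it is illustrative only, not LLVM's implementation, and FuncView/emitLoop are made-up names:

    #include <cstdint>
    #include <iostream>
    #include <type_traits>
    #include <utility>

    template <typename Fn> class FuncView;

    // Two-word, non-owning callable view in the spirit of llvm::function_ref.
    template <typename Ret, typename... Params> class FuncView<Ret(Params...)> {
      Ret (*Callback)(std::intptr_t, Params...) = nullptr; // type-erased trampoline
      std::intptr_t Callable = 0;                          // address of the callee

      template <typename C>
      static Ret trampoline(std::intptr_t Ptr, Params... Ps) {
        return (*reinterpret_cast<C *>(Ptr))(std::forward<Params>(Ps)...);
      }

    public:
      template <typename C, typename = std::enable_if_t<
                                 !std::is_same<std::decay_t<C>, FuncView>::value>>
      FuncView(C &&Fn)
          : Callback(trampoline<std::remove_reference_t<C>>),
            Callable(reinterpret_cast<std::intptr_t>(&Fn)) {}

      Ret operator()(Params... Ps) const {
        return Callback(Callable, std::forward<Params>(Ps)...);
      }
    };

    // Taking the view by value is as cheap as "const FuncView &" and is the
    // idiom the patch standardizes on for the OpenMP codegen callbacks.
    static void emitLoop(int N, FuncView<void(int)> Body) {
      for (int I = 0; I < N; ++I)
        Body(I);
    }

    int main() {
      int Sum = 0;
      emitLoop(4, [&](int I) { Sum += I; });
      std::cout << Sum << "\n"; // prints 6
    }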
RValue EmitReferenceBindingToExpr(const Expr *E); //===--------------------------------------------------------------------===// @@ -3610,6 +3874,9 @@ public: void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::Constant *fn, llvm::Constant *addr); + /// Call atexit() with function dtorStub. + void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub); + /// Emit code in this function to perform a guarded variable /// initialization. Guarded initializations are used when it's not /// possible to prove that an initialization will be done exactly @@ -3746,26 +4013,26 @@ public: /// enabled, a runtime check specified by \p Kind is also emitted. llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind); - /// \brief Emit a description of a type in a format suitable for passing to + /// Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); - /// \brief Convert a value into a format suitable for passing to a runtime + /// Convert a value into a format suitable for passing to a runtime /// sanitizer handler. llvm::Value *EmitCheckValue(llvm::Value *V); - /// \brief Emit a description of a source location in a format suitable for + /// Emit a description of a source location in a format suitable for /// passing to a runtime sanitizer handler. llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc); - /// \brief Create a basic block that will call a handler function in a + /// Create a basic block that will call a handler function in a /// sanitizer runtime with the provided arguments, and create a conditional /// branch to it. void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked, SanitizerHandler Check, ArrayRef<llvm::Constant *> StaticArgs, ArrayRef<llvm::Value *> DynamicArgs); - /// \brief Emit a slow path cross-DSO CFI check which calls __cfi_slowpath + /// Emit a slow path cross-DSO CFI check which calls __cfi_slowpath /// if Cond if false. void EmitCfiSlowPathCheck(SanitizerMask Kind, llvm::Value *Cond, llvm::ConstantInt *TypeId, llvm::Value *Ptr, @@ -3775,21 +4042,21 @@ public: /// checking is enabled. Otherwise, just emit an unreachable instruction. void EmitUnreachable(SourceLocation Loc); - /// \brief Create a basic block that will call the trap intrinsic, and emit a + /// Create a basic block that will call the trap intrinsic, and emit a /// conditional branch to it, for the -ftrapv checks. void EmitTrapCheck(llvm::Value *Checked); - /// \brief Emit a call to trap or debugtrap and attach function attribute + /// Emit a call to trap or debugtrap and attach function attribute /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); - /// \brief Emit a stub for the cross-DSO CFI check function. + /// Emit a stub for the cross-DSO CFI check function. void EmitCfiCheckStub(); - /// \brief Emit a cross-DSO CFI failure handling function. + /// Emit a cross-DSO CFI failure handling function. void EmitCfiCheckFail(); - /// \brief Create a check for a function parameter that may potentially be + /// Create a check for a function parameter that may potentially be /// declared as non-null. 
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum); @@ -3829,10 +4096,10 @@ private: void ExpandTypeFromArgs(QualType Ty, LValue Dst, SmallVectorImpl<llvm::Value *>::iterator &AI); - /// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg + /// ExpandTypeToArgs - Expand an CallArg \arg Arg, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, /// starting at index \arg IRCallArgPos. See ABIArgInfo::Expand. - void ExpandTypeToArgs(QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy, + void ExpandTypeToArgs(QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy, SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos); @@ -3844,7 +4111,7 @@ private: std::string &ConstraintStr, SourceLocation Loc); - /// \brief Attempts to statically evaluate the object size of E. If that + /// Attempts to statically evaluate the object size of E. If that /// fails, emits code to figure the size of E out for us. This is /// pass_object_size aware. /// @@ -3853,7 +4120,7 @@ private: llvm::IntegerType *ResType, llvm::Value *EmittedE); - /// \brief Emits the size of E, as required by __builtin_object_size. This + /// Emits the size of E, as required by __builtin_object_size. This /// function is aware of pass_object_size parameters, and will act accordingly /// if E is a parameter with the pass_object_size attribute. llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type, @@ -3973,6 +4240,48 @@ public: void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); + struct TargetMultiVersionResolverOption { + llvm::Function *Function; + TargetAttr::ParsedTargetAttr ParsedAttribute; + unsigned Priority; + TargetMultiVersionResolverOption( + const TargetInfo &TargInfo, llvm::Function *F, + const clang::TargetAttr::ParsedTargetAttr &PT) + : Function(F), ParsedAttribute(PT), Priority(0u) { + for (StringRef Feat : PT.Features) + Priority = std::max(Priority, + TargInfo.multiVersionSortPriority(Feat.substr(1))); + + if (!PT.Architecture.empty()) + Priority = std::max(Priority, + TargInfo.multiVersionSortPriority(PT.Architecture)); + } + + bool operator>(const TargetMultiVersionResolverOption &Other) const { + return Priority > Other.Priority; + } + }; + void EmitTargetMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<TargetMultiVersionResolverOption> Options); + + struct CPUDispatchMultiVersionResolverOption { + llvm::Function *Function; + // Note: EmitX86CPUSupports only has 32 bits available, so we store the mask + // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports, + // this can be extended to 64 bits. 
+ uint32_t FeatureMask; + CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask) + : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {} + bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const { + return FeatureMask > Other.FeatureMask; + } + }; + void EmitCPUDispatchMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<CPUDispatchMultiVersionResolverOption> Options); + static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); + private: QualType getVarArgType(const Expr *Arg); @@ -3988,110 +4297,35 @@ private: llvm::Value *EmitX86CpuIs(StringRef CPUStr); llvm::Value *EmitX86CpuSupports(const CallExpr *E); llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs); + llvm::Value *EmitX86CpuSupports(uint32_t Mask); llvm::Value *EmitX86CpuInit(); + llvm::Value * + FormResolverCondition(const TargetMultiVersionResolverOption &RO); }; -/// Helper class with most of the code for saving a value for a -/// conditional expression cleanup. -struct DominatingLLVMValue { - typedef llvm::PointerIntPair<llvm::Value*, 1, bool> saved_type; - - /// Answer whether the given value needs extra work to be saved. - static bool needsSaving(llvm::Value *value) { - // If it's not an instruction, we don't need to save. - if (!isa<llvm::Instruction>(value)) return false; - - // If it's an instruction in the entry block, we don't need to save. - llvm::BasicBlock *block = cast<llvm::Instruction>(value)->getParent(); - return (block != &block->getParent()->getEntryBlock()); - } - - /// Try to save the given value. - static saved_type save(CodeGenFunction &CGF, llvm::Value *value) { - if (!needsSaving(value)) return saved_type(value, false); - - // Otherwise, we need an alloca. - auto align = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType())); - Address alloca = - CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save"); - CGF.Builder.CreateStore(value, alloca); - - return saved_type(alloca.getPointer(), true); - } - - static llvm::Value *restore(CodeGenFunction &CGF, saved_type value) { - // If the value says it wasn't saved, trust that it's still dominating. - if (!value.getInt()) return value.getPointer(); - - // Otherwise, it should be an alloca instruction, as set up in save(). - auto alloca = cast<llvm::AllocaInst>(value.getPointer()); - return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment()); - } -}; - -/// A partial specialization of DominatingValue for llvm::Values that -/// might be llvm::Instructions. -template <class T> struct DominatingPointer<T,true> : DominatingLLVMValue { - typedef T *type; - static type restore(CodeGenFunction &CGF, saved_type value) { - return static_cast<T*>(DominatingLLVMValue::restore(CGF, value)); - } -}; - -/// A specialization of DominatingValue for Address. -template <> struct DominatingValue<Address> { - typedef Address type; - - struct saved_type { - DominatingLLVMValue::saved_type SavedValue; - CharUnits Alignment; - }; - - static bool needsSaving(type value) { - return DominatingLLVMValue::needsSaving(value.getPointer()); - } - static saved_type save(CodeGenFunction &CGF, type value) { - return { DominatingLLVMValue::save(CGF, value.getPointer()), - value.getAlignment() }; - } - static type restore(CodeGenFunction &CGF, saved_type value) { - return Address(DominatingLLVMValue::restore(CGF, value.SavedValue), - value.Alignment); - } -}; - -/// A specialization of DominatingValue for RValue. 
-template <> struct DominatingValue<RValue> { - typedef RValue type; - class saved_type { - enum Kind { ScalarLiteral, ScalarAddress, AggregateLiteral, - AggregateAddress, ComplexAddress }; +inline DominatingLLVMValue::saved_type +DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *value) { + if (!needsSaving(value)) return saved_type(value, false); - llvm::Value *Value; - unsigned K : 3; - unsigned Align : 29; - saved_type(llvm::Value *v, Kind k, unsigned a = 0) - : Value(v), K(k), Align(a) {} + // Otherwise, we need an alloca. + auto align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType())); + Address alloca = + CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save"); + CGF.Builder.CreateStore(value, alloca); - public: - static bool needsSaving(RValue value); - static saved_type save(CodeGenFunction &CGF, RValue value); - RValue restore(CodeGenFunction &CGF); + return saved_type(alloca.getPointer(), true); +} - // implementations in CGCleanup.cpp - }; +inline llvm::Value *DominatingLLVMValue::restore(CodeGenFunction &CGF, + saved_type value) { + // If the value says it wasn't saved, trust that it's still dominating. + if (!value.getInt()) return value.getPointer(); - static bool needsSaving(type value) { - return saved_type::needsSaving(value); - } - static saved_type save(CodeGenFunction &CGF, type value) { - return saved_type::save(CGF, value); - } - static type restore(CodeGenFunction &CGF, saved_type value) { - return value.restore(CGF); - } -}; + // Otherwise, it should be an alloca instruction, as set up in save(). + auto alloca = cast<llvm::AllocaInst>(value.getPointer()); + return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment()); +} } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 5bdf81aaf66e..ecdf78d4b347 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -123,7 +123,6 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); - BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC(); if (LangOpts.ObjC1) createObjCRuntime(); @@ -208,7 +207,10 @@ void CodeGenModule::createOpenMPRuntime() { OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); break; default: - OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); + if (LangOpts.OpenMPSimd) + OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); + else + OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } } @@ -392,26 +394,29 @@ void CodeGenModule::Release() { applyGlobalValReplacements(); applyReplacements(); checkAliases(); + emitMultiVersionFunctions(); EmitCXXGlobalInitFunc(); EmitCXXGlobalDtorFunc(); + registerGlobalDtorsWithAtExit(); EmitCXXThreadLocalInitFunc(); if (ObjCRuntime) if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction()) AddGlobalCtor(ObjCInitFunction); if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice && CUDARuntime) { - if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction()) + if (llvm::Function *CudaCtorFunction = + CUDARuntime->makeModuleCtorFunction()) AddGlobalCtor(CudaCtorFunction); - if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction()) - AddGlobalDtor(CudaDtorFunction); } - if (OpenMPRuntime) + if (OpenMPRuntime) { if (llvm::Function *OpenMPRegistrationFunction = 
OpenMPRuntime->emitRegistrationFunction()) { auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? OpenMPRegistrationFunction : nullptr; AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); } + OpenMPRuntime->clear(); + } if (PGOReader) { getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) @@ -453,6 +458,10 @@ void CodeGenModule::Release() { // Indicate that we want CodeView in the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); } + if (CodeGenOpts.ControlFlowGuard) { + // We want function ID tables for Control Flow Guard. + getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1); + } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers // FIXME: we could support it by stripping all the information introduced @@ -498,12 +507,26 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } + if (CodeGenOpts.CFProtectionReturn && + Target.checkCFProtectionReturnSupported(getDiags())) { + // Indicate that we want to instrument return control flow protection. + getModule().addModuleFlag(llvm::Module::Override, "cf-protection-return", + 1); + } + + if (CodeGenOpts.CFProtectionBranch && + Target.checkCFProtectionBranchSupported(getDiags())) { + // Indicate that we want to instrument branch control flow protection. + getModule().addModuleFlag(llvm::Module::Override, "cf-protection-branch", + 1); + } + if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) { // Indicate whether __nvvm_reflect should be configured to flush denormal // floating point values to 0. (This corresponds to its "__CUDA_FTZ" // property.) getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", - LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); + CodeGenOpts.FlushDenorm ? 1 : 0); } // Emit OpenCL specific module metadata: OpenCL/SPIR version. @@ -533,6 +556,9 @@ void CodeGenModule::Release() { getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel)); } + if (CodeGenOpts.NoPLT) + getModule().setRtLibUseGOT(); + SimplifyPersonality(); if (getCodeGenOpts().EmitDeclMetadata) @@ -544,7 +570,8 @@ void CodeGenModule::Release() { if (DebugInfo) DebugInfo->finalize(); - EmitVersionIdentMetadata(); + if (getCodeGenOpts().EmitVersionIdentMetadata) + EmitVersionIdentMetadata(); EmitTargetMetadata(); } @@ -580,13 +607,9 @@ llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) { } TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { - // Pointee values may have incomplete types, but they shall never be - // dereferenced. 
- if (AccessType->isIncompleteType()) - return TBAAAccessInfo::getIncompleteInfo(); - - uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity(); - return TBAAAccessInfo(getTBAATypeInfo(AccessType), Size); + if (!TBAA) + return TBAAAccessInfo(); + return TBAA->getAccessInfo(AccessType); } TBAAAccessInfo @@ -629,6 +652,14 @@ CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB); } +TBAAAccessInfo +CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, + TBAAAccessInfo SrcInfo) { + if (!TBAA) + return TBAAAccessInfo(); + return TBAA->mergeTBAAInfoForConditionalOperator(DestInfo, SrcInfo); +} + void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo) { if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo)) @@ -670,21 +701,129 @@ llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) { } void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, - const NamedDecl *D, - ForDefinition_t IsForDefinition) const { + const NamedDecl *D) const { + if (GV->hasDLLImportStorageClass()) + return; // Internal definitions always have default visibility. if (GV->hasLocalLinkage()) { GV->setVisibility(llvm::GlobalValue::DefaultVisibility); return; } - + if (!D) + return; // Set visibility for definitions. LinkageInfo LV = D->getLinkageAndVisibility(); - if (LV.isVisibilityExplicit() || - (IsForDefinition && !GV->hasAvailableExternallyLinkage())) + if (LV.isVisibilityExplicit() || !GV->isDeclarationForLinker()) GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } +static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, + llvm::GlobalValue *GV) { + if (GV->hasLocalLinkage()) + return true; + + if (!GV->hasDefaultVisibility() && !GV->hasExternalWeakLinkage()) + return true; + + // DLLImport explicitly marks the GV as external. + if (GV->hasDLLImportStorageClass()) + return false; + + const llvm::Triple &TT = CGM.getTriple(); + // Every other GV is local on COFF. + // Make an exception for windows OS in the triple: Some firmware builds use + // *-win32-macho triples. This (accidentally?) produced windows relocations + // without GOT tables in older clang versions; Keep this behaviour. + // FIXME: even thread local variables? + if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) + return true; + + // Only handle COFF and ELF for now. + if (!TT.isOSBinFormatELF()) + return false; + + // If this is not an executable, don't assume anything is local. + const auto &CGOpts = CGM.getCodeGenOpts(); + llvm::Reloc::Model RM = CGOpts.RelocationModel; + const auto &LOpts = CGM.getLangOpts(); + if (RM != llvm::Reloc::Static && !LOpts.PIE) + return false; + + // A definition cannot be preempted from an executable. + if (!GV->isDeclarationForLinker()) + return true; + + // Most PIC code sequences that assume that a symbol is local cannot produce a + // 0 if it turns out the symbol is undefined. While this is ABI and relocation + // depended, it seems worth it to handle it here. + if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage()) + return false; + + // PPC has no copy relocations and cannot use a plt entry as a symbol address. + llvm::Triple::ArchType Arch = TT.getArch(); + if (Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || + Arch == llvm::Triple::ppc64le) + return false; + + // If we can use copy relocations we can assume it is local. 
+ if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV)) + if (!Var->isThreadLocal() && + (RM == llvm::Reloc::Static || CGOpts.PIECopyRelocations)) + return true; + + // If we can use a plt entry as the symbol address we can assume it + // is local. + // FIXME: This should work for PIE, but the gold linker doesn't support it. + if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static) + return true; + + // Otherwise don't assue it is local. + return false; +} + +void CodeGenModule::setDSOLocal(llvm::GlobalValue *GV) const { + GV->setDSOLocal(shouldAssumeDSOLocal(*this, GV)); +} + +void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, + GlobalDecl GD) const { + const auto *D = dyn_cast<NamedDecl>(GD.getDecl()); + // C++ destructors have a few C++ ABI specific special cases. + if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(D)) { + getCXXABI().setCXXDestructorDLLStorage(GV, Dtor, GD.getDtorType()); + return; + } + setDLLImportDLLExport(GV, D); +} + +void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, + const NamedDecl *D) const { + if (D && D->isExternallyVisible()) { + if (D->hasAttr<DLLImportAttr>()) + GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); + else if (D->hasAttr<DLLExportAttr>() && !GV->isDeclarationForLinker()) + GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); + } +} + +void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, + GlobalDecl GD) const { + setDLLImportDLLExport(GV, GD); + setGlobalVisibilityAndLocal(GV, dyn_cast<NamedDecl>(GD.getDecl())); +} + +void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, + const NamedDecl *D) const { + setDLLImportDLLExport(GV, D); + setGlobalVisibilityAndLocal(GV, D); +} + +void CodeGenModule::setGlobalVisibilityAndLocal(llvm::GlobalValue *GV, + const NamedDecl *D) const { + setGlobalVisibility(GV, D); + setDSOLocal(GV); +} + static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { return llvm::StringSwitch<llvm::GlobalVariable::ThreadLocalMode>(S) .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel) @@ -722,36 +861,68 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const { GV->setThreadLocalMode(TLM); } -StringRef CodeGenModule::getMangledName(GlobalDecl GD) { - GlobalDecl CanonicalGD = GD.getCanonicalDecl(); +static std::string getCPUSpecificMangling(const CodeGenModule &CGM, + StringRef Name) { + const TargetInfo &Target = CGM.getTarget(); + return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str(); +} - // Some ABIs don't have constructor variants. Make sure that base and - // complete constructors get mangled the same. - if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) { - if (!getTarget().getCXXABI().hasConstructorVariants()) { - CXXCtorType OrigCtorType = GD.getCtorType(); - assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete); - if (OrigCtorType == Ctor_Base) - CanonicalGD = GlobalDecl(CD, Ctor_Complete); - } +static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, + const CPUSpecificAttr *Attr, + raw_ostream &Out) { + // cpu_specific gets the current name, dispatch gets the resolver. 
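The shouldAssumeDSOLocal and setDSOLocal hunks above decide when a reference to a symbol may skip GOT/PLT indirection because its definition cannot be preempted at run time. Seen from the source level, the effect is roughly the following sketch (x86-64 ELF; actual codegen also depends on visibility and linker options such as -Bsymbolic):

    int visible_global = 1;                  /* externally visible definition */

    int get(void) { return visible_global; }
    /* clang -O2 -fno-pic: the definition cannot be preempted, the global is
     * marked dso_local, and the load is a direct PC-relative access, e.g.
     *     movl visible_global(%rip), %eax
     * clang -O2 -fpic (building a shared object, default visibility): another
     * DSO may interpose visible_global, so dso_local is not set and the
     * access goes through the GOT instead. */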
+ if (Attr) + Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName()); + else + Out << ".resolver"; +} + +static void AppendTargetMangling(const CodeGenModule &CGM, + const TargetAttr *Attr, raw_ostream &Out) { + if (Attr->isDefaultVersion()) + return; + + Out << '.'; + const TargetInfo &Target = CGM.getTarget(); + TargetAttr::ParsedTargetAttr Info = + Attr->parse([&Target](StringRef LHS, StringRef RHS) { + // Multiversioning doesn't allow "no-${feature}", so we can + // only have "+" prefixes here. + assert(LHS.startswith("+") && RHS.startswith("+") && + "Features should always have a prefix."); + return Target.multiVersionSortPriority(LHS.substr(1)) > + Target.multiVersionSortPriority(RHS.substr(1)); + }); + + bool IsFirst = true; + + if (!Info.Architecture.empty()) { + IsFirst = false; + Out << "arch_" << Info.Architecture; } - auto FoundName = MangledDeclNames.find(CanonicalGD); - if (FoundName != MangledDeclNames.end()) - return FoundName->second; + for (StringRef Feat : Info.Features) { + if (!IsFirst) + Out << '_'; + IsFirst = false; + Out << Feat.substr(1); + } +} - const auto *ND = cast<NamedDecl>(GD.getDecl()); +static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, + const NamedDecl *ND, + bool OmitMultiVersionMangling = false) { SmallString<256> Buffer; - StringRef Str; - if (getCXXABI().getMangleContext().shouldMangleDeclName(ND)) { + llvm::raw_svector_ostream Out(Buffer); + MangleContext &MC = CGM.getCXXABI().getMangleContext(); + if (MC.shouldMangleDeclName(ND)) { llvm::raw_svector_ostream Out(Buffer); if (const auto *D = dyn_cast<CXXConstructorDecl>(ND)) - getCXXABI().getMangleContext().mangleCXXCtor(D, GD.getCtorType(), Out); + MC.mangleCXXCtor(D, GD.getCtorType(), Out); else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND)) - getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out); + MC.mangleCXXDtor(D, GD.getDtorType(), Out); else - getCXXABI().getMangleContext().mangleName(ND, Out); - Str = Out.str(); + MC.mangleName(ND, Out); } else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); @@ -761,14 +932,103 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) { llvm::raw_svector_ostream Out(Buffer); Out << "__regcall3__" << II->getName(); - Str = Out.str(); } else { - Str = II->getName(); + Out << II->getName(); + } + } + + if (const auto *FD = dyn_cast<FunctionDecl>(ND)) + if (FD->isMultiVersion() && !OmitMultiVersionMangling) { + if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()) + AppendCPUSpecificCPUDispatchMangling( + CGM, FD->getAttr<CPUSpecificAttr>(), Out); + else + AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out); + } + + return Out.str(); +} + +void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD, + const FunctionDecl *FD) { + if (!FD->isMultiVersion()) + return; + + // Get the name of what this would be without the 'target' attribute. This + // allows us to lookup the version that was emitted when this wasn't a + // multiversion function. 
+ std::string NonTargetName = + getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); + GlobalDecl OtherGD; + if (lookupRepresentativeDecl(NonTargetName, OtherGD)) { + assert(OtherGD.getCanonicalDecl() + .getDecl() + ->getAsFunction() + ->isMultiVersion() && + "Other GD should now be a multiversioned function"); + // OtherFD is the version of this function that was mangled BEFORE + // becoming a MultiVersion function. It potentially needs to be updated. + const FunctionDecl *OtherFD = + OtherGD.getCanonicalDecl().getDecl()->getAsFunction(); + std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD); + // This is so that if the initial version was already the 'default' + // version, we don't try to update it. + if (OtherName != NonTargetName) { + // Remove instead of erase, since others may have stored the StringRef + // to this. + const auto ExistingRecord = Manglings.find(NonTargetName); + if (ExistingRecord != std::end(Manglings)) + Manglings.remove(&(*ExistingRecord)); + auto Result = Manglings.insert(std::make_pair(OtherName, OtherGD)); + MangledDeclNames[OtherGD.getCanonicalDecl()] = Result.first->first(); + if (llvm::GlobalValue *Entry = GetGlobalValue(NonTargetName)) + Entry->setName(OtherName); + } + } +} + +StringRef CodeGenModule::getMangledName(GlobalDecl GD) { + GlobalDecl CanonicalGD = GD.getCanonicalDecl(); + + // Some ABIs don't have constructor variants. Make sure that base and + // complete constructors get mangled the same. + if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) { + if (!getTarget().getCXXABI().hasConstructorVariants()) { + CXXCtorType OrigCtorType = GD.getCtorType(); + assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete); + if (OrigCtorType == Ctor_Base) + CanonicalGD = GlobalDecl(CD, Ctor_Complete); } } + const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); + // Since CPUSpecific can require multiple emits per decl, store the manglings + // separately. + if (FD && + (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) { + const auto *SD = FD->getAttr<CPUSpecificAttr>(); + + std::pair<GlobalDecl, unsigned> SpecCanonicalGD{ + CanonicalGD, + SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()}; + + auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD); + if (FoundName != CPUSpecificMangledDeclNames.end()) + return FoundName->second; + + auto Result = CPUSpecificManglings.insert( + std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD)); + return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first(); + } + + auto FoundName = MangledDeclNames.find(CanonicalGD); + if (FoundName != MangledDeclNames.end()) + return FoundName->second; + // Keep the first result in the case of a mangling collision. - auto Result = Manglings.insert(std::make_pair(Str, GD)); + const auto *ND = cast<NamedDecl>(GD.getDecl()); + auto Result = + Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } @@ -808,6 +1068,11 @@ void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, /// AddGlobalDtor - Add a function to the list that will be called /// when the module is unloaded. void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) { + if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) { + DtorsUsingAtExit[Priority].push_back(Dtor); + return; + } + // FIXME: Type coercion of void()* types. 
GlobalDtors.push_back(Structor(Priority, Dtor, nullptr)); } @@ -855,14 +1120,8 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { GVALinkage Linkage = getContext().GetGVALinkageForFunction(D); - if (isa<CXXDestructorDecl>(D) && - getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), - GD.getDtorType())) { - // Destructor variants in the Microsoft C++ ABI are always internal or - // linkonce_odr thunks emitted on an as-needed basis. - return Linkage == GVA_Internal ? llvm::GlobalValue::InternalLinkage - : llvm::GlobalValue::LinkOnceODRLinkage; - } + if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(D)) + return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType()); if (isa<CXXConstructorDecl>(D) && cast<CXXConstructorDecl>(D)->isInheritingConstructor() && @@ -876,25 +1135,6 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false); } -void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F) { - const auto *FD = cast<FunctionDecl>(GD.getDecl()); - - if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) { - if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) { - // Don't dllexport/import destructor thunks. - F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); - return; - } - } - - if (FD->hasAttr<DLLImportAttr>()) - F->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); - else if (FD->hasAttr<DLLExportAttr>()) - F->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); - else - F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); -} - llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { llvm::MDString *MDS = dyn_cast<llvm::MDString>(MD); if (!MDS) return nullptr; @@ -902,11 +1142,6 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } -void CodeGenModule::setFunctionDefinitionAttributes(const FunctionDecl *D, - llvm::Function *F) { - setNonAliasAttributes(D, F); -} - void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, const CGFunctionInfo &Info, llvm::Function *F) { @@ -937,6 +1172,34 @@ static bool hasUnwindExceptions(const LangOptions &LangOpts) { return true; } +static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM, + const CXXMethodDecl *MD) { + // Check that the type metadata can ever actually be used by a call. + if (!CGM.getCodeGenOpts().LTOUnit || + !CGM.HasHiddenLTOVisibility(MD->getParent())) + return false; + + // Only functions whose address can be taken with a member function pointer + // need this sort of type metadata. 
+ return !MD->isStatic() && !MD->isVirtual() && !isa<CXXConstructorDecl>(MD) && + !isa<CXXDestructorDecl>(MD); +} + +std::vector<const CXXRecordDecl *> +CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { + llvm::SetVector<const CXXRecordDecl *> MostBases; + + std::function<void (const CXXRecordDecl *)> CollectMostBases; + CollectMostBases = [&](const CXXRecordDecl *RD) { + if (RD->getNumBases() == 0) + MostBases.insert(RD); + for (const CXXBaseSpecifier &B : RD->bases()) + CollectMostBases(B.getType()->getAsCXXRecordDecl()); + }; + CollectMostBases(RD); + return MostBases.takeVector(); +} + void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F) { llvm::AttrBuilder B; @@ -947,12 +1210,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); - if (LangOpts.getStackProtector() == LangOptions::SSPOn) - B.addAttribute(llvm::Attribute::StackProtect); - else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) - B.addAttribute(llvm::Attribute::StackProtectStrong); - else if (LangOpts.getStackProtector() == LangOptions::SSPReq) - B.addAttribute(llvm::Attribute::StackProtectReq); + if (!D || !D->hasAttr<NoStackProtectorAttr>()) { + if (LangOpts.getStackProtector() == LangOptions::SSPOn) + B.addAttribute(llvm::Attribute::StackProtect); + else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) + B.addAttribute(llvm::Attribute::StackProtectStrong); + else if (LangOpts.getStackProtector() == LangOptions::SSPReq) + B.addAttribute(llvm::Attribute::StackProtectReq); + } if (!D) { // If we don't have a declaration to control inlining, the function isn't @@ -1044,6 +1309,10 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (alignment) F->setAlignment(alignment); + if (!D->hasAttr<AlignedAttr>()) + if (LangOpts.FunctionAlignment) + F->setAlignment(1 << LangOpts.FunctionAlignment); + // Some C++ ABIs require 2-byte alignment for member functions, in order to // reserve a bit for differentiating between virtual and non-virtual member // functions. If the current target's C++ ABI requires this and this is a @@ -1056,13 +1325,26 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // In the cross-dso CFI mode, we want !type attributes on definitions only. if (CodeGenOpts.SanitizeCfiCrossDso) if (auto *FD = dyn_cast<FunctionDecl>(D)) - CreateFunctionTypeMetadata(FD, F); + CreateFunctionTypeMetadataForIcall(FD, F); + + // Emit type metadata on member functions for member function pointer checks. + // These are only ever necessary on definitions; we're guaranteed that the + // definition will be present in the LTO unit as a result of LTO visibility. 
+ auto *MD = dyn_cast<CXXMethodDecl>(D); + if (MD && requiresMemberFunctionPointerTypeMetadata(*this, MD)) { + for (const CXXRecordDecl *Base : getMostBaseClasses(MD->getParent())) { + llvm::Metadata *Id = + CreateMetadataIdentifierForType(Context.getMemberPointerType( + MD->getType(), Context.getRecordType(Base).getTypePtr())); + F->addTypeMetadata(0, Id); + } + } } -void CodeGenModule::SetCommonAttributes(const Decl *D, - llvm::GlobalValue *GV) { - if (const auto *ND = dyn_cast_or_null<NamedDecl>(D)) - setGlobalVisibility(GV, ND, ForDefinition); +void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { + const Decl *D = GD.getDecl(); + if (dyn_cast_or_null<NamedDecl>(D)) + setGVProperties(GV, GD); else GV->setVisibility(llvm::GlobalValue::DefaultVisibility); @@ -1070,19 +1352,59 @@ void CodeGenModule::SetCommonAttributes(const Decl *D, addUsedGlobal(GV); } -void CodeGenModule::setAliasAttributes(const Decl *D, - llvm::GlobalValue *GV) { - SetCommonAttributes(D, GV); +bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D, + llvm::AttrBuilder &Attrs) { + // Add target-cpu and target-features attributes to functions. If + // we have a decl for the function and it has a target attribute then + // parse that and add it to the feature set. + StringRef TargetCPU = getTarget().getTargetOpts().CPU; + std::vector<std::string> Features; + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + FD = FD ? FD->getMostRecentDecl() : FD; + const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr; + const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; + bool AddedAttr = false; + if (TD || SD) { + llvm::StringMap<bool> FeatureMap; + getFunctionFeatureMap(FeatureMap, FD); + + // Produce the canonical string for this set of features. + for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap) + Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str()); + + // Now add the target-cpu and target-features to the function. + // While we populated the feature map above, we still need to + // get and parse the target attribute so we can get the cpu for + // the function. + if (TD) { + TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); + if (ParsedAttr.Architecture != "" && + getTarget().isValidCPUName(ParsedAttr.Architecture)) + TargetCPU = ParsedAttr.Architecture; + } + } else { + // Otherwise just add the existing target cpu and target features to the + // function. + Features = getTarget().getTargetOpts().Features; + } + + if (TargetCPU != "") { + Attrs.addAttribute("target-cpu", TargetCPU); + AddedAttr = true; + } + if (!Features.empty()) { + llvm::sort(Features.begin(), Features.end()); + Attrs.addAttribute("target-features", llvm::join(Features, ",")); + AddedAttr = true; + } - // Process the dllexport attribute based on whether the original definition - // (not necessarily the aliasee) was exported. 
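The requiresMemberFunctionPointerTypeMetadata and addTypeMetadata hunks above attach type metadata to eligible methods (non-static, non-virtual, not constructors or destructors) so that indirect calls through pointers to members can be verified under LTO; in clang this corresponds to the CFI member-function-call check. At the source level, the kind of call being guarded looks like this (illustrative sketch only):

    struct Counter {
      int Value = 0;
      int get() const { return Value; }   // non-static, non-virtual:
      void bump() { ++Value; }             //   both receive type metadata
    };

    // An indirect call through a pointer-to-member; the metadata added above
    // lets the CFI machinery check that M designates a method of a compatible
    // class with this signature.
    int readThrough(const Counter &C, int (Counter::*M)() const) {
      return (C.*M)();
    }

    int demo() {
      Counter C;
      C.bump();
      return readThrough(C, &Counter::get); // returns 1
    }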
- if (D->hasAttr<DLLExportAttr>()) - GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + return AddedAttr; } -void CodeGenModule::setNonAliasAttributes(const Decl *D, +void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO) { - SetCommonAttributes(D, GO); + const Decl *D = GD.getDecl(); + SetCommonAttributes(GD, GO); if (D) { if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) { @@ -1096,55 +1418,60 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D, if (auto *F = dyn_cast<llvm::Function>(GO)) { if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>()) - if (!D->getAttr<SectionAttr>()) - F->addFnAttr("implicit-section-name", SA->getName()); + if (!D->getAttr<SectionAttr>()) + F->addFnAttr("implicit-section-name", SA->getName()); + + llvm::AttrBuilder Attrs; + if (GetCPUAndFeaturesAttributes(D, Attrs)) { + // We know that GetCPUAndFeaturesAttributes will always have the + // newest set, since it has the newest possible FunctionDecl, so the + // new ones should replace the old. + F->removeFnAttr("target-cpu"); + F->removeFnAttr("target-features"); + F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); + } } - - if (const SectionAttr *SA = D->getAttr<SectionAttr>()) + + if (const auto *CSA = D->getAttr<CodeSegAttr>()) + GO->setSection(CSA->getName()); + else if (const auto *SA = D->getAttr<SectionAttr>()) GO->setSection(SA->getName()); } - getTargetCodeGenInfo().setTargetAttributes(D, GO, *this, ForDefinition); + getTargetCodeGenInfo().setTargetAttributes(D, GO, *this); } -void CodeGenModule::SetInternalFunctionAttributes(const Decl *D, +void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI) { + const Decl *D = GD.getDecl(); SetLLVMFunctionAttributes(D, FI, F); SetLLVMFunctionAttributesForDefinition(D, F); F->setLinkage(llvm::Function::InternalLinkage); - setNonAliasAttributes(D, F); + setNonAliasAttributes(GD, F); } -static void setLinkageForGV(llvm::GlobalValue *GV, - const NamedDecl *ND) { +static void setLinkageForGV(llvm::GlobalValue *GV, const NamedDecl *ND) { // Set linkage and visibility in case we never see a definition. LinkageInfo LV = ND->getLinkageAndVisibility(); - if (!isExternallyVisible(LV.getLinkage())) { - // Don't set internal linkage on declarations. - } else { - if (ND->hasAttr<DLLImportAttr>()) { - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - } else if (ND->hasAttr<DLLExportAttr>()) { - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) { - // "extern_weak" is overloaded in LLVM; we probably should have - // separate linkage types for this. - GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); - } - } + // Don't set internal linkage on declarations. + // "extern_weak" is overloaded in LLVM; we probably should have + // separate linkage types for this. + if (isExternallyVisible(LV.getLinkage()) && + (ND->hasAttr<WeakAttr>() || ND->isWeakImported())) + GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } -void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD, - llvm::Function *F) { +void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, + llvm::Function *F) { // Only if we are checking indirect calls. if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall)) return; - // Non-static class methods are handled via vtable pointer checks elsewhere. 
+ // Non-static class methods are handled via vtable or member function pointer + // checks elsewhere. if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic()) return; @@ -1168,8 +1495,7 @@ void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD, void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction, - bool IsThunk, - ForDefinition_t IsForDefinition) { + bool IsThunk) { if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) { // If this is an intrinsic function, set the function's attributes @@ -1183,9 +1509,8 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, if (!IsIncompleteFunction) { SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F); // Setup target-specific attributes. - if (!IsForDefinition) - getTargetCodeGenInfo().setTargetAttributes(FD, F, *this, - NotForDefinition); + if (F->isDeclaration()) + getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); } // Add the Returned attribute for "this", except for iOS 5 and earlier @@ -1204,14 +1529,12 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, // overridden by a definition. setLinkageForGV(F, FD); - setGlobalVisibility(F, FD, NotForDefinition); - - if (FD->getAttr<PragmaClangTextSectionAttr>()) { - F->addFnAttr("implicit-section-name"); - } + setGVProperties(F, FD); - if (const SectionAttr *SA = FD->getAttr<SectionAttr>()) - F->setSection(SA->getName()); + if (const auto *CSA = FD->getAttr<CodeSegAttr>()) + F->setSection(CSA->getName()); + else if (const auto *SA = FD->getAttr<SectionAttr>()) + F->setSection(SA->getName()); if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by @@ -1238,7 +1561,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, // Don't emit entries for function declarations in the cross-DSO mode. This // is handled with better precision by the receiving DSO. if (!CodeGenOpts.SanitizeCfiCrossDso) - CreateFunctionTypeMetadata(FD, F); + CreateFunctionTypeMetadataForIcall(FD, F); if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>()) getOpenMPRuntime().emitDeclareSimdFunction(FD, F); @@ -1299,6 +1622,12 @@ void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) { LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } +void CodeGenModule::AddELFLibDirective(StringRef Lib) { + auto &C = getLLVMContext(); + LinkerOptionsMetadata.push_back(llvm::MDNode::get( + C, {llvm::MDString::get(C, "lib"), llvm::MDString::get(C, Lib)})); +} + void CodeGenModule::AddDependentLib(StringRef Lib) { llvm::SmallString<24> Opt; getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt); @@ -1306,7 +1635,7 @@ void CodeGenModule::AddDependentLib(StringRef Lib) { LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } -/// \brief Add link options implied by the given module, including modules +/// Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, SmallVectorImpl<llvm::MDNode *> &Metadata, @@ -1325,6 +1654,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, // Add linker options to link against the libraries/frameworks // described by this module. 
llvm::LLVMContext &Context = CGM.getLLVMContext(); + + // For modules that use export_as for linking, use that module + // name instead. + if (Mod->UseExportAsModuleLinkName) + return; + for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) { // Link against a framework. Frameworks are currently Darwin only, so we // don't to ask TargetCodeGenInfo for the spelling of the linker option. @@ -1586,7 +1921,8 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, StringRef Category) const { // For now globals can be blacklisted only in ASan and KASan. const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask & - (SanitizerKind::Address | SanitizerKind::KernelAddress | SanitizerKind::HWAddress); + (SanitizerKind::Address | SanitizerKind::KernelAddress | + SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress); if (!EnabledAsanMask) return false; const auto &SanitizerBL = getContext().getSanitizerBlacklist(); @@ -1615,9 +1951,10 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, StringRef Category) const { if (!LangOpts.XRayInstrument) return false; + const auto &XRayFilter = getContext().getXRayFilter(); using ImbueAttr = XRayFunctionFilter::ImbueAttribute; - auto Attr = XRayFunctionFilter::ImbueAttribute::NONE; + auto Attr = ImbueAttr::NONE; if (Loc.isValid()) Attr = XRayFilter.shouldImbueLocation(Loc, Category); if (Attr == ImbueAttr::NONE) @@ -1662,7 +1999,8 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && - getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global)) + getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) && + !isTypeConstant(Global->getType(), false)) return false; return true; @@ -1691,6 +2029,7 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor( /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); + setDSOLocal(GV); return ConstantAddress(GV, Alignment); } @@ -1742,6 +2081,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (Global->hasAttr<IFuncAttr>()) return emitIFuncDefinition(GD); + // If this is a cpu_dispatch multiversion function, emit the resolver. + if (Global->hasAttr<CPUDispatchAttr>()) + return emitCPUDispatchDefinition(GD); + // If this is CUDA, be selective about which declarations we emit. if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { @@ -2058,6 +2401,124 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); +void CodeGenModule::emitMultiVersionFunctions() { + for (GlobalDecl GD : MultiVersionFuncs) { + SmallVector<CodeGenFunction::TargetMultiVersionResolverOption, 10> Options; + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + getContext().forEachMultiversionedFunctionVersion( + FD, [this, &GD, &Options](const FunctionDecl *CurFD) { + GlobalDecl CurGD{ + (CurFD->isDefined() ? 
CurFD->getDefinition() : CurFD)}; + StringRef MangledName = getMangledName(CurGD); + llvm::Constant *Func = GetGlobalValue(MangledName); + if (!Func) { + if (CurFD->isDefined()) { + EmitGlobalFunctionDefinition(CurGD, nullptr); + Func = GetGlobalValue(MangledName); + } else { + const CGFunctionInfo &FI = + getTypes().arrangeGlobalDeclaration(GD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false, + /*DontDefer=*/false, ForDefinition); + } + assert(Func && "This should have just been created"); + } + Options.emplace_back(getTarget(), cast<llvm::Function>(Func), + CurFD->getAttr<TargetAttr>()->parse()); + }); + + llvm::Function *ResolverFunc = cast<llvm::Function>( + GetGlobalValue((getMangledName(GD) + ".resolver").str())); + if (supportsCOMDAT()) + ResolverFunc->setComdat( + getModule().getOrInsertComdat(ResolverFunc->getName())); + std::stable_sort( + Options.begin(), Options.end(), + std::greater<CodeGenFunction::TargetMultiVersionResolverOption>()); + CodeGenFunction CGF(*this); + CGF.EmitTargetMultiVersionResolver(ResolverFunc, Options); + } +} + +void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + assert(FD && "Not a FunctionDecl?"); + const auto *DD = FD->getAttr<CPUDispatchAttr>(); + assert(DD && "Not a cpu_dispatch Function?"); + llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); + + StringRef ResolverName = getMangledName(GD); + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + auto *ResolverFunc = cast<llvm::Function>( + GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, + /*ForVTable=*/false)); + + SmallVector<CodeGenFunction::CPUDispatchMultiVersionResolverOption, 10> + Options; + const TargetInfo &Target = getTarget(); + for (const IdentifierInfo *II : DD->cpus()) { + // Get the name of the target function so we can look it up/create it. + std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + + getCPUSpecificMangling(*this, II->getName()); + llvm::Constant *Func = GetOrCreateLLVMFunction( + MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + llvm::SmallVector<StringRef, 32> Features; + Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); + llvm::transform(Features, Features.begin(), + [](StringRef Str) { return Str.substr(1); }); + Features.erase(std::remove_if( + Features.begin(), Features.end(), [&Target](StringRef Feat) { + return !Target.validateCpuSupports(Feat); + }), Features.end()); + Options.emplace_back(cast<llvm::Function>(Func), + CodeGenFunction::GetX86CpuSupportsMask(Features)); + } + + llvm::sort( + Options.begin(), Options.end(), + std::greater<CodeGenFunction::CPUDispatchMultiVersionResolverOption>()); + CodeGenFunction CGF(*this); + CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options); +} + +/// If an ifunc for the specified mangled name is not in the module, create and +/// return an llvm IFunc Function with the specified type. 
+llvm::Constant * +CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, + const FunctionDecl *FD) { + std::string MangledName = + getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); + std::string IFuncName = MangledName + ".ifunc"; + if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName)) + return IFuncGV; + + // Since this is the first time we've created this IFunc, make sure + // that we put this multiversioned function into the list to be + // replaced later if necessary (target multiversioning only). + if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) + MultiVersionFuncs.push_back(GD); + + std::string ResolverName = MangledName + ".resolver"; + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + llvm::Constant *Resolver = + GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( + DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); + GIF->setName(IFuncName); + SetCommonAttributes(FD, GIF); + + return GIF; +} + /// GetOrCreateLLVMFunction - If the specified mangled name is not in the /// module, create and return an llvm Function with the specified type. If there /// is something in the module with the specified name, return it potentially @@ -2071,6 +2532,33 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); + // Any attempts to use a MultiVersion function should result in retrieving + // the iFunc instead. Name Mangling will handle the rest of the changes. + if (const FunctionDecl *FD = cast_or_null<FunctionDecl>(D)) { + // For the device mark the function as one that should be emitted. + if (getLangOpts().OpenMPIsDevice && OpenMPRuntime && + !OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() && + !DontDefer && !IsForDefinition) { + const FunctionDecl *FDDef = FD->getDefinition(); + GlobalDecl GDDef; + if (const auto *CD = dyn_cast<CXXConstructorDecl>(FDDef)) + GDDef = GlobalDecl(CD, GD.getCtorType()); + else if (const auto *DD = dyn_cast<CXXDestructorDecl>(FDDef)) + GDDef = GlobalDecl(DD, GD.getDtorType()); + else + GDDef = GlobalDecl(FDDef); + addDeferredDeclToEmit(GDDef); + } + + if (FD->isMultiVersion()) { + const auto *TA = FD->getAttr<TargetAttr>(); + if (TA && TA->isDefaultVersion()) + UpdateMultiVersionNames(GD, FD); + if (!IsForDefinition) + return GetOrCreateMultiVersionIFunc(GD, Ty, FD); + } + } + // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { @@ -2081,8 +2569,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( } // Handle dropped DLL attributes. - if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>()) + if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>()) { Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + setDSOLocal(Entry); + } // If there are two attempts to define the same mangled name, issue an // error. 
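Together with UpdateMultiVersionNames and emitMultiVersionFunctions above, GetOrCreateMultiVersionIFunc implements attribute((target))-based function multi-versioning: each version gets a suffixed symbol, call sites bind to a ".ifunc", and a ".resolver" picks the best version at load time. A user-level sketch for x86-64 (suffix spellings follow the AppendTargetMangling logic earlier in this patch):

    // Two versions of the same function; the "default" version must exist.
    __attribute__((target("default")))
    int dot(const int *A, const int *B, int N) {
      int S = 0;
      for (int I = 0; I < N; ++I) S += A[I] * B[I];
      return S;
    }

    __attribute__((target("avx2")))
    int dot(const int *A, const int *B, int N) {
      int S = 0;
      for (int I = 0; I < N; ++I) S += A[I] * B[I]; // vectorizes under AVX2
      return S;
    }

    // Callers reference the ifunc; the resolver chooses the highest-priority
    // version the CPU supports. Roughly, the emitted symbols are the default
    // definition, an ".avx2"-suffixed definition, a ".resolver", and the
    // ".ifunc" that call sites actually use.
    int use(const int *A, const int *B, int N) { return dot(A, B, N); }

cpu_dispatch/cpu_specific functions reuse the same resolver-plus-ifunc scheme through emitCPUDispatchDefinition above, selecting among versions by the per-CPU feature masks instead of target-attribute priorities.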
@@ -2094,8 +2584,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( (GD.getCanonicalDecl().getDecl() != OtherGD.getCanonicalDecl().getDecl()) && DiagnosedConflictingDefinitions.insert(GD).second) { - getDiags().Report(D->getLocation(), - diag::err_duplicate_mangled_name); + getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) + << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } @@ -2157,8 +2647,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( assert(F->getName() == MangledName && "name was uniqued!"); if (D) - SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk, - IsForDefinition); + SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); F->addAttributes(llvm::AttributeList::FunctionIndex, B); @@ -2234,6 +2723,16 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, Ty = getTypes().ConvertFunctionType(CanonTy, FD); } + // Devirtualized destructor calls may come through here instead of via + // getAddrOfCXXStructor. Make sure we use the MS ABI base destructor instead + // of the complete destructor when necessary. + if (const auto *DD = dyn_cast<CXXDestructorDecl>(GD.getDecl())) { + if (getTarget().getCXXABI().isMicrosoft() && + GD.getDtorType() == Dtor_Complete && + DD->getParent()->getNumVBases() == 0) + GD = GlobalDecl(DD, Dtor_Base); + } + StringRef MangledName = getMangledName(GD); return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, /*IsThunk=*/false, llvm::AttributeList(), @@ -2255,7 +2754,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { // Demangle the premangled name from getTerminateFn() IdentifierInfo &CXXII = - (Name == "_ZSt9terminatev" || Name == "\01?terminate@@YAXXZ") + (Name == "_ZSt9terminatev" || Name == "?terminate@@YAXXZ") ? C.Idents.get("terminate") : C.Idents.get(Name); @@ -2302,6 +2801,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, F->setLinkage(llvm::GlobalValue::ExternalLinkage); } } + setDSOLocal(F); } } @@ -2313,13 +2813,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, llvm::Constant * CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name, llvm::AttributeList ExtraAttrs) { - llvm::Constant *C = - GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, - /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs); - if (auto *F = dyn_cast<llvm::Function>(C)) - if (F->empty()) - F->setCallingConv(getBuiltinCC()); - return C; + return CreateRuntimeFunction(FTy, Name, ExtraAttrs, true); } /// isTypeConstant - Determine whether an object of this type can be emitted @@ -2350,7 +2844,7 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. /// -/// If IsForDefinition is true, it is guranteed that an actual global with +/// If IsForDefinition is true, it is guaranteed that an actual global with /// type Ty will be returned, not conversion of a variable with the same /// mangled name but some other type. 
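The Microsoft-ABI destructor adjustment above can be pictured with a small, hypothetical example:

    struct File { ~File(); };            // no virtual bases
    void drop(File *f) { f->~File(); }   // a devirtualized destructor call like this may be
                                         // emitted against the base-object destructor, which
                                         // is equivalent to the complete destructor when the
                                         // class has no virtual bases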
llvm::Constant * @@ -2370,6 +2864,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>()) Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D) + getOpenMPRuntime().registerTargetGlobalVariable(D, Entry); + if (Entry->getType() == Ty) return Entry; @@ -2386,8 +2883,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, (OtherD = dyn_cast<VarDecl>(OtherGD.getDecl())) && OtherD->hasInit() && DiagnosedConflictingDefinitions.insert(D).second) { - getDiags().Report(D->getLocation(), - diag::err_duplicate_mangled_name); + getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) + << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } @@ -2438,6 +2935,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, // Handle things which are present even on external declarations. if (D) { + if (LangOpts.OpenMP && !LangOpts.OpenMPSimd) + getOpenMPRuntime().registerTargetGlobalVariable(D, GV); + // FIXME: This code is overly simple and should be merged with other global // handling. GV->setConstant(isTypeConstant(D->getType(), false)); @@ -2445,7 +2945,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); setLinkageForGV(GV, D); - setGlobalVisibility(GV, D, NotForDefinition); if (D->getTLSKind()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) @@ -2453,6 +2952,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, setTLSMode(GV, *D); } + setGVProperties(GV, D); + // If required by the ABI, treat declarations of static data members with // inline initializers as definitions. if (getContext().isMSStaticDataMemberInlineDefinition(D)) { @@ -2501,7 +3002,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GetAddrOfGlobalVar(D, InitType, IsForDefinition)); // Erase the old global, since it is no longer used. - cast<llvm::GlobalValue>(GV)->eraseFromParent(); + GV->eraseFromParent(); GV = NewGV; } else { GV->setInitializer(Init); @@ -2602,7 +3103,7 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the -/// normal requested type would be. If IsForDefinition is true, it is guranteed +/// normal requested type would be. If IsForDefinition is true, it is guaranteed /// that an actual global with type Ty will be returned, not conversion of a /// variable with the same mangled name but some other type. 
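The registerTargetGlobalVariable hooks added above are invoked for globals that participate in OpenMP offloading; a rough sketch, assuming a declare target variable is what reaches this path (the name is invented):

    #pragma omp declare target
    int lookup_table[64];   // the OpenMP runtime support is informed when the backing
                            // llvm::GlobalVariable for this declaration is created
    #pragma omp end declare target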
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, @@ -2625,7 +3126,10 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { - return GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr); + auto *Ret = + GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr); + setDSOLocal(cast<llvm::GlobalValue>(Ret->stripPointerCasts())); + return Ret; } void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { @@ -2679,6 +3183,39 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } +LangAS CodeGenModule::getStringLiteralAddressSpace() const { + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. + if (LangOpts.OpenCL) + return LangAS::opencl_constant; + if (auto AS = getTarget().getConstantAddressSpace()) + return AS.getValue(); + return LangAS::Default; +} + +// In address space agnostic languages, string literals are in default address +// space in AST. However, certain targets (e.g. amdgcn) request them to be +// emitted in constant address space in LLVM IR. To be consistent with other +// parts of AST, string literal global variables in constant address space +// need to be casted to default address space before being put into address +// map and referenced by other part of CodeGen. +// In OpenCL, string literals are in constant address space in AST, therefore +// they should not be casted to default address space. +static llvm::Constant * +castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM, + llvm::GlobalVariable *GV) { + llvm::Constant *Cast = GV; + if (!CGM.getLangOpts().OpenCL) { + if (auto AS = CGM.getTarget().getConstantAddressSpace()) { + if (AS != LangAS::Default) + Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast( + CGM, GV, AS.getValue(), LangAS::Default, + GV->getValueType()->getPointerTo( + CGM.getContext().getTargetAddressSpace(LangAS::Default))); + } + } + return Cast; +} + template<typename SomeDecl> void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV) { @@ -2753,6 +3290,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, if (getLangOpts().OpenCL && ASTTy->isSamplerT()) return; + // If this is OpenMP device, check if it is legal to emit this global + // normally. + if (LangOpts.OpenMPIsDevice && OpenMPRuntime && + OpenMPRuntime->emitTargetGlobalVariable(D)) + return; + llvm::Constant *Init = nullptr; CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); bool NeedsGlobalCtor = false; @@ -2989,7 +3532,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, return true; // A variable cannot be both common and exist in a section. - // We dont try to determine which is the right section in the front-end. + // We don't try to determine which is the right section in the front-end. // If no specialized section name is applicable, it will resort to default. if (D->hasAttr<PragmaClangBSSSectionAttr>() || D->hasAttr<PragmaClangDataSectionAttr>() || @@ -3261,18 +3804,18 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, // declarations). 
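To make the string-literal address-space handling above concrete, an illustrative snippet (names invented): on a target that reports a constant address space, such as amdgcn, the backing global is emitted there and the pointer handed to the rest of CodeGen is cast back to the default address space.

    const char *tag() {
      return "v1.0";   // the global may live in the target's constant address space;
                       // castStringLiteralToDefaultAddressSpace restores the default
                       // address space before the address is recorded or used
    }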
auto *Fn = cast<llvm::Function>(GV); setFunctionLinkage(GD, Fn); - setFunctionDLLStorageClass(GD, Fn); // FIXME: this is redundant with part of setFunctionDefinitionAttributes - setGlobalVisibility(Fn, D, ForDefinition); + setGVProperties(Fn, GD); MaybeHandleStaticInExternC(D, Fn); + maybeSetTrivialComdat(*D, *Fn); CodeGenFunction(*this).GenerateCode(D, Fn, FI); - setFunctionDefinitionAttributes(D, Fn); + setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>()) @@ -3281,6 +3824,15 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, AddGlobalDtor(Fn, DA->getPriority()); if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, Fn); + + if (D->isCPUSpecificMultiVersion()) { + auto *Spec = D->getAttr<CPUSpecificAttr>(); + // If there is another specific version we need to emit, do so here. + if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) { + ++Spec->ActiveArgIndex; + EmitGlobalFunctionDefinition(GD, nullptr); + } + } } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { @@ -3356,7 +3908,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { if (VD->getTLSKind()) setTLSMode(GA, *VD); - setAliasAttributes(D, GA); + SetCommonAttributes(GD, GA); } void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { @@ -3377,7 +3929,8 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { GlobalDecl OtherGD; if (lookupRepresentativeDecl(MangledName, OtherGD) && DiagnosedConflictingDefinitions.insert(GD).second) { - Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name); + Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name) + << MangledName; Diags.Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } @@ -3415,7 +3968,7 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { } else GIF->setName(MangledName); - SetCommonAttributes(D, GIF); + SetCommonAttributes(GD, GIF); } llvm::Function *CodeGenModule::getIntrinsic(unsigned IID, @@ -3477,14 +4030,13 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { if (!CFConstantStringClassRef) { llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); - llvm::Constant *GV = - CreateRuntimeVariable(Ty, "__CFConstantStringClassReference"); + llvm::GlobalValue *GV = cast<llvm::GlobalValue>( + CreateRuntimeVariable(Ty, "__CFConstantStringClassReference")); if (getTriple().isOSBinFormatCOFF()) { IdentifierInfo &II = getContext().Idents.get(GV->getName()); TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); - llvm::GlobalValue *CGV = cast<llvm::GlobalValue>(GV); const VarDecl *VD = nullptr; for (const auto &Result : DC->lookup(&II)) @@ -3492,13 +4044,14 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { break; if (!VD || !VD->hasAttr<DLLExportAttr>()) { - CGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - CGV->setLinkage(llvm::GlobalValue::ExternalLinkage); + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } else { - CGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - CGV->setLinkage(llvm::GlobalValue::ExternalLinkage); + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } } + setDSOLocal(GV); // Decay array -> ptr CFConstantStringClassRef = @@ -3666,10 +4219,8 @@ static 
llvm::GlobalVariable * GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, CodeGenModule &CGM, StringRef GlobalName, CharUnits Alignment) { - // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. - unsigned AddrSpace = 0; - if (CGM.getLangOpts().OpenCL) - AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); + unsigned AddrSpace = CGM.getContext().getTargetAddressSpace( + CGM.getStringLiteralAddressSpace()); llvm::Module &M = CGM.getModule(); // Create a global variable for this string @@ -3682,6 +4233,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); GV->setComdat(M.getOrInsertComdat(GV->getName())); } + CGM.setDSOLocal(GV); return GV; } @@ -3730,7 +4282,9 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, SanitizerMD->reportGlobalToASan(GV, S->getStrTokenLoc(0), "<string literal>", QualType()); - return ConstantAddress(GV, Alignment); + + return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), + Alignment); } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant @@ -3774,7 +4328,9 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString( GlobalName, Alignment); if (Entry) *Entry = GV; - return ConstantAddress(GV, Alignment); + + return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), + Alignment); } ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( @@ -3847,7 +4403,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) && isa<CXXRecordDecl>(InitVD->getLexicalDeclContext())) { // Temporaries defined inside a class get linkonce_odr linkage because the - // class can be defined in multipe translation units. + // class can be defined in multiple translation units. Linkage = llvm::GlobalVariable::LinkOnceODRLinkage; } else { // There is no need for this temporary to have external linkage if the @@ -3860,7 +4416,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); - setGlobalVisibility(GV, VD, ForDefinition); + setGVProperties(GV, VD); GV->setAlignment(Align.getQuantity()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); @@ -3997,18 +4553,13 @@ void CodeGenModule::EmitDeclContext(const DeclContext *DC) { /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { // Ignore dependent declarations. - if (D->getDeclContext() && D->getDeclContext()->isDependentContext()) + if (D->isTemplated()) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: - // Skip function templates - if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() || - cast<FunctionDecl>(D)->isLateTemplateParsed()) - return; - EmitGlobal(cast<FunctionDecl>(D)); // Always provide some coverage mapping // even for the functions that aren't emitted. 
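The switch to D->isTemplated() above replaces the per-kind template checks the old code performed; roughly, the first declaration below is skipped and only its instantiations are emitted (example is illustrative):

    template <typename T> T square(T x) { return x * x; }   // templated: skipped by EmitTopLevelDecl
    int use(int x) { return square(x); }                     // the instantiation square<int> is
                                                             // emitted once it is referenced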
@@ -4021,10 +4572,6 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::Var: case Decl::Decomposition: - // Skip variable templates - if (cast<VarDecl>(D)->getDescribedVarTemplate()) - return; - LLVM_FALLTHROUGH; case Decl::VarTemplateSpecialization: EmitGlobal(cast<VarDecl>(D)); if (auto *DD = dyn_cast<DecompositionDecl>(D)) @@ -4083,16 +4630,9 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { DI->EmitUsingDirective(cast<UsingDirectiveDecl>(*D)); return; case Decl::CXXConstructor: - // Skip function templates - if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() || - cast<FunctionDecl>(D)->isLateTemplateParsed()) - return; - getCXXABI().EmitCXXConstructors(cast<CXXConstructorDecl>(D)); break; case Decl::CXXDestructor: - if (cast<FunctionDecl>(D)->isLateTemplateParsed()) - return; getCXXABI().EmitCXXDestructors(cast<CXXDestructorDecl>(D)); break; @@ -4152,7 +4692,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { AppendLinkerOptions(PCD->getArg()); break; case PCK_Lib: - AddDependentLib(PCD->getArg()); + if (getTarget().getTriple().isOSBinFormatELF() && + !getTarget().getTriple().isPS4()) + AddELFLibDirective(PCD->getArg()); + else + AddDependentLib(PCD->getArg()); break; case PCK_Compiler: case PCK_ExeStr: @@ -4358,9 +4902,7 @@ static void EmitGlobalDeclMetadata(CodeGenModule &CGM, /// to such functions with an unmangled name from inline assembly within the /// same translation unit. void CodeGenModule::EmitStaticExternCAliases() { - // Don't do anything if we're generating CUDA device code -- the NVPTX - // assembly target doesn't support aliases. - if (Context.getTargetInfo().getTriple().isNVPTX()) + if (!getTargetCodeGenInfo().shouldEmitStaticExternCAliases()) return; for (auto &I : StaticExternCValues) { IdentifierInfo *Name = I.first; @@ -4504,7 +5046,7 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, // Return a bogus pointer if RTTI is disabled, unless it's for EH. // FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? - if (!ForEH && !getLangOpts().RTTI) + if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice) return llvm::Constant::getNullValue(Int8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && @@ -4515,6 +5057,9 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, } void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { + // Do not emit threadprivates in simd-only mode. 
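Looking back at the PCK_Lib change above, a sketch of the construct it affects (library name arbitrary):

    #pragma comment(lib, "m")   // on ELF targets other than PS4 this is now routed through
                                // AddELFLibDirective rather than the COFF-style
                                // dependent-library linker option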
+ if (LangOpts.OpenMP && LangOpts.OpenMPSimd) + return; for (auto RefExpr : D->varlists()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(RefExpr)->getDecl()); bool PerformInit = @@ -4529,8 +5074,10 @@ void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { } } -llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { - llvm::Metadata *&InternalId = MetadataIdMap[T.getCanonicalType()]; +llvm::Metadata * +CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, + StringRef Suffix) { + llvm::Metadata *&InternalId = Map[T.getCanonicalType()]; if (InternalId) return InternalId; @@ -4538,6 +5085,7 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { std::string OutName; llvm::raw_string_ostream Out(OutName); getCXXABI().getMangleContext().mangleTypeName(T, Out); + Out << Suffix; InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); } else { @@ -4548,6 +5096,15 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return InternalId; } +llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { + return CreateMetadataIdentifierImpl(T, MetadataIdMap, ""); +} + +llvm::Metadata * +CodeGenModule::CreateMetadataIdentifierForVirtualMemPtrType(QualType T) { + return CreateMetadataIdentifierImpl(T, VirtualMetadataIdMap, ".virtual"); +} + // Generalize pointer types to a void pointer with the qualifiers of the // originally pointed-to type, e.g. 'const char *' and 'char * const *' // generalize to 'const void *' while 'char *' and 'const char **' generalize to @@ -4581,25 +5138,8 @@ static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { - T = GeneralizeFunctionType(getContext(), T); - - llvm::Metadata *&InternalId = GeneralizedMetadataIdMap[T.getCanonicalType()]; - if (InternalId) - return InternalId; - - if (isExternallyVisible(T->getLinkage())) { - std::string OutName; - llvm::raw_string_ostream Out(OutName); - getCXXABI().getMangleContext().mangleTypeName(T, Out); - Out << ".generalized"; - - InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); - } else { - InternalId = llvm::MDNode::getDistinct(getLLVMContext(), - llvm::ArrayRef<llvm::Metadata *>()); - } - - return InternalId; + return CreateMetadataIdentifierImpl(GeneralizeFunctionType(getContext(), T), + GeneralizedMetadataIdMap, ".generalized"); } /// Returns whether this module needs the "all-vtables" type identifier. @@ -4634,22 +5174,28 @@ void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, } } +TargetAttr::ParsedTargetAttr CodeGenModule::filterFunctionTargetAttrs(const TargetAttr *TD) { + assert(TD != nullptr); + TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); + + ParsedAttr.Features.erase( + llvm::remove_if(ParsedAttr.Features, + [&](const std::string &Feat) { + return !Target.isValidFeatureName( + StringRef{Feat}.substr(1)); + }), + ParsedAttr.Features.end()); + return ParsedAttr; +} + + // Fills in the supplied string map with the set of target features for the // passed in function. void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, const FunctionDecl *FD) { StringRef TargetCPU = Target.getTargetOpts().CPU; if (const auto *TD = FD->getAttr<TargetAttr>()) { - // If we have a TargetAttr build up the feature map based on that. 
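A short, assumed example of the attribute whose features are filtered at this step:

    __attribute__((target("avx2,bmi"))) void kernel(void);
    // Each listed feature arrives internally as "+avx2", "+bmi", and so on;
    // filterFunctionTargetAttrs keeps only names that Target.isValidFeatureName()
    // accepts (checked after dropping the leading '+'/'-'), and the survivors are
    // appended after the command-line features so that they take precedence.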
- TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - - ParsedAttr.Features.erase( - llvm::remove_if(ParsedAttr.Features, - [&](const std::string &Feat) { - return !Target.isValidFeatureName( - StringRef{Feat}.substr(1)); - }), - ParsedAttr.Features.end()); + TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD); // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. @@ -4667,6 +5213,12 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, // the attribute. Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.Features); + } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) { + llvm::SmallVector<StringRef, 32> FeaturesTmp; + Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(), + FeaturesTmp); + std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end()); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 22c4463b2c81..ee64ed4f2ae2 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -324,6 +324,10 @@ private: /// is defined once we get to the end of the of the translation unit. std::vector<GlobalDecl> Aliases; + /// List of multiversion functions that have to be emitted. Used to make sure + /// we properly emit the iFunc. + std::vector<GlobalDecl> MultiVersionFuncs; + typedef llvm::StringMap<llvm::TrackingVH<llvm::Constant> > ReplacementsTy; ReplacementsTy Replacements; @@ -362,6 +366,13 @@ private: llvm::MapVector<GlobalDecl, StringRef> MangledDeclNames; llvm::StringMap<GlobalDecl, llvm::BumpPtrAllocator> Manglings; + // An ordered map of canonical GlobalDecls paired with the cpu-index for + // cpu-specific name manglings. + llvm::MapVector<std::pair<GlobalDecl, unsigned>, StringRef> + CPUSpecificMangledDeclNames; + llvm::StringMap<std::pair<GlobalDecl, unsigned>, llvm::BumpPtrAllocator> + CPUSpecificManglings; + /// Global annotations. std::vector<llvm::Constant*> Annotations; @@ -387,10 +398,10 @@ private: llvm::GlobalValue *> StaticExternCMap; StaticExternCMap StaticExternCValues; - /// \brief thread_local variables defined or used in this TU. + /// thread_local variables defined or used in this TU. std::vector<const VarDecl *> CXXThreadLocals; - /// \brief thread_local variables with initializers that need to run + /// thread_local variables with initializers that need to run /// before any thread_local variable in this TU is odr-used. std::vector<llvm::Function *> CXXThreadLocalInits; std::vector<const VarDecl *> CXXThreadLocalInitVars; @@ -421,14 +432,14 @@ private: /// Global destructor functions and arguments that need to run on termination. std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors; - /// \brief The complete set of modules that has been imported. + /// The complete set of modules that has been imported. llvm::SetVector<clang::Module *> ImportedModules; - /// \brief The set of modules for which the module initializers + /// The set of modules for which the module initializers /// have been emitted. llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers; - /// \brief A vector of metadata strings. + /// A vector of metadata strings. 
SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata; /// @name Cache for Objective-C runtime types @@ -438,7 +449,7 @@ private: /// int * but is actually an Obj-C class pointer. llvm::WeakTrackingVH CFConstantStringClassRef; - /// \brief The type used to describe the state of a fast enumeration in + /// The type used to describe the state of a fast enumeration in /// Objective-C's for..in loop. QualType ObjCFastEnumerationStateType; @@ -499,6 +510,7 @@ private: /// MDNodes. typedef llvm::DenseMap<QualType, llvm::Metadata *> MetadataTypeMap; MetadataTypeMap MetadataIdMap; + MetadataTypeMap VirtualMetadataIdMap; MetadataTypeMap GeneralizedMetadataIdMap; public: @@ -685,6 +697,11 @@ public: TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, TBAAAccessInfo InfoB); + /// mergeTBAAInfoForMemoryTransfer - Get merged TBAA information for the + /// purposes of memory transfer calls. + TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, + TBAAAccessInfo SrcInfo); + /// getTBAAInfoForSubobject - Get TBAA information for an access with a given /// base lvalue. TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) { @@ -710,8 +727,19 @@ public: llvm::ConstantInt *getSize(CharUnits numChars); /// Set the visibility for the given LLVM GlobalValue. - void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D, - ForDefinition_t IsForDefinition) const; + void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const; + + void setGlobalVisibilityAndLocal(llvm::GlobalValue *GV, + const NamedDecl *D) const; + + void setDSOLocal(llvm::GlobalValue *GV) const; + + void setDLLImportDLLExport(llvm::GlobalValue *GV, GlobalDecl D) const; + void setDLLImportDLLExport(llvm::GlobalValue *GV, const NamedDecl *D) const; + /// Set visibility, dllimport/dllexport and dso_local. + /// This must be called after dllimport/dllexport is set. + void setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const; + void setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const; /// Set the TLS mode for the given LLVM GlobalValue for the thread-local /// variable declaration D. @@ -757,7 +785,7 @@ public: /// Return the llvm::Constant for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created /// with the specified type instead of whatever the normal requested type - /// would be. If IsForDefinition is true, it is guranteed that an actual + /// would be. If IsForDefinition is true, it is guaranteed that an actual /// global with type Ty will be returned, not conversion of a variable with /// the same mangled name but some other type. llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D, @@ -765,6 +793,13 @@ public: ForDefinition_t IsForDefinition = NotForDefinition); + /// Return the AST address space of string literal, which is used to emit + /// the string literal as global variable in LLVM IR. + /// Note: This is not necessarily the address space of the string literal + /// in AST. For address space agnostic language, e.g. C++, string literal + /// in AST is always in default address space. + LangAS getStringLiteralAddressSpace() const; + /// Return the address of the given function. If Ty is non-null, then this /// function will use the specified type if it has to create it. 
llvm::Constant *GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty = nullptr, @@ -780,7 +815,8 @@ public: ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E); /// Get the address of the thunk for the given global decl. - llvm::Constant *GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk); + llvm::Constant *GetAddrOfThunk(StringRef Name, llvm::Type *FnTy, + GlobalDecl GD); /// Get a reference to the target of VD. ConstantAddress GetWeakRefReference(const ValueDecl *VD); @@ -879,12 +915,12 @@ public: void setAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *CLE, llvm::GlobalVariable *GV); - /// \brief Returns a pointer to a global variable representing a temporary + /// Returns a pointer to a global variable representing a temporary /// with static or thread storage duration. ConstantAddress GetAddrOfGlobalTemporary(const MaterializeTemporaryExpr *E, const Expr *Inner); - /// \brief Retrieve the record type that describes the state of an + /// Retrieve the record type that describes the state of an /// Objective-C fast enumeration loop (for..in). QualType getObjCFastEnumerationStateType(); @@ -912,22 +948,22 @@ public: /// Emit code for a single top level declaration. void EmitTopLevelDecl(Decl *D); - /// \brief Stored a deferred empty coverage mapping for an unused + /// Stored a deferred empty coverage mapping for an unused /// and thus uninstrumented top level declaration. void AddDeferredUnusedCoverageMapping(Decl *D); - /// \brief Remove the deferred empty coverage mapping as this + /// Remove the deferred empty coverage mapping as this /// declaration is actually instrumented. void ClearUnusedCoverageMapping(const Decl *D); - /// \brief Emit all the deferred coverage mappings + /// Emit all the deferred coverage mappings /// for the uninstrumented functions. void EmitDeferredUnusedCoverageMappings(); /// Tell the consumer that this variable has been instantiated. void HandleCXXStaticMemberVarInstantiation(VarDecl *VD); - /// \brief If the declaration has internal linkage but is inside an + /// If the declaration has internal linkage but is inside an /// extern "C" linkage specification, prepare to emit an alias for it /// to the expected name. template<typename SomeDecl> @@ -976,7 +1012,7 @@ public: llvm::Constant *getMemberPointerConstant(const UnaryOperator *e); - /// \brief Emit type info if type of an expression is a variably modified + /// Emit type info if type of an expression is a variably modified /// type. Also emit proper debug info for cast types. void EmitExplicitCastExprType(const ExplicitCastExpr *E, CodeGenFunction *CGF = nullptr); @@ -1002,7 +1038,7 @@ public: /// Set the attributes on the LLVM function for the given decl and function /// info. This applies attributes necessary for handling the ABI as well as /// user specified attributes like section. - void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F, + void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI); /// Set the LLVM function attributes (sext, zext, etc). @@ -1061,6 +1097,10 @@ public: /// It's up to you to ensure that this is safe. void AddDefaultFnAttrs(llvm::Function &F); + /// Parses the target attributes passed in, and returns only the ones that are + /// valid feature names. + TargetAttr::ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD); + // Fills in the supplied string map with the set of target features for the // passed in function. 
void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, @@ -1075,25 +1115,24 @@ public: void RefreshTypeCacheForClass(const CXXRecordDecl *Class); - /// \brief Appends Opts to the "llvm.linker.options" metadata value. + /// Appends Opts to the "llvm.linker.options" metadata value. void AppendLinkerOptions(StringRef Opts); - /// \brief Appends a detect mismatch command to the linker options. + /// Appends a detect mismatch command to the linker options. void AddDetectMismatch(StringRef Name, StringRef Value); - /// \brief Appends a dependent lib to the "llvm.linker.options" metadata + /// Appends a dependent lib to the "llvm.linker.options" metadata /// value. void AddDependentLib(StringRef Lib); + void AddELFLibDirective(StringRef Lib); + llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); void setFunctionLinkage(GlobalDecl GD, llvm::Function *F) { F->setLinkage(getFunctionLinkage(GD)); } - /// Set the DLL storage class on F. - void setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F); - /// Return the appropriate linkage for the vtable, VTT, and type information /// of the given class. llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD); @@ -1158,40 +1197,29 @@ public: DeferredVTables.push_back(RD); } - /// Emit code for a singal global function or var decl. Forward declarations + /// Emit code for a single global function or var decl. Forward declarations /// are emitted lazily. void EmitGlobal(GlobalDecl D); - bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target); bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); - /// Set attributes for a global definition. - void setFunctionDefinitionAttributes(const FunctionDecl *D, - llvm::Function *F); - llvm::GlobalValue *GetGlobalValue(StringRef Ref); /// Set attributes which are common to any form of a global definition (alias, /// Objective-C method, function, global variable). /// /// NOTE: This should only be called for definitions. - void SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV); - - /// Set attributes which must be preserved by an alias. This includes common - /// attributes (i.e. it includes a call to SetCommonAttributes). - /// - /// NOTE: This should only be called for definitions. - void setAliasAttributes(const Decl *D, llvm::GlobalValue *GV); + void SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV); void addReplacement(StringRef Name, llvm::Constant *C); void addGlobalValReplacement(llvm::GlobalValue *GV, llvm::Constant *C); - /// \brief Emit a code for threadprivate directive. + /// Emit a code for threadprivate directive. /// \param D Threadprivate declaration. void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D); - /// \brief Emit a code for declare reduction construct. + /// Emit a code for declare reduction construct. void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, CodeGenFunction *CGF = nullptr); @@ -1212,13 +1240,18 @@ public: /// internal identifiers). llvm::Metadata *CreateMetadataIdentifierForType(QualType T); + /// Create a metadata identifier that is intended to be used to check virtual + /// calls via a member function pointer. + llvm::Metadata *CreateMetadataIdentifierForVirtualMemPtrType(QualType T); + /// Create a metadata identifier for the generalization of the given type. /// This may either be an MDString (for external identifiers) or a distinct /// unnamed MDNode (for internal identifiers). 
llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T); /// Create and attach type metadata to the given function. - void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F); + void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, + llvm::Function *F); /// Returns whether this module needs the "all-vtables" type identifier. bool NeedAllVtablesTypeId() const; @@ -1227,7 +1260,15 @@ public: void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD); - /// \brief Get the declaration of std::terminate for the platform. + /// Return a vector of most-base classes for RD. This is used to implement + /// control flow integrity checks for member function pointers. + /// + /// A most-base class of a class C is defined as a recursive base class of C, + /// including C itself, that does not have any bases. + std::vector<const CXXRecordDecl *> + getMostBaseClasses(const CXXRecordDecl *RD); + + /// Get the declaration of std::terminate for the platform. llvm::Constant *getTerminateFn(); llvm::SanitizerStatReport &getSanStats(); @@ -1247,18 +1288,24 @@ private: llvm::AttributeList ExtraAttrs = llvm::AttributeList(), ForDefinition_t IsForDefinition = NotForDefinition); + llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD, + llvm::Type *DeclTy, + const FunctionDecl *FD); + void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD); + llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, const VarDecl *D, ForDefinition_t IsForDefinition = NotForDefinition); - void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO); + bool GetCPUAndFeaturesAttributes(const Decl *D, + llvm::AttrBuilder &AttrBuilder); + void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO); /// Set function attributes for a function declaration. void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, - bool IsIncompleteFunction, bool IsThunk, - ForDefinition_t IsForDefinition); + bool IsIncompleteFunction, bool IsThunk); void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); @@ -1266,6 +1313,7 @@ private: void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); + void emitCPUDispatchDefinition(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); @@ -1274,7 +1322,7 @@ private: void EmitDeclContext(const DeclContext *DC); void EmitLinkageSpec(const LinkageSpecDecl *D); - /// \brief Emit the function that initializes C++ thread_local variables. + /// Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); /// Emit the function that initializes C++ globals. @@ -1319,6 +1367,14 @@ private: void checkAliases(); + std::map<int, llvm::TinyPtrVector<llvm::Function *>> DtorsUsingAtExit; + + /// Register functions annotated with __attribute__((destructor)) using + /// __cxa_atexit, if it is available, or atexit otherwise. + void registerGlobalDtorsWithAtExit(); + + void emitMultiVersionFunctions(); + /// Emit any vtables which we deferred and still have a use for. void EmitDeferredVTables(); @@ -1329,16 +1385,16 @@ private: /// Emit the llvm.used and llvm.compiler.used metadata. void emitLLVMUsed(); - /// \brief Emit the link options introduced by imported modules. + /// Emit the link options introduced by imported modules. 
void EmitModuleLinkOptions(); - /// \brief Emit aliases for internal-linkage declarations inside "C" language + /// Emit aliases for internal-linkage declarations inside "C" language /// linkage specifications, giving them the "expected" name where possible. void EmitStaticExternCAliases(); void EmitDeclMetadata(); - /// \brief Emit the Clang version as llvm.ident metadata. + /// Emit the Clang version as llvm.ident metadata. void EmitVersionIdentMetadata(); /// Emits target specific Metadata for global declarations. @@ -1373,6 +1429,9 @@ private: void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, bool AttrOnCallSite, llvm::AttrBuilder &FuncAttrs); + + llvm::Metadata *CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, + StringRef Suffix); }; } // end namespace CodeGen diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index 295893c64fbc..c8c2a1b956b8 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -58,7 +58,7 @@ enum PGOHashVersion : unsigned { }; namespace { -/// \brief Stable hasher for PGO region counters. +/// Stable hasher for PGO region counters. /// /// PGOHash produces a stable hash of a given function's control flow. /// @@ -79,7 +79,7 @@ class PGOHash { static const unsigned TooBig = 1u << NumBitsPerType; public: - /// \brief Hash values for AST nodes. + /// Hash values for AST nodes. /// /// Distinct values for AST nodes that have region counters attached. /// @@ -978,7 +978,7 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, RegionCounts = ProfRecord->Counts; } -/// \brief Calculate what to divide by to scale weights. +/// Calculate what to divide by to scale weights. /// /// Given the maximum weight, calculate a divisor that will scale all the /// weights to strictly less than UINT32_MAX. @@ -986,7 +986,7 @@ static uint64_t calculateWeightScale(uint64_t MaxWeight) { return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1; } -/// \brief Scale an individual branch weight (and add 1). +/// Scale an individual branch weight (and add 1). /// /// Scale a 64-bit weight down to 32-bits using \c Scale. /// diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index ad473032db17..ec48231e5247 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -215,6 +215,19 @@ llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { return MetadataCache[Ty] = TypeNode; } +TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) { + // Pointee values may have incomplete types, but they shall never be + // dereferenced. + if (AccessType->isIncompleteType()) + return TBAAAccessInfo::getIncompleteInfo(); + + if (TypeHasMayAlias(AccessType)) + return TBAAAccessInfo::getMayAliasInfo(); + + uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity(); + return TBAAAccessInfo(getTypeInfo(AccessType), Size); +} + TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) { llvm::DataLayout DL(&Module); unsigned Size = DL.getPointerTypeSize(VTablePtrType); @@ -391,3 +404,21 @@ CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, // access type regardless of their base types. 
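Jumping ahead to the mergeTBAAInfoForMemoryTransfer overload introduced just below, a rough illustration of what the merge means for user code (function and types invented):

    void copy_floats(float *dst, const float *src, unsigned n) {
      __builtin_memcpy(dst, src, n * sizeof(float));   // both operands access 'float', so the
    }                                                  // merged access info can keep the float
                                                       // tag; mismatched or unknown types fall
                                                       // back to the may-alias descriptor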
return TBAAAccessInfo::getMayAliasInfo(); } + +TBAAAccessInfo +CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, + TBAAAccessInfo SrcInfo) { + if (DestInfo == SrcInfo) + return DestInfo; + + if (!DestInfo || !SrcInfo) + return TBAAAccessInfo(); + + if (DestInfo.isMayAlias() || SrcInfo.isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); + + // TODO: Implement the rest of the logic here. For example, two accesses + // with same final access types result in an access to an object of that final + // access type regardless of their base types. + return TBAAAccessInfo::getMayAliasInfo(); +} diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index a5b1f66bcd1a..86ba407c05c6 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -177,6 +177,10 @@ public: /// given type. llvm::MDNode *getTypeInfo(QualType QTy); + /// getAccessInfo - Get TBAA information that describes an access to + /// an object of the given type. + TBAAAccessInfo getAccessInfo(QualType AccessType); + /// getVTablePtrAccessInfo - Get the TBAA information that describes an /// access to a virtual table pointer. TBAAAccessInfo getVTablePtrAccessInfo(llvm::Type *VTablePtrType); @@ -201,6 +205,11 @@ public: /// purpose of conditional operator. TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, TBAAAccessInfo InfoB); + + /// mergeTBAAInfoForMemoryTransfer - Get merged TBAA information for the + /// purpose of memory transfer calls. + TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, + TBAAAccessInfo SrcInfo); }; } // end namespace CodeGen diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h index fb096ac89987..901aed6c00b2 100644 --- a/lib/CodeGen/CodeGenTypeCache.h +++ b/lib/CodeGen/CodeGenTypeCache.h @@ -112,8 +112,6 @@ struct CodeGenTypeCache { llvm::CallingConv::ID RuntimeCC; llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; } - llvm::CallingConv::ID BuiltinCC; - llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; } LangAS getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } }; diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 529a13b7adc8..16ec1dd301aa 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -437,8 +437,33 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::ULongLong: case BuiltinType::WChar_S: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: + case BuiltinType::ShortAccum: + case BuiltinType::Accum: + case BuiltinType::LongAccum: + case BuiltinType::UShortAccum: + case BuiltinType::UAccum: + case BuiltinType::ULongAccum: + case BuiltinType::ShortFract: + case BuiltinType::Fract: + case BuiltinType::LongFract: + case BuiltinType::UShortFract: + case BuiltinType::UFract: + case BuiltinType::ULongFract: + case BuiltinType::SatShortAccum: + case BuiltinType::SatAccum: + case BuiltinType::SatLongAccum: + case BuiltinType::SatUShortAccum: + case BuiltinType::SatUAccum: + case BuiltinType::SatULongAccum: + case BuiltinType::SatShortFract: + case BuiltinType::SatFract: + case BuiltinType::SatLongFract: + case BuiltinType::SatUShortFract: + case BuiltinType::SatUFract: + case BuiltinType::SatULongFract: ResultType = llvm::IntegerType::get(getLLVMContext(), static_cast<unsigned>(Context.getTypeSize(T))); break; @@ -767,7 +792,7 @@ bool CodeGenTypes::isZeroInitializable(QualType T) { // Records are non-zero-initializable if they 
contain any // non-zero-initializable subobjects. if (const RecordType *RT = T->getAs<RecordType>()) { - auto RD = cast<RecordDecl>(RT->getDecl()); + const RecordDecl *RD = RT->getDecl(); return isZeroInitializable(RD); } diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index d082342bf592..fb8d31684290 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -184,7 +184,7 @@ public: /// ConvertType - Convert type T into a llvm::Type. llvm::Type *ConvertType(QualType T); - /// \brief Converts the GlobalDecl into an llvm::Type. This should be used + /// Converts the GlobalDecl into an llvm::Type. This should be used /// when we know the target of the function we want to convert. This is /// because some functions (explicitly, those with pass_object_size /// parameters) may not have the same signature as their type portrays, and @@ -225,7 +225,7 @@ public: /// replace the 'opaque' type we previously made for it if applicable. void UpdateCompletedType(const TagDecl *TD); - /// \brief Remove stale types from the type cache when an inheritance model + /// Remove stale types from the type cache when an inheritance model /// gets assigned to a class. void RefreshTypeCacheForClass(const CXXRecordDecl *RD); @@ -313,7 +313,8 @@ public: const FunctionProtoType *type, RequiredArgs required, unsigned numPrefixArgs); - const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD); + const CGFunctionInfo & + arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD, CXXCtorType CT); const CGFunctionInfo &arrangeCXXMethodType(const CXXRecordDecl *RD, @@ -334,7 +335,7 @@ public: ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs args); - /// \brief Compute a new LLVM record layout object for the given record. + /// Compute a new LLVM record layout object for the given record. CGRecordLayout *ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty); diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h index 90c9fcd8cf81..b4d1b65743c7 100644 --- a/lib/CodeGen/ConstantEmitter.h +++ b/lib/CodeGen/ConstantEmitter.h @@ -50,7 +50,7 @@ public: : CGM(CGM), CGF(CGF) {} /// Initialize this emission in the context of the given function. - /// Use this if the expression might contain contextaul references like + /// Use this if the expression might contain contextual references like /// block addresses or PredefinedExprs. ConstantEmitter(CodeGenFunction &CGF) : CGM(CGF.CGM), CGF(&CGF) {} diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 89a30dc7040c..2d8446463594 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -35,14 +35,14 @@ void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range, SourceLocation) { namespace { -/// \brief A region of source code that can be mapped to a counter. +/// A region of source code that can be mapped to a counter. class SourceMappingRegion { Counter Count; - /// \brief The region's starting location. + /// The region's starting location. Optional<SourceLocation> LocStart; - /// \brief The region's ending location. + /// The region's ending location. Optional<SourceLocation> LocEnd; /// Whether this region should be emitted after its parent is emitted. 
@@ -74,7 +74,10 @@ public: bool hasEndLoc() const { return LocEnd.hasValue(); } - void setEndLoc(SourceLocation Loc) { LocEnd = Loc; } + void setEndLoc(SourceLocation Loc) { + assert(Loc.isValid() && "Setting an invalid end location"); + LocEnd = Loc; + } SourceLocation getEndLoc() const { assert(LocEnd && "Region has no end location"); @@ -123,7 +126,7 @@ struct SpellingRegion { } }; -/// \brief Provides the common functionality for the different +/// Provides the common functionality for the different /// coverage mapping region builders. class CoverageMappingBuilder { public: @@ -132,17 +135,17 @@ public: const LangOptions &LangOpts; private: - /// \brief Map of clang's FileIDs to IDs used for coverage mapping. + /// Map of clang's FileIDs to IDs used for coverage mapping. llvm::SmallDenseMap<FileID, std::pair<unsigned, SourceLocation>, 8> FileIDMapping; public: - /// \brief The coverage mapping regions for this function + /// The coverage mapping regions for this function llvm::SmallVector<CounterMappingRegion, 32> MappingRegions; - /// \brief The source mapping regions for this function. + /// The source mapping regions for this function. std::vector<SourceMappingRegion> SourceRegions; - /// \brief A set of regions which can be used as a filter. + /// A set of regions which can be used as a filter. /// /// It is produced by emitExpansionRegions() and is used in /// emitSourceRegions() to suppress producing code regions if @@ -154,7 +157,7 @@ public: const LangOptions &LangOpts) : CVM(CVM), SM(SM), LangOpts(LangOpts) {} - /// \brief Return the precise end location for the given token. + /// Return the precise end location for the given token. SourceLocation getPreciseTokenLocEnd(SourceLocation Loc) { // We avoid getLocForEndOfToken here, because it doesn't do what we want for // macro locations, which we just treat as expanded files. @@ -163,14 +166,14 @@ public: return Loc.getLocWithOffset(TokLen); } - /// \brief Return the start location of an included file or expanded macro. + /// Return the start location of an included file or expanded macro. SourceLocation getStartOfFileOrMacro(SourceLocation Loc) { if (Loc.isMacroID()) return Loc.getLocWithOffset(-SM.getFileOffset(Loc)); return SM.getLocForStartOfFile(SM.getFileID(Loc)); } - /// \brief Return the end location of an included file or expanded macro. + /// Return the end location of an included file or expanded macro. SourceLocation getEndOfFileOrMacro(SourceLocation Loc) { if (Loc.isMacroID()) return Loc.getLocWithOffset(SM.getFileIDSize(SM.getFileID(Loc)) - @@ -178,18 +181,18 @@ public: return SM.getLocForEndOfFile(SM.getFileID(Loc)); } - /// \brief Find out where the current file is included or macro is expanded. + /// Find out where the current file is included or macro is expanded. SourceLocation getIncludeOrExpansionLoc(SourceLocation Loc) { - return Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).first + return Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin() : SM.getIncludeLoc(SM.getFileID(Loc)); } - /// \brief Return true if \c Loc is a location in a built-in macro. + /// Return true if \c Loc is a location in a built-in macro. bool isInBuiltin(SourceLocation Loc) { return SM.getBufferName(SM.getSpellingLoc(Loc)) == "<built-in>"; } - /// \brief Check whether \c Loc is included or expanded from \c Parent. + /// Check whether \c Loc is included or expanded from \c Parent. 
bool isNestedIn(SourceLocation Loc, FileID Parent) { do { Loc = getIncludeOrExpansionLoc(Loc); @@ -199,23 +202,23 @@ public: return true; } - /// \brief Get the start of \c S ignoring macro arguments and builtin macros. + /// Get the start of \c S ignoring macro arguments and builtin macros. SourceLocation getStart(const Stmt *S) { SourceLocation Loc = S->getLocStart(); while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc)) - Loc = SM.getImmediateExpansionRange(Loc).first; + Loc = SM.getImmediateExpansionRange(Loc).getBegin(); return Loc; } - /// \brief Get the end of \c S ignoring macro arguments and builtin macros. + /// Get the end of \c S ignoring macro arguments and builtin macros. SourceLocation getEnd(const Stmt *S) { SourceLocation Loc = S->getLocEnd(); while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc)) - Loc = SM.getImmediateExpansionRange(Loc).first; + Loc = SM.getImmediateExpansionRange(Loc).getBegin(); return getPreciseTokenLocEnd(Loc); } - /// \brief Find the set of files we have regions for and assign IDs + /// Find the set of files we have regions for and assign IDs /// /// Fills \c Mapping with the virtual file mapping needed to write out /// coverage and collects the necessary file information to emit source and @@ -255,7 +258,7 @@ public: } } - /// \brief Get the coverage mapping file ID for \c Loc. + /// Get the coverage mapping file ID for \c Loc. /// /// If such file id doesn't exist, return None. Optional<unsigned> getCoverageFileID(SourceLocation Loc) { @@ -265,7 +268,7 @@ public: return None; } - /// \brief Gather all the regions that were skipped by the preprocessor + /// Gather all the regions that were skipped by the preprocessor /// using the constructs like #if. void gatherSkippedRegions() { /// An array of the minimum lineStarts and the maximum lineEnds @@ -295,14 +298,14 @@ public: auto Region = CounterMappingRegion::makeSkipped( *CovFileID, SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd); // Make sure that we only collect the regions that are inside - // the souce code of this function. + // the source code of this function. if (Region.LineStart >= FileLineRanges[*CovFileID].first && Region.LineEnd <= FileLineRanges[*CovFileID].second) MappingRegions.push_back(Region); } } - /// \brief Generate the coverage counter mapping regions from collected + /// Generate the coverage counter mapping regions from collected /// source regions. void emitSourceRegions(const SourceRegionFilter &Filter) { for (const auto &Region : SourceRegions) { @@ -347,7 +350,7 @@ public: } } - /// \brief Generate expansion regions for each virtual file we've seen. + /// Generate expansion regions for each virtual file we've seen. SourceRegionFilter emitExpansionRegions() { SourceRegionFilter Filter; for (const auto &FM : FileIDMapping) { @@ -377,7 +380,7 @@ public: } }; -/// \brief Creates unreachable coverage regions for the functions that +/// Creates unreachable coverage regions for the functions that /// are not emitted. 
struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { EmptyCoverageMappingBuilder(CoverageMappingModuleGen &CVM, SourceManager &SM, @@ -411,7 +414,7 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { SourceRegions.emplace_back(Counter(), Start, End); } - /// \brief Write the mapping data to the output stream + /// Write the mapping data to the output stream void write(llvm::raw_ostream &OS) { SmallVector<unsigned, 16> FileIDMapping; gatherFileIDs(FileIDMapping); @@ -425,15 +428,15 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { } }; -/// \brief A StmtVisitor that creates coverage mapping regions which map +/// A StmtVisitor that creates coverage mapping regions which map /// from the source code locations to the PGO counters. struct CounterCoverageMappingBuilder : public CoverageMappingBuilder, public ConstStmtVisitor<CounterCoverageMappingBuilder> { - /// \brief The map of statements to count values. + /// The map of statements to count values. llvm::DenseMap<const Stmt *, unsigned> &CounterMap; - /// \brief A stack of currently live regions. + /// A stack of currently live regions. std::vector<SourceMappingRegion> RegionStack; /// The currently deferred region: its end location and count can be set once @@ -442,7 +445,7 @@ struct CounterCoverageMappingBuilder CounterExpressionBuilder Builder; - /// \brief A location in the most recently visited file or macro. + /// A location in the most recently visited file or macro. /// /// This is used to adjust the active source regions appropriately when /// expressions cross file or macro boundaries. @@ -451,12 +454,12 @@ struct CounterCoverageMappingBuilder /// Location of the last terminated region. Optional<std::pair<SourceLocation, size_t>> LastTerminatedRegion; - /// \brief Return a counter for the subtraction of \c RHS from \c LHS + /// Return a counter for the subtraction of \c RHS from \c LHS Counter subtractCounters(Counter LHS, Counter RHS) { return Builder.subtract(LHS, RHS); } - /// \brief Return a counter for the sum of \c LHS and \c RHS. + /// Return a counter for the sum of \c LHS and \c RHS. Counter addCounters(Counter LHS, Counter RHS) { return Builder.add(LHS, RHS); } @@ -465,14 +468,14 @@ struct CounterCoverageMappingBuilder return addCounters(addCounters(C1, C2), C3); } - /// \brief Return the region counter for the given statement. + /// Return the region counter for the given statement. /// /// This should only be called on statements that have a dedicated counter. Counter getRegionCounter(const Stmt *S) { return Counter::getCounter(CounterMap[S]); } - /// \brief Push a region onto the stack. + /// Push a region onto the stack. /// /// Returns the index on the stack where the region was pushed. This can be /// used with popRegions to exit a "scope", ending the region that was pushed. @@ -549,7 +552,7 @@ struct CounterCoverageMappingBuilder completeDeferred(Count, DeferredEndLoc); } - /// \brief Pop regions from the stack into the function's list of regions. + /// Pop regions from the stack into the function's list of regions. /// /// Adds all regions from \c ParentIndex to the top of the stack to the /// function's \c SourceRegions. @@ -616,13 +619,13 @@ struct CounterCoverageMappingBuilder assert(!ParentOfDeferredRegion && "Deferred region with no parent"); } - /// \brief Return the currently active region. + /// Return the currently active region. 
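As a rough illustration of the counter arithmetic these helpers (addCounters, subtractCounters) support, consider a made-up function:

    int pick(bool c) {
      if (c)
        return 1;   // the "then" branch gets its own region counter, say T
      return 0;     // the fall-through region is described as
    }               // subtractCounters(ParentCount, T) instead of getting
                    // a separate physical counter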
SourceMappingRegion &getRegion() { assert(!RegionStack.empty() && "statement has no region"); return RegionStack.back(); } - /// \brief Propagate counts through the children of \c S. + /// Propagate counts through the children of \c S. Counter propagateCounts(Counter TopCount, const Stmt *S) { SourceLocation StartLoc = getStart(S); SourceLocation EndLoc = getEnd(S); @@ -639,7 +642,7 @@ struct CounterCoverageMappingBuilder return ExitCount; } - /// \brief Check whether a region with bounds \c StartLoc and \c EndLoc + /// Check whether a region with bounds \c StartLoc and \c EndLoc /// is already added to \c SourceRegions. bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc) { return SourceRegions.rend() != @@ -650,7 +653,7 @@ struct CounterCoverageMappingBuilder }); } - /// \brief Adjust the most recently visited location to \c EndLoc. + /// Adjust the most recently visited location to \c EndLoc. /// /// This should be used after visiting any statements in non-source order. void adjustForOutOfOrderTraversal(SourceLocation EndLoc) { @@ -667,7 +670,7 @@ struct CounterCoverageMappingBuilder MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation); } - /// \brief Adjust regions and state when \c NewLoc exits a file. + /// Adjust regions and state when \c NewLoc exits a file. /// /// If moving from our most recently tracked location to \c NewLoc exits any /// files, this adjusts our current region stack and creates the file regions @@ -734,7 +737,7 @@ struct CounterCoverageMappingBuilder MostRecentLocation = NewLoc; } - /// \brief Ensure that \c S is included in the current region. + /// Ensure that \c S is included in the current region. void extendRegion(const Stmt *S) { SourceMappingRegion &Region = getRegion(); SourceLocation StartLoc = getStart(S); @@ -746,7 +749,7 @@ struct CounterCoverageMappingBuilder completeDeferred(Region.getCounter(), StartLoc); } - /// \brief Mark \c S as a terminator, starting a zero region. + /// Mark \c S as a terminator, starting a zero region. void terminateRegion(const Stmt *S) { extendRegion(S); SourceMappingRegion &Region = getRegion(); @@ -791,7 +794,7 @@ struct CounterCoverageMappingBuilder popRegions(Index); } - /// \brief Keep counts of breaks and continues inside loops. + /// Keep counts of breaks and continues inside loops. struct BreakContinue { Counter BreakCount; Counter ContinueCount; @@ -805,7 +808,7 @@ struct CounterCoverageMappingBuilder : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap), DeferredRegion(None) {} - /// \brief Write the mapping data to the output stream + /// Write the mapping data to the output stream void write(llvm::raw_ostream &OS) { llvm::SmallVector<unsigned, 8> VirtualFileMapping; gatherFileIDs(VirtualFileMapping); @@ -831,22 +834,6 @@ struct CounterCoverageMappingBuilder handleFileExit(getEnd(S)); } - /// Determine whether the final deferred region emitted in \p Body should be - /// discarded. 
-  static bool discardFinalDeferredRegionInDecl(Stmt *Body) {
-    if (auto *CS = dyn_cast<CompoundStmt>(Body)) {
-      Stmt *LastStmt = CS->body_back();
-      if (auto *IfElse = dyn_cast<IfStmt>(LastStmt)) {
-        if (auto *Else = dyn_cast_or_null<CompoundStmt>(IfElse->getElse()))
-          LastStmt = Else->body_back();
-        else
-          LastStmt = IfElse->getElse();
-      }
-      return dyn_cast_or_null<ReturnStmt>(LastStmt);
-    }
-    return false;
-  }
-
   void VisitDecl(const Decl *D) {
     assert(!DeferredRegion && "Deferred region never completed");

@@ -856,17 +843,13 @@ struct CounterCoverageMappingBuilder
     if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body))))
       return;

-    Counter ExitCount = propagateCounts(getRegionCounter(Body), Body);
+    propagateCounts(getRegionCounter(Body), Body);
     assert(RegionStack.empty() && "Regions entered but never exited");

-    if (DeferredRegion) {
-      // Complete (or discard) any deferred regions introduced by the last
-      // statement.
-      if (discardFinalDeferredRegionInDecl(Body))
-        DeferredRegion = None;
-      else
-        popRegions(completeDeferred(ExitCount, getEnd(Body)));
-    }
+    // Discard the last uncompleted deferred region in a decl, if one exists.
+    // This prevents lines at the end of a function containing only whitespace
+    // or closing braces from being marked as uncovered.
+    DeferredRegion = None;
   }

   void VisitReturnStmt(const ReturnStmt *S) {
@@ -889,6 +872,7 @@ struct CounterCoverageMappingBuilder
     Counter LabelCount = getRegionCounter(S);
     SourceLocation Start = getStart(S);
     completeTopLevelDeferredRegion(LabelCount, Start);
+    completeDeferred(LabelCount, Start);
     // We can't extendRegion here or we risk overlapping with our new region.
     handleFileExit(Start);
     pushRegion(LabelCount, Start);
@@ -979,20 +963,28 @@ struct CounterCoverageMappingBuilder
     Counter ParentCount = getRegion().getCounter();
     Counter BodyCount = getRegionCounter(S);

+    // The loop increment may contain a break or continue.
+    if (S->getInc())
+      BreakContinueStack.emplace_back();
+
     // Handle the body first so that we can get the backedge count.
-    BreakContinueStack.push_back(BreakContinue());
+    BreakContinueStack.emplace_back();
     extendRegion(S->getBody());
     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
-    BreakContinue BC = BreakContinueStack.pop_back_val();
+    BreakContinue BodyBC = BreakContinueStack.pop_back_val();

     // The increment is essentially part of the body but it needs to include
     // the count for all the continue statements.
-    if (const Stmt *Inc = S->getInc())
-      propagateCounts(addCounters(BackedgeCount, BC.ContinueCount), Inc);
+    BreakContinue IncrementBC;
+    if (const Stmt *Inc = S->getInc()) {
+      propagateCounts(addCounters(BackedgeCount, BodyBC.ContinueCount), Inc);
+      IncrementBC = BreakContinueStack.pop_back_val();
+    }

     // Go back to handle the condition.
- Counter CondCount = - addCounters(ParentCount, BackedgeCount, BC.ContinueCount); + Counter CondCount = addCounters( + addCounters(ParentCount, BackedgeCount, BodyBC.ContinueCount), + IncrementBC.ContinueCount); if (const Expr *Cond = S->getCond()) { propagateCounts(CondCount, Cond); adjustForOutOfOrderTraversal(getEnd(S)); @@ -1004,8 +996,8 @@ struct CounterCoverageMappingBuilder if (Gap) fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount); - Counter OutCount = - addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount)); + Counter OutCount = addCounters(BodyBC.BreakCount, IncrementBC.BreakCount, + subtractCounters(CondCount, BodyCount)); if (OutCount != ParentCount) pushRegion(OutCount); } @@ -1361,8 +1353,7 @@ void CoverageMappingModuleGen::emit() { // and coverage mappings is a multiple of 8. if (size_t Rem = OS.str().size() % 8) { CoverageMappingSize += 8 - Rem; - for (size_t I = 0, S = 8 - Rem; I < S; ++I) - OS << '\0'; + OS.write_zeros(8 - Rem); } auto *FilenamesAndMappingsVal = llvm::ConstantDataArray::getString(Ctx, OS.str(), false); diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index d07ed5ebcf2b..b08ad896d7a5 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -31,7 +31,7 @@ class Preprocessor; class Decl; class Stmt; -/// \brief Stores additional source code information like skipped ranges which +/// Stores additional source code information like skipped ranges which /// is required by the coverage mapping generator and is obtained from /// the preprocessor. class CoverageSourceInfo : public PPCallbacks { @@ -46,7 +46,7 @@ namespace CodeGen { class CodeGenModule; -/// \brief Organizes the cross-function state that is used while generating +/// Organizes the cross-function state that is used while generating /// code coverage mapping data. class CoverageMappingModuleGen { CodeGenModule &CGM; @@ -65,7 +65,7 @@ public: return SourceInfo; } - /// \brief Add a function's coverage mapping record to the collection of the + /// Add a function's coverage mapping record to the collection of the /// function mapping records. void addFunctionMappingRecord(llvm::GlobalVariable *FunctionName, StringRef FunctionNameValue, @@ -73,15 +73,15 @@ public: const std::string &CoverageMapping, bool IsUsed = true); - /// \brief Emit the coverage mapping data for a translation unit. + /// Emit the coverage mapping data for a translation unit. void emit(); - /// \brief Return the coverage mapping translation unit file id + /// Return the coverage mapping translation unit file id /// for the given file. unsigned getFileID(const FileEntry *File); }; -/// \brief Organizes the per-function state that is used while generating +/// Organizes the per-function state that is used while generating /// code coverage mapping data. class CoverageMappingGen { CoverageMappingModuleGen &CVM; @@ -99,12 +99,12 @@ public: llvm::DenseMap<const Stmt *, unsigned> *CounterMap) : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(CounterMap) {} - /// \brief Emit the coverage mapping data which maps the regions of + /// Emit the coverage mapping data which maps the regions of /// code to counters that will be used to find the execution /// counts for those regions. void emitCounterMapping(const Decl *D, llvm::raw_ostream &OS); - /// \brief Emit the coverage mapping data for an unused function. + /// Emit the coverage mapping data for an unused function. /// It creates mapping regions with the counter of zero. 
void emitEmptyMapping(const Decl *D, llvm::raw_ostream &OS); }; diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index c375b82ea936..16fdd1c16a1d 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -31,9 +31,11 @@ #include "clang/AST/StmtCXX.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Value.h" +#include "llvm/Support/ScopedPrinter.h" using namespace clang; using namespace CodeGen; @@ -63,13 +65,6 @@ public: bool classifyReturnType(CGFunctionInfo &FI) const override; bool passClassIndirect(const CXXRecordDecl *RD) const { - // Clang <= 4 used the pre-C++11 rule, which ignores move operations. - // The PS4 platform ABI follows the behavior of Clang 3.2. - if (CGM.getCodeGenOpts().getClangABICompat() <= - CodeGenOptions::ClangABI::Ver4 || - CGM.getTriple().getOS() == llvm::Triple::PS4) - return RD->hasNonTrivialDestructor() || - RD->hasNonTrivialCopyConstructor(); return !canCopyArgument(RD); } @@ -187,8 +182,7 @@ public: emitTerminateForUnexpectedException(CodeGenFunction &CGF, llvm::Value *Exn) override; - void EmitFundamentalRTTIDescriptor(QualType Type, bool DLLExport); - void EmitFundamentalRTTIDescriptors(bool DLLExport); + void EmitFundamentalRTTIDescriptors(const CXXRecordDecl *RD); llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override; CatchTypeInfo getAddrOfCXXCatchHandlerType(QualType Ty, @@ -300,16 +294,11 @@ public: // linkage together with vtables when needed. if (ForVTable && !Thunk->hasLocalLinkage()) Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); - - // Propagate dllexport storage, to enable the linker to generate import - // thunks as necessary (e.g. when a parent class has a key function and a - // child class doesn't, and the construction vtable for the parent in the - // child needs to reference the parent's thunks). - const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - if (MD->hasAttr<DLLExportAttr>()) - Thunk->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGM.setGVProperties(Thunk, GD); } + bool exportThunk() override { return true; } + llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This, const ThisAdjustment &TA) override; @@ -480,6 +469,7 @@ public: explicit WebAssemblyCXXABI(CodeGen::CodeGenModule &CGM) : ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, /*UseARMGuardVarABI=*/true) {} + void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override; private: bool HasThisReturn(GlobalDecl GD) const override { @@ -632,13 +622,53 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty); VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy); } - VTable = Builder.CreateGEP(VTable, VTableOffset); + // Compute the address of the virtual function pointer. + llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); + + // Check the address of the function pointer if CFI on member function + // pointers is enabled. 
+  llvm::Constant *CheckSourceLocation;
+  llvm::Constant *CheckTypeDesc;
+  bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) &&
+                            CGM.HasHiddenLTOVisibility(RD);
+  if (ShouldEmitCFICheck) {
+    CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+    CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getLocStart());
+    CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
+    llvm::Constant *StaticData[] = {
+        llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
+        CheckSourceLocation,
+        CheckTypeDesc,
+    };
+
+    llvm::Metadata *MD =
+        CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
+    llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+
+    llvm::Value *TypeTest = Builder.CreateCall(
+        CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId});
+
+    if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
+      CGF.EmitTrapCheck(TypeTest);
+    } else {
+      llvm::Value *AllVtables = llvm::MetadataAsValue::get(
+          CGM.getLLVMContext(),
+          llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
+      llvm::Value *ValidVtable = Builder.CreateCall(
+          CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
+      CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall),
+                    SanitizerHandler::CFICheckFail, StaticData,
+                    {VTable, ValidVtable});
+    }
+
+    FnVirtual = Builder.GetInsertBlock();
+  }

   // Load the virtual function to call.
-  VTable = Builder.CreateBitCast(VTable, FTy->getPointerTo()->getPointerTo());
-  llvm::Value *VirtualFn =
-    Builder.CreateAlignedLoad(VTable, CGF.getPointerAlign(),
-                              "memptr.virtualfn");
+  VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
+  llvm::Value *VirtualFn = Builder.CreateAlignedLoad(
+      VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn");
   CGF.EmitBranch(FnEnd);

   // In the non-virtual path, the function pointer is actually a
@@ -647,6 +677,43 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
   llvm::Value *NonVirtualFn =
     Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn");

+  // Check the function pointer if CFI on member function pointers is enabled.
+  if (ShouldEmitCFICheck) {
+    CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
+    if (RD->hasDefinition()) {
+      CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+      llvm::Constant *StaticData[] = {
+          llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_NVMFCall),
+          CheckSourceLocation,
+          CheckTypeDesc,
+      };
+
+      llvm::Value *Bit = Builder.getFalse();
+      llvm::Value *CastedNonVirtualFn =
+          Builder.CreateBitCast(NonVirtualFn, CGF.Int8PtrTy);
+      for (const CXXRecordDecl *Base : CGM.getMostBaseClasses(RD)) {
+        llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(
+            getContext().getMemberPointerType(
+                MPT->getPointeeType(),
+                getContext().getRecordType(Base).getTypePtr()));
+        llvm::Value *TypeId =
+            llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+
+        llvm::Value *TypeTest =
+            Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test),
+                               {CastedNonVirtualFn, TypeId});
+        Bit = Builder.CreateOr(Bit, TypeTest);
+      }
+
+      CGF.EmitCheck(std::make_pair(Bit, SanitizerKind::CFIMFCall),
+                    SanitizerHandler::CFICheckFail, StaticData,
+                    {CastedNonVirtualFn, llvm::UndefValue::get(CGF.IntPtrTy)});
+
+      FnNonVirtual = Builder.GetInsertBlock();
+    }
+  }
+
   // We're done.
CGF.EmitBlock(FnEnd); llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2); @@ -836,7 +903,6 @@ ItaniumCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) { llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD, CharUnits ThisAdjustment) { assert(MD->isInstance() && "Member function must not be static!"); - MD = MD->getCanonicalDecl(); CodeGenTypes &Types = CGM.getTypes(); @@ -1182,7 +1248,7 @@ static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast"); } -/// \brief Compute the src2dst_offset hint as described in the +/// Compute the src2dst_offset hint as described in the /// Itanium C++ ABI [2.9.7] static CharUnits computeOffsetHint(ASTContext &Context, const CXXRecordDecl *Src, @@ -1448,7 +1514,7 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { return; /// Initialize the 'this' slot. In the Itanium C++ ABI, no prologue - /// adjustments are required, becuase they are all handled by thunks. + /// adjustments are required, because they are all handled by thunks. setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF)); /// Initialize the 'vtt' slot if needed. @@ -1479,8 +1545,7 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( llvm::Value *VTT = CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating); QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); - Args.insert(Args.begin() + 1, - CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false)); + Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy)); return AddedStructorArgs::prefix(1); // Added one arg. } @@ -1531,7 +1596,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName())); // Set the right visibility. - CGM.setGlobalVisibility(VTable, RD, ForDefinition); + CGM.setGVProperties(VTable, RD); // Use pointer alignment for the vtable. 
Otherwise we would align them based // on the size of the initializer which doesn't make sense as only single @@ -1548,7 +1613,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, isa<NamespaceDecl>(DC) && cast<NamespaceDecl>(DC)->getIdentifier() && cast<NamespaceDecl>(DC)->getIdentifier()->isStr("__cxxabiv1") && DC->getParent()->isTranslationUnit()) - EmitFundamentalRTTIDescriptors(RD->hasAttr<DLLExportAttr>()); + EmitFundamentalRTTIDescriptors(RD); if (!VTable->isDeclarationForLinker()) CGM.EmitVTableTypeMetadata(VTable, VTLayout); @@ -1641,12 +1706,8 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, VTableType, llvm::GlobalValue::ExternalLinkage); VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - CGM.setGlobalVisibility(VTable, RD, NotForDefinition); - if (RD->hasAttr<DLLImportAttr>()) - VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - else if (RD->hasAttr<DLLExportAttr>()) - VTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGM.setGVProperties(VTable, RD); return VTable; } @@ -1656,7 +1717,6 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, Address This, llvm::Type *Ty, SourceLocation Loc) { - GD = GD.getCanonicalDecl(); Ty = Ty->getPointerTo()->getPointerTo(); auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl()); llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent()); @@ -1690,7 +1750,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, VFunc = VFuncLoad; } - CGCallee Callee(MethodDecl, VFunc); + CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc); return Callee; } @@ -1702,10 +1762,9 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( Dtor, getFromDtorType(DtorType)); - llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); + llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); CGCallee Callee = - getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty, - CE ? CE->getLocStart() : SourceLocation()); + CGCallee::forVirtual(CE, GlobalDecl(Dtor, DtorType), This, Ty); CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(), This.getPointer(), /*ImplicitParam=*/nullptr, @@ -1725,11 +1784,19 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { if (CGM.getLangOpts().AppleKext) return false; - // If we don't have any not emitted inline virtual function, and if vtable is - // not hidden, then we are safe to emit available_externally copy of vtable. + // If the vtable is hidden then it is not safe to emit an available_externally + // copy of vtable. + if (isVTableHidden(RD)) + return false; + + if (CGM.getCodeGenOpts().ForceEmitVTables) + return true; + + // If we don't have any not emitted inline virtual function then we are safe + // to emit an available_externally copy of vtable. // FIXME we can still emit a copy of the vtable if we // can emit definition of the inline functions. - return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD); + return !hasAnyUnusedVirtualInlineFunction(RD); } static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, Address InitialPtr, @@ -1848,7 +1915,8 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, // Handle the array cookie specially in ASan. 
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 && - expr->getOperatorNew()->isReplaceableGlobalAllocationFunction()) { + (expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() || + CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) { // The store to the CookiePtr does not need to be instrumented. CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI); llvm::FunctionType *FTy = @@ -2052,6 +2120,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, false, var->getLinkage(), llvm::ConstantInt::get(guardTy, 0), guardName.str()); + guard->setDSOLocal(var->isDSOLocal()); guard->setVisibility(var->getVisibility()); // If the variable is thread-local, so is its guard variable. guard->setThreadLocalMode(var->getThreadLocalMode()); @@ -2211,6 +2280,13 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts()); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!addr) + // addr is null when we are trying to register a dtor annotated with + // __attribute__((destructor)) in a constructor function. Using null here is + // okay because this argument is just passed back to the destructor + // function. + addr = llvm::Constant::getNullValue(CGF.Int8PtrTy); + llvm::Value *args[] = { llvm::ConstantExpr::getBitCast(dtor, dtorTy), llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), @@ -2219,6 +2295,48 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, CGF.EmitNounwindRuntimeCall(atexit, args); } +void CodeGenModule::registerGlobalDtorsWithAtExit() { + for (const auto I : DtorsUsingAtExit) { + int Priority = I.first; + const llvm::TinyPtrVector<llvm::Function *> &Dtors = I.second; + + // Create a function that registers destructors that have the same priority. + // + // Since constructor functions are run in non-descending order of their + // priorities, destructors are registered in non-descending order of their + // priorities, and since destructor functions are run in the reverse order + // of their registration, destructor functions are run in non-ascending + // order of their priorities. + CodeGenFunction CGF(*this); + std::string GlobalInitFnName = + std::string("__GLOBAL_init_") + llvm::to_string(Priority); + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + llvm::Function *GlobalInitFn = CreateGlobalInitOrDestructFunction( + FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(), + SourceLocation()); + ASTContext &Ctx = getContext(); + FunctionDecl *FD = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), + &Ctx.Idents.get(GlobalInitFnName), Ctx.VoidTy, nullptr, SC_Static, + false, false); + CGF.StartFunction(GlobalDecl(FD), getContext().VoidTy, GlobalInitFn, + getTypes().arrangeNullaryFunction(), FunctionArgList(), + SourceLocation(), SourceLocation()); + + for (auto *Dtor : Dtors) { + // Register the destructor function calling __cxa_atexit if it is + // available. Otherwise fall back on calling atexit. + if (getCodeGenOpts().CXAAtExit) + emitGlobalDtorWithCXAAtExit(CGF, Dtor, nullptr, false); + else + CGF.registerGlobalDtorWithAtExit(Dtor); + } + + CGF.FinishFunction(); + AddGlobalCtor(GlobalInitFn, Priority, nullptr); + } +} + /// Register a global destructor as best as we know how. 
void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, @@ -2407,8 +2525,10 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init)); } - if (Init) + if (Init) { Init->setVisibility(Var->getVisibility()); + Init->setDSOLocal(Var->isDSOLocal()); + } llvm::LLVMContext &Context = CGM.getModule().getContext(); llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); @@ -2416,8 +2536,12 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( if (InitIsInitFunc) { if (Init) { llvm::CallInst *CallVal = Builder.CreateCall(Init); - if (isThreadWrapperReplaceable(VD, CGM)) + if (isThreadWrapperReplaceable(VD, CGM)) { CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); + llvm::Function *Fn = + cast<llvm::Function>(cast<llvm::GlobalAlias>(Init)->getAliasee()); + Fn->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); + } } } else { // Don't know whether we have an init function. Call it if it exists. @@ -2574,12 +2698,16 @@ public: BCTI_Public = 0x2 }; + /// BuildTypeInfo - Build the RTTI type info struct for the given type, or + /// link to an existing RTTI descriptor if one already exists. + llvm::Constant *BuildTypeInfo(QualType Ty); + /// BuildTypeInfo - Build the RTTI type info struct for the given type. - /// - /// \param Force - true to force the creation of this RTTI value - /// \param DLLExport - true to mark the RTTI value as DLLExport - llvm::Constant *BuildTypeInfo(QualType Ty, bool Force = false, - bool DLLExport = false); + llvm::Constant *BuildTypeInfo( + QualType Ty, + llvm::GlobalVariable::LinkageTypes Linkage, + llvm::GlobalValue::VisibilityTypes Visibility, + llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass); }; } @@ -2622,11 +2750,8 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { /*Constant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, Name); - if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) { - const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl()); - if (RD->hasAttr<DLLImportAttr>()) - GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); - } + const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + CGM.setGVProperties(GV, RD); } return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); @@ -2673,6 +2798,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::LongDouble: case BuiltinType::Float16: case BuiltinType::Float128: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: @@ -2687,6 +2813,30 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: + case BuiltinType::ShortAccum: + case BuiltinType::Accum: + case BuiltinType::LongAccum: + case BuiltinType::UShortAccum: + case BuiltinType::UAccum: + case BuiltinType::ULongAccum: + case BuiltinType::ShortFract: + case BuiltinType::Fract: + case BuiltinType::LongFract: + case BuiltinType::UShortFract: + case BuiltinType::UFract: + case BuiltinType::ULongFract: + case BuiltinType::SatShortAccum: + case BuiltinType::SatAccum: + case BuiltinType::SatLongAccum: + case BuiltinType::SatUShortAccum: + case BuiltinType::SatUAccum: + case BuiltinType::SatULongAccum: + case BuiltinType::SatShortFract: + case BuiltinType::SatFract: + case BuiltinType::SatLongFract: + case BuiltinType::SatUShortFract: + case BuiltinType::SatUFract: + case BuiltinType::SatULongFract: return false; case 
BuiltinType::Dependent: @@ -2761,6 +2911,11 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, // N.B. We must always emit the RTTI data ourselves if there exists a key // function. bool IsDLLImport = RD->hasAttr<DLLImportAttr>(); + + // Don't import the RTTI but emit it locally. + if (CGM.getTriple().isWindowsGNUEnvironment() && IsDLLImport) + return false; + if (CGM.getVTables().isVTableExternal(RD)) return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment() ? false @@ -2953,6 +3108,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm::Constant *VTable = CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy); + CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts())); llvm::Type *PtrDiffTy = CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); @@ -2966,7 +3122,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { Fields.push_back(VTable); } -/// \brief Return the linkage that the type info and type info name constants +/// Return the linkage that the type info and type info name constants /// should have for the given type. static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, QualType Ty) { @@ -3020,8 +3176,7 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, llvm_unreachable("Invalid linkage!"); } -llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, - bool DLLExport) { +llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty) { // We want to operate on the canonical type. Ty = Ty.getCanonicalType(); @@ -3039,17 +3194,41 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, } // Check if there is already an external RTTI descriptor for this type. - bool IsStdLib = IsStandardLibraryRTTIDescriptor(Ty); - if (!Force && (IsStdLib || ShouldUseExternalRTTIDescriptor(CGM, Ty))) + if (IsStandardLibraryRTTIDescriptor(Ty) || + ShouldUseExternalRTTIDescriptor(CGM, Ty)) return GetAddrOfExternalRTTIDescriptor(Ty); // Emit the standard library with external linkage. - llvm::GlobalVariable::LinkageTypes Linkage; - if (IsStdLib) - Linkage = llvm::GlobalValue::ExternalLinkage; + llvm::GlobalVariable::LinkageTypes Linkage = getTypeInfoLinkage(CGM, Ty); + + // Give the type_info object and name the formal visibility of the + // type itself. + llvm::GlobalValue::VisibilityTypes llvmVisibility; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) + // If the linkage is local, only default visibility makes sense. + llvmVisibility = llvm::GlobalValue::DefaultVisibility; + else if (CXXABI.classifyRTTIUniqueness(Ty, Linkage) == + ItaniumCXXABI::RUK_NonUniqueHidden) + llvmVisibility = llvm::GlobalValue::HiddenVisibility; else - Linkage = getTypeInfoLinkage(CGM, Ty); + llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility()); + + llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass = + llvm::GlobalValue::DefaultStorageClass; + if (CGM.getTriple().isWindowsItaniumEnvironment()) { + auto RD = Ty->getAsCXXRecordDecl(); + if (RD && RD->hasAttr<DLLExportAttr>()) + DLLStorageClass = llvm::GlobalValue::DLLExportStorageClass; + } + return BuildTypeInfo(Ty, Linkage, llvmVisibility, DLLStorageClass); +} + +llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( + QualType Ty, + llvm::GlobalVariable::LinkageTypes Linkage, + llvm::GlobalValue::VisibilityTypes Visibility, + llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass) { // Add the vtable pointer. 
BuildVTablePointer(cast<Type>(Ty)); @@ -3163,7 +3342,11 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields); + SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); llvm::Module &M = CGM.getModule(); + llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name); llvm::GlobalVariable *GV = new llvm::GlobalVariable(M, Init->getType(), /*Constant=*/true, Linkage, Init, Name); @@ -3195,37 +3378,14 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, // All of this is to say that it's important that both the type_info // object and the type_info name be uniqued when weakly emitted. - // Give the type_info object and name the formal visibility of the - // type itself. - llvm::GlobalValue::VisibilityTypes llvmVisibility; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) - // If the linkage is local, only default visibility makes sense. - llvmVisibility = llvm::GlobalValue::DefaultVisibility; - else if (RTTIUniqueness == ItaniumCXXABI::RUK_NonUniqueHidden) - llvmVisibility = llvm::GlobalValue::HiddenVisibility; - else - llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility()); + TypeName->setVisibility(Visibility); + CGM.setDSOLocal(TypeName); - TypeName->setVisibility(llvmVisibility); - GV->setVisibility(llvmVisibility); + GV->setVisibility(Visibility); + CGM.setDSOLocal(GV); - if (CGM.getTriple().isWindowsItaniumEnvironment()) { - auto RD = Ty->getAsCXXRecordDecl(); - if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) { - TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - } else if (RD && RD->hasAttr<DLLImportAttr>() && - ShouldUseExternalRTTIDescriptor(CGM, Ty)) { - TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - - // Because the typename and the typeinfo are DLL import, convert them to - // declarations rather than definitions. The initializers still need to - // be constructed to calculate the type for the declarations. 
- TypeName->setInitializer(nullptr); - GV->setInitializer(nullptr); - } - } + TypeName->setDLLStorageClass(DLLStorageClass); + GV->setDLLStorageClass(DLLStorageClass); return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); } @@ -3433,11 +3593,9 @@ static unsigned extractPBaseFlags(ASTContext &Ctx, QualType &Type) { Flags |= ItaniumRTTIBuilder::PTI_Incomplete; if (auto *Proto = Type->getAs<FunctionProtoType>()) { - if (Proto->isNothrow(Ctx)) { + if (Proto->isNothrow()) { Flags |= ItaniumRTTIBuilder::PTI_Noexcept; - Type = Ctx.getFunctionType( - Proto->getReturnType(), Proto->getParamTypes(), - Proto->getExtProtoInfo().withExceptionSpec(EST_None)); + Type = Ctx.getFunctionTypeWithExceptionSpec(Type, EST_None); } } @@ -3502,18 +3660,7 @@ llvm::Constant *ItaniumCXXABI::getAddrOfRTTIDescriptor(QualType Ty) { return ItaniumRTTIBuilder(*this).BuildTypeInfo(Ty); } -void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type, - bool DLLExport) { - QualType PointerType = getContext().getPointerType(Type); - QualType PointerTypeConst = getContext().getPointerType(Type.withConst()); - ItaniumRTTIBuilder(*this).BuildTypeInfo(Type, /*Force=*/true, DLLExport); - ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerType, /*Force=*/true, - DLLExport); - ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, /*Force=*/true, - DLLExport); -} - -void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) { +void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(const CXXRecordDecl *RD) { // Types added here must also be added to TypeInfoIsInStandardLibrary. QualType FundamentalTypes[] = { getContext().VoidTy, getContext().NullPtrTy, @@ -3527,10 +3674,24 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) { getContext().UnsignedInt128Ty, getContext().HalfTy, getContext().FloatTy, getContext().DoubleTy, getContext().LongDoubleTy, getContext().Float128Ty, - getContext().Char16Ty, getContext().Char32Ty + getContext().Char8Ty, getContext().Char16Ty, + getContext().Char32Ty }; - for (const QualType &FundamentalType : FundamentalTypes) - EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport); + llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass = + RD->hasAttr<DLLExportAttr>() + ? llvm::GlobalValue::DLLExportStorageClass + : llvm::GlobalValue::DefaultStorageClass; + llvm::GlobalValue::VisibilityTypes Visibility = + CodeGenModule::GetLLVMVisibility(RD->getVisibility()); + for (const QualType &FundamentalType : FundamentalTypes) { + QualType PointerType = getContext().getPointerType(FundamentalType); + QualType PointerTypeConst = getContext().getPointerType( + FundamentalType.withConst()); + for (QualType Type : {FundamentalType, PointerType, PointerTypeConst}) + ItaniumRTTIBuilder(*this).BuildTypeInfo( + Type, llvm::GlobalValue::ExternalLinkage, + Visibility, DLLStorageClass); + } } /// What sort of uniqueness rules should we use for the RTTI for the @@ -3583,12 +3744,22 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM, } llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(AliasDecl); - if (llvm::GlobalValue::isDiscardableIfUnused(Linkage)) - return StructorCodegen::RAUW; + // All discardable structors can be RAUWed, but we don't want to do that in + // unoptimized code, as that makes complete structor symbol disappear + // completely, which degrades debugging experience. + // Symbols with private linkage can be safely aliased, so we special case them + // here. 
+ if (llvm::GlobalValue::isLocalLinkage(Linkage)) + return CGM.getCodeGenOpts().OptimizationLevel > 0 ? StructorCodegen::RAUW + : StructorCodegen::Alias; + // Linkonce structors cannot be aliased nor placed in a comdat, so these need + // to be emitted separately. // FIXME: Should we allow available_externally aliases? - if (!llvm::GlobalAlias::isValidLinkage(Linkage)) - return StructorCodegen::RAUW; + if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) || + !llvm::GlobalAlias::isValidLinkage(Linkage)) + return CGM.getCodeGenOpts().OptimizationLevel > 0 ? StructorCodegen::RAUW + : StructorCodegen::Emit; if (llvm::GlobalValue::isWeakForLinker(Linkage)) { // Only ELF and wasm support COMDATs with arbitrary names (C5/D5). @@ -3616,6 +3787,9 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM, // Create the alias with no name. auto *Alias = llvm::GlobalAlias::create(Linkage, "", Aliasee); + // Constructors and destructors are always unnamed_addr. + Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + // Switch any previous uses to the alias. if (Entry) { assert(Entry->getType() == Aliasee->getType() && @@ -3628,7 +3802,7 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM, } // Finally, set up the alias with its proper name and attributes. - CGM.setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias); + CGM.SetCommonAttributes(AliasDecl, Alias); } void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, @@ -3904,7 +4078,9 @@ static void InitCatchParam(CodeGenFunction &CGF, llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true); Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy), caughtExnAlignment); - CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType); + LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType); + LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType); + CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap); return; } @@ -3931,7 +4107,8 @@ static void InitCatchParam(CodeGenFunction &CGF, AggValueSlot::forAddr(ParamAddr, Qualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); // Leave the terminate scope. CGF.EHStack.popTerminate(); @@ -4051,3 +4228,11 @@ ItaniumCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This, const CXXRecordDecl *RD) { return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD}; } + +void WebAssemblyCXXABI::emitBeginCatch(CodeGenFunction &CGF, + const CXXCatchStmt *C) { + if (CGF.getTarget().hasFeature("exception-handling")) + CGF.EHStack.pushCleanup<CatchRetScope>( + NormalCleanup, cast<llvm::CatchPadInst>(CGF.CurrentFuncletPad)); + ItaniumCXXABI::emitBeginCatch(CGF, C); +} diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp index a6f21d8ddcfb..48dea7d54b1e 100644 --- a/lib/CodeGen/MacroPPCallbacks.cpp +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -178,7 +178,8 @@ void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason, void MacroPPCallbacks::InclusionDirective( SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, - StringRef SearchPath, StringRef RelativePath, const Module *Imported) { + StringRef SearchPath, StringRef RelativePath, const Module *Imported, + SrcMgr::CharacteristicKind FileType) { // Record the line location of the current included file. 
LastHashLoc = HashLoc; diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h index e117f96f47df..48c67e2d36ad 100644 --- a/lib/CodeGen/MacroPPCallbacks.h +++ b/lib/CodeGen/MacroPPCallbacks.h @@ -101,7 +101,8 @@ public: StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, StringRef SearchPath, StringRef RelativePath, - const Module *Imported) override; + const Module *Imported, + SrcMgr::CharacteristicKind FileType) override; /// Hook called whenever a macro definition is seen. void MacroDefined(const Token &MacroNameTok, diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index ffb3681c2585..81ed05059546 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -216,13 +216,20 @@ public: return DT != Dtor_Base; } + void setCXXDestructorDLLStorage(llvm::GlobalValue *GV, + const CXXDestructorDecl *Dtor, + CXXDtorType DT) const override; + + llvm::GlobalValue::LinkageTypes + getCXXDestructorLinkage(GVALinkage Linkage, const CXXDestructorDecl *Dtor, + CXXDtorType DT) const override; + void EmitCXXDestructors(const CXXDestructorDecl *D) override; const CXXRecordDecl * getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override { - MD = MD->getCanonicalDecl(); if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) { - MicrosoftVTableContext::MethodVFTableLocation ML = + MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(MD); // The vbases might be ordered differently in the final overrider object // and the complete object, so the "this" argument may sometimes point to @@ -357,9 +364,6 @@ public: void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD, bool ReturnAdjustment) override { - // Never dllimport/dllexport thunks. - Thunk->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); - GVALinkage Linkage = getContext().GetGVALinkageForFunction(cast<FunctionDecl>(GD.getDecl())); @@ -371,6 +375,8 @@ public: Thunk->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage); } + bool exportThunk() override { return false; } + llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This, const ThisAdjustment &TA) override; @@ -516,10 +522,12 @@ public: if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name)) return GV; - return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, - /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, Name); + auto *GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, + /*isConstant=*/true, + llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/nullptr, Name); + CGM.setDSOLocal(GV); + return GV; } llvm::Constant *getImageRelativeConstant(llvm::Constant *PtrVal) { @@ -558,7 +566,7 @@ private: GetNullMemberPointerFields(const MemberPointerType *MPT, llvm::SmallVectorImpl<llvm::Constant *> &fields); - /// \brief Shared code for virtual base adjustment. Returns the offset from + /// Shared code for virtual base adjustment. Returns the offset from /// the vbptr to the virtual base. Optionally returns the address of the /// vbptr itself. llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF, @@ -582,14 +590,14 @@ private: performBaseAdjustment(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy); - /// \brief Performs a full virtual base adjustment. Used to dereference + /// Performs a full virtual base adjustment. Used to dereference /// pointers to members of virtual bases. 
llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E, const CXXRecordDecl *RD, Address Base, llvm::Value *VirtualBaseAdjustmentOffset, llvm::Value *VBPtrOffset /* optional */); - /// \brief Emits a full member pointer with the fields common to data and + /// Emits a full member pointer with the fields common to data and /// function member pointers. llvm::Constant *EmitFullMemberPointer(llvm::Constant *FirstField, bool IsMemberFunction, @@ -600,16 +608,15 @@ private: bool MemberPointerConstantIsNull(const MemberPointerType *MPT, llvm::Constant *MP); - /// \brief - Initialize all vbptrs of 'this' with RD as the complete type. + /// - Initialize all vbptrs of 'this' with RD as the complete type. void EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD); - /// \brief Caching wrapper around VBTableBuilder::enumerateVBTables(). + /// Caching wrapper around VBTableBuilder::enumerateVBTables(). const VBTableGlobals &enumerateVBTables(const CXXRecordDecl *RD); - /// \brief Generate a thunk for calling a virtual member function MD. - llvm::Function *EmitVirtualMemPtrThunk( - const CXXMethodDecl *MD, - const MicrosoftVTableContext::MethodVFTableLocation &ML); + /// Generate a thunk for calling a virtual member function MD. + llvm::Function *EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, + const MethodVFTableLocation &ML); public: llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override; @@ -753,15 +760,15 @@ private: typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy; typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy; typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalValue *> VFTablesMapTy; - /// \brief All the vftables that have been referenced. + /// All the vftables that have been referenced. VFTablesMapTy VFTablesMap; VTablesMapTy VTablesMap; - /// \brief This set holds the record decls we've deferred vtable emission for. + /// This set holds the record decls we've deferred vtable emission for. llvm::SmallPtrSet<const CXXRecordDecl *, 4> DeferredVFTables; - /// \brief All the vbtables which have been referenced. + /// All the vbtables which have been referenced. llvm::DenseMap<const CXXRecordDecl *, VBTableGlobals> VBTablesMap; /// Info on the global variable used to guard initialization of static locals. @@ -820,45 +827,8 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { return RAA_Default; case llvm::Triple::x86_64: - // If a class has a destructor, we'd really like to pass it indirectly - // because it allows us to elide copies. Unfortunately, MSVC makes that - // impossible for small types, which it will pass in a single register or - // stack slot. Most objects with dtors are large-ish, so handle that early. - // We can't call out all large objects as being indirect because there are - // multiple x64 calling conventions and the C++ ABI code shouldn't dictate - // how we pass large POD types. - // - // Note: This permits small classes with nontrivial destructors to be - // passed in registers, which is non-conforming. - if (RD->hasNonTrivialDestructor() && - getContext().getTypeSize(RD->getTypeForDecl()) > 64) - return RAA_Indirect; - - // If a class has at least one non-deleted, trivial copy constructor, it - // is passed according to the C ABI. Otherwise, it is passed indirectly. - // - // Note: This permits classes with non-trivial copy or move ctors to be - // passed in registers, so long as they *also* have a trivial copy ctor, - // which is non-conforming. 
- if (RD->needsImplicitCopyConstructor()) { - // If the copy ctor has not yet been declared, we can read its triviality - // off the AST. - if (!RD->defaultedCopyConstructorIsDeleted() && - RD->hasTrivialCopyConstructor()) - return RAA_Default; - } else { - // Otherwise, we need to find the copy constructor(s) and ask. - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor()) { - // We had at least one nondeleted trivial copy ctor. Return directly. - if (!CD->isDeleted() && CD->isTrivial()) - return RAA_Default; - } - } - } - - // We have no trivial, non-deleted copy constructor. - return RAA_Indirect; + case llvm::Triple::aarch64: + return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default; } llvm_unreachable("invalid enum"); @@ -890,20 +860,6 @@ void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { CGF.EmitRuntimeCallOrInvoke(Fn, Args); } -namespace { -struct CatchRetScope final : EHScopeStack::Cleanup { - llvm::CatchPadInst *CPI; - - CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {} - - void Emit(CodeGenFunction &CGF, Flags flags) override { - llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest"); - CGF.Builder.CreateCatchRet(CPI, BB); - CGF.EmitBlock(BB); - } -}; -} - void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *S) { // In the MS ABI, the runtime handles the copy, and the catch handler is @@ -1105,10 +1061,22 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { // the second parameter. FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); + + // aarch64-windows requires that instance methods use X1 for the return + // address. So for aarch64-windows we do not mark the + // return as SRet. + FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() == + llvm::Triple::aarch64); return true; } else if (!RD->isPOD()) { // If it's a free function, non-POD types are returned indirectly. FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + + // aarch64-windows requires that non-POD, non-instance returns use X0 for + // the return address. So for aarch64-windows we do not mark the return as + // SRet. + FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() == + llvm::Triple::aarch64); return true; } @@ -1182,15 +1150,16 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers( unsigned AS = getThisAddress(CGF).getAddressSpace(); llvm::Value *Int8This = nullptr; // Initialize lazily. - for (VBOffsets::const_iterator I = VBaseMap.begin(), E = VBaseMap.end(); - I != E; ++I) { + for (const CXXBaseSpecifier &S : RD->vbases()) { + const CXXRecordDecl *VBase = S.getType()->getAsCXXRecordDecl(); + auto I = VBaseMap.find(VBase); + assert(I != VBaseMap.end()); if (!I->second.hasVtorDisp()) continue; llvm::Value *VBaseOffset = - GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first); - uint64_t ConstantVBaseOffset = - Layout.getVBaseClassOffset(I->first).getQuantity(); + GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, VBase); + uint64_t ConstantVBaseOffset = I->second.VBaseOffset.getQuantity(); // vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase). 
llvm::Value *VtorDispValue = Builder.CreateSub( @@ -1233,7 +1202,7 @@ void MicrosoftCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { if (!hasDefaultCXXMethodCC(getContext(), D) || D->getNumParams() != 0) { llvm::Function *Fn = getAddrOfCXXCtorClosure(D, Ctor_DefaultClosure); Fn->setLinkage(llvm::GlobalValue::WeakODRLinkage); - Fn->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGM.setGVProperties(Fn, D); } } @@ -1295,6 +1264,52 @@ MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, return Added; } +void MicrosoftCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV, + const CXXDestructorDecl *Dtor, + CXXDtorType DT) const { + // Deleting destructor variants are never imported or exported. Give them the + // default storage class. + if (DT == Dtor_Deleting) { + GV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + } else { + const NamedDecl *ND = Dtor; + CGM.setDLLImportDLLExport(GV, ND); + } +} + +llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( + GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { + // Internal things are always internal, regardless of attributes. After this, + // we know the thunk is externally visible. + if (Linkage == GVA_Internal) + return llvm::GlobalValue::InternalLinkage; + + switch (DT) { + case Dtor_Base: + // The base destructor most closely tracks the user-declared constructor, so + // we delegate back to the normal declarator case. + return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, + /*isConstantVariable=*/false); + case Dtor_Complete: + // The complete destructor is like an inline function, but it may be + // imported and therefore must be exported as well. This requires changing + // the linkage if a DLL attribute is present. + if (Dtor->hasAttr<DLLExportAttr>()) + return llvm::GlobalValue::WeakODRLinkage; + if (Dtor->hasAttr<DLLImportAttr>()) + return llvm::GlobalValue::AvailableExternallyLinkage; + return llvm::GlobalValue::LinkOnceODRLinkage; + case Dtor_Deleting: + // Deleting destructors are like inline functions. They have vague linkage + // and are emitted everywhere they are used. They are internal if the class + // is internal. + return llvm::GlobalValue::LinkOnceODRLinkage; + case Dtor_Comdat: + llvm_unreachable("MS C++ ABI does not support comdat dtors"); + } + llvm_unreachable("invalid dtor type"); +} + void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { // The TU defining a dtor is only guaranteed to emit a base destructor. All // other destructor variants are delegating thunks. @@ -1303,10 +1318,8 @@ void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { CharUnits MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { - GD = GD.getCanonicalDecl(); const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - GlobalDecl LookupGD = GD; if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) { // Complete destructors take a pointer to the complete object as a // parameter, thus don't need this adjustment. @@ -1315,11 +1328,11 @@ MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { // There's no Dtor_Base in vftable but it shares the this adjustment with // the deleting one, so look it up instead. 
- LookupGD = GlobalDecl(DD, Dtor_Deleting); + GD = GlobalDecl(DD, Dtor_Deleting); } - MicrosoftVTableContext::MethodVFTableLocation ML = - CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); + MethodVFTableLocation ML = + CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); CharUnits Adjustment = ML.VFPtrOffset; // Normal virtual instance methods need to adjust from the vfptr that first @@ -1353,7 +1366,6 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( return CGF.Builder.CreateConstByteGEP(This, Adjustment); } - GD = GD.getCanonicalDecl(); const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); GlobalDecl LookupGD = GD; @@ -1367,7 +1379,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( // with the base one, so look up the deleting one instead. LookupGD = GlobalDecl(DD, Dtor_Deleting); } - MicrosoftVTableContext::MethodVFTableLocation ML = + MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); CharUnits StaticOffset = ML.VFPtrOffset; @@ -1523,8 +1535,7 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs( } RValue RV = RValue::get(MostDerivedArg); if (FPT->isVariadic()) { - Args.insert(Args.begin() + 1, - CallArg(RV, getContext().IntTy, /*needscopy=*/false)); + Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy)); return AddedStructorArgs::prefix(1); } Args.add(RV, getContext().IntTy); @@ -1535,6 +1546,12 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, bool Delegating, Address This) { + // Use the base destructor variant in place of the complete destructor variant + // if the class has no virtual bases. This effectively implements some of the + // -mconstructor-aliases optimization, but as part of the MS C++ ABI. + if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0) + Type = Dtor_Base; + CGCallee Callee = CGCallee::forDirect( CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), DD); @@ -1817,7 +1834,6 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, Address This, llvm::Type *Ty, SourceLocation Loc) { - GD = GD.getCanonicalDecl(); CGBuilderTy &Builder = CGF.Builder; Ty = Ty->getPointerTo()->getPointerTo(); @@ -1828,8 +1844,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, llvm::Value *VTable = CGF.GetVTablePtr(VPtr, Ty, MethodDecl->getParent()); MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext(); - MicrosoftVTableContext::MethodVFTableLocation ML = - VFTContext.getMethodVFTableLocation(GD); + MethodVFTableLocation ML = VFTContext.getMethodVFTableLocation(GD); // Compute the identity of the most derived class whose virtual table is // located at the MethodVFTableLocation ML. @@ -1857,7 +1872,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); } - CGCallee Callee(MethodDecl, VFunc); + CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc); return Callee; } @@ -1872,9 +1887,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( GlobalDecl GD(Dtor, Dtor_Deleting); const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( Dtor, StructorType::Deleting); - llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); - CGCallee Callee = getVirtualFunctionPointer( - CGF, GD, This, Ty, CE ? 
CE->getLocStart() : SourceLocation()); + llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); + CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); ASTContext &Context = getContext(); llvm::Value *ImplicitParam = llvm::ConstantInt::get( @@ -1915,23 +1929,24 @@ MicrosoftCXXABI::enumerateVBTables(const CXXRecordDecl *RD) { return VBGlobals; } -llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk( - const CXXMethodDecl *MD, - const MicrosoftVTableContext::MethodVFTableLocation &ML) { +llvm::Function * +MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, + const MethodVFTableLocation &ML) { assert(!isa<CXXConstructorDecl>(MD) && !isa<CXXDestructorDecl>(MD) && "can't form pointers to ctors or virtual dtors"); // Calculate the mangled name. SmallString<256> ThunkName; llvm::raw_svector_ostream Out(ThunkName); - getMangleContext().mangleVirtualMemPtrThunk(MD, Out); + getMangleContext().mangleVirtualMemPtrThunk(MD, ML, Out); // If the thunk has been generated previously, just return it. if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName)) return cast<llvm::Function>(GV); // Create the llvm::Function. - const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSMemberPointerThunk(MD); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeUnprototypedMustTailThunk(MD); llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Function *ThunkFn = llvm::Function::Create(ThunkTy, llvm::Function::ExternalLinkage, @@ -2716,9 +2731,8 @@ llvm::Constant * MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) { assert(MD->isInstance() && "Member function must not be static!"); - MD = MD->getCanonicalDecl(); CharUnits NonVirtualBaseAdjustment = CharUnits::Zero(); - const CXXRecordDecl *RD = MD->getParent()->getMostRecentDecl(); + const CXXRecordDecl *RD = MD->getParent()->getMostRecentNonInjectedDecl(); CodeGenTypes &Types = CGM.getTypes(); unsigned VBTableIndex = 0; @@ -2738,8 +2752,7 @@ MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) { FirstField = CGM.GetAddrOfFunction(MD, Ty); } else { auto &VTableContext = CGM.getMicrosoftVTableContext(); - MicrosoftVTableContext::MethodVFTableLocation ML = - VTableContext.getMethodVFTableLocation(MD); + MethodVFTableLocation ML = VTableContext.getMethodVFTableLocation(MD); FirstField = EmitVirtualMemPtrThunk(MD, ML); // Include the vfptr adjustment if the method is in a non-primary vftable. NonVirtualBaseAdjustment += ML.VFPtrOffset; @@ -3336,14 +3349,14 @@ CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) { // a reference to the TypeInfo for the type and a reference to the // CompleteHierarchyDescriptor for the type. // -// ClassHieararchyDescriptor: Contains information about a class hierarchy. +// ClassHierarchyDescriptor: Contains information about a class hierarchy. // Used during dynamic_cast to walk a class hierarchy. References a base // class array and the size of said array. // // BaseClassArray: Contains a list of classes in a hierarchy. BaseClassArray is // somewhat of a misnomer because the most derived class is also in the list // as well as multiple copies of virtual bases (if they occur multiple times -// in the hiearchy.) The BaseClassArray contains one BaseClassDescriptor for +// in the hierarchy.) The BaseClassArray contains one BaseClassDescriptor for // every path in the hierarchy, in pre-order depth first order. 
Note, we do // not declare a specific llvm type for BaseClassArray, it's merely an array // of BaseClassDescriptor pointers. @@ -3356,7 +3369,7 @@ CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) { // mangled into them so they can be aggressively deduplicated by the linker. static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) { - StringRef MangledName("\01??_7type_info@@6B@"); + StringRef MangledName("??_7type_info@@6B@"); if (auto VTable = CGM.getModule().getNamedGlobal(MangledName)) return VTable; return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, @@ -3367,7 +3380,7 @@ static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) { namespace { -/// \brief A Helper struct that stores information about a class in a class +/// A Helper struct that stores information about a class in a class /// hierarchy. The information stored in these structs struct is used during /// the generation of ClassHierarchyDescriptors and BaseClassDescriptors. // During RTTI creation, MSRTTIClasses are stored in a contiguous array with @@ -3394,7 +3407,7 @@ struct MSRTTIClass { uint32_t Flags, NumBases, OffsetInVBase; }; -/// \brief Recursively initialize the base class array. +/// Recursively initialize the base class array. uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent, const CXXBaseSpecifier *Specifier) { Flags = HasHierarchyDescriptor; @@ -3441,7 +3454,7 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) { llvm_unreachable("Invalid linkage!"); } -/// \brief An ephemeral helper class for building MS RTTI types. It caches some +/// An ephemeral helper class for building MS RTTI types. It caches some /// calls to the module and information about the most derived class in a /// hierarchy. struct MSRTTIBuilder { @@ -3474,7 +3487,7 @@ struct MSRTTIBuilder { } // namespace -/// \brief Recursively serializes a class hierarchy in pre-order depth first +/// Recursively serializes a class hierarchy in pre-order depth first /// order. static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes, const CXXRecordDecl *RD) { @@ -3483,7 +3496,7 @@ static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes, serializeClassHierarchy(Classes, Base.getType()->getAsCXXRecordDecl()); } -/// \brief Find ambiguity among base classes. +/// Find ambiguity among base classes. static void detectAmbiguousBases(SmallVectorImpl<MSRTTIClass> &Classes) { llvm::SmallPtrSet<const CXXRecordDecl *, 8> VirtualBases; @@ -3749,7 +3762,7 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, Flags}; } -/// \brief Gets a TypeDescriptor. Returns a llvm::Constant * rather than a +/// Gets a TypeDescriptor. Returns a llvm::Constant * rather than a /// llvm::GlobalVariable * because different type descriptors have different /// types, and need to be abstracted. They are abstracting by casting the /// address to an Int8PtrTy. @@ -3791,7 +3804,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy); } -/// \brief Gets or a creates a Microsoft CompleteObjectLocator. +/// Gets or a creates a Microsoft CompleteObjectLocator. 
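The destructor handling above folds the complete ("vbase") destructor into the base destructor whenever the class has no virtual bases. A small source-level illustration, not taken from this commit, of when the two variants coincide:

    struct Part { ~Part(); };

    struct NoVBases {                     // no virtual bases: the complete and base
      Part p;                             // destructors do identical work, so only the
      ~NoVBases();                        // base variant needs to be emitted
    };

    struct VBase { ~VBase(); };
    struct WithVBase : virtual VBase {    // the complete destructor must additionally
      ~WithVBase();                       // destroy the virtual base, so both variants
    };                                    // are still required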
llvm::GlobalVariable * MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD, const VPtrInfo &Info) { @@ -3808,19 +3821,12 @@ static void emitCXXConstructor(CodeGenModule &CGM, static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor, StructorType dtorType) { - // The complete destructor is equivalent to the base destructor for - // classes with no virtual bases, so try to emit it as an alias. - if (!dtor->getParent()->getNumVBases() && - (dtorType == StructorType::Complete || dtorType == StructorType::Base)) { - bool ProducedAlias = !CGM.TryEmitDefinitionAsAlias( - GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base)); - if (ProducedAlias) { - if (dtorType == StructorType::Complete) - return; - if (dtor->isVirtual()) - CGM.getVTables().EmitThunks(GlobalDecl(dtor, Dtor_Complete)); - } - } + // Emit the base destructor if the base and complete (vbase) destructors are + // equivalent. This effectively implements -mconstructor-aliases as part of + // the ABI. + if (dtorType == StructorType::Complete && + dtor->getParent()->getNumVBases() == 0) + dtorType = StructorType::Base; // The base destructor is equivalent to the base destructor of its // base class if there is exactly one non-virtual base class with a @@ -3898,7 +3904,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, SourceLocation(), &getContext().Idents.get("is_most_derived"), getContext().IntTy, ImplicitParamDecl::Other); - // Only add the parameter to the list if thie class has virtual bases. + // Only add the parameter to the list if the class has virtual bases. if (RD->getNumVBases() > 0) FunctionArgs.push_back(&IsMostDerived); diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index d0760b9cc2a6..c164cec5d942 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -71,9 +71,8 @@ class PCHContainerGenerator : public ASTConsumer { } bool VisitImportDecl(ImportDecl *D) { - auto *Import = cast<ImportDecl>(D); - if (!Import->getImportedOwningModule()) - DI.EmitImportDecl(*Import); + if (!D->getImportedOwningModule()) + DI.EmitImportDecl(*D); return true; } @@ -229,6 +228,11 @@ public: Builder->getModuleDebugInfo()->completeRequiredType(RD); } + void HandleImplicitImportDecl(ImportDecl *D) override { + if (!D->getImportedOwningModule()) + Builder->getModuleDebugInfo()->EmitImportDecl(*D); + } + /// Emit a container holding the serialized AST. void HandleTranslationUnit(ASTContext &Ctx) override { assert(M && VMContext && Builder); @@ -286,7 +290,7 @@ public: else ASTSym->setSection("__clangast"); - DEBUG({ + LLVM_DEBUG({ // Print the IR for the PCH container to the debug output. 
llvm::SmallString<0> Buffer; clang::EmitBackendOutput( diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp index f891cfbe4bb2..23cf9e490828 100644 --- a/lib/CodeGen/SanitizerMetadata.cpp +++ b/lib/CodeGen/SanitizerMetadata.cpp @@ -27,7 +27,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, bool IsBlacklisted) { if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress | - SanitizerKind::HWAddress)) + SanitizerKind::HWAddress | + SanitizerKind::KernelHWAddress)) return; IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init"); IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty); @@ -60,7 +61,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, const VarDecl &D, bool IsDynInit) { if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress | - SanitizerKind::HWAddress)) + SanitizerKind::HWAddress | + SanitizerKind::KernelHWAddress)) return; std::string QualName; llvm::raw_string_ostream OS(QualName); @@ -79,7 +81,8 @@ void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) { // instrumentation. if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress | - SanitizerKind::HWAddress)) + SanitizerKind::HWAddress | + SanitizerKind::KernelHWAddress)) reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true); } diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp index fc8e36d2c599..3673a5597eac 100644 --- a/lib/CodeGen/SwiftCallingConv.cpp +++ b/lib/CodeGen/SwiftCallingConv.cpp @@ -579,11 +579,9 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { // Empty types don't need to be passed indirectly. if (Entries.empty()) return false; - CharUnits totalSize = Entries.back().End; - // Avoid copying the array of types when there's just a single element. if (Entries.size() == 1) { - return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift( Entries.back().Type, asReturnValue); } @@ -593,8 +591,14 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { for (auto &entry : Entries) { componentTys.push_back(entry.Type); } - return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, - componentTys, + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys, + asReturnValue); +} + +bool swiftcall::shouldPassIndirectly(CodeGenModule &CGM, + ArrayRef<llvm::Type*> componentTys, + bool asReturnValue) { + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys, asReturnValue); } @@ -736,24 +740,12 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, components.append(numElts, eltTy); } -bool swiftcall::shouldPassCXXRecordIndirectly(CodeGenModule &CGM, - const CXXRecordDecl *record) { - // Following a recommendation from Richard Smith, pass a C++ type - // indirectly only if the destructor is non-trivial or *all* of the - // copy/move constructors are deleted or non-trivial. - - if (record->hasNonTrivialDestructor()) - return true; - - // It would be nice if this were summarized on the CXXRecordDecl. 
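The criterion in the comment above (pass a C++ type indirectly only if the destructor is non-trivial or all of the copy/move constructors are deleted or non-trivial) is easiest to see on concrete types. The following sketch is illustrative only and is not part of the diff:

    struct PlainValue     { int x; };                    // trivially copyable: register-passable
    struct OwnsResource   { ~OwnsResource(); int x; };   // non-trivial destructor: passed indirectly
    struct Pinned {
      Pinned(const Pinned &) = delete;                   // every copy/move constructor deleted:
      Pinned(Pinned &&) = delete;                        // passed indirectly
      int x;
    };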
- for (auto ctor : record->ctors()) { - if (ctor->isCopyOrMoveConstructor() && !ctor->isDeleted() && - ctor->isTrivial()) { - return false; - } - } - - return true; +bool swiftcall::mustPassRecordIndirectly(CodeGenModule &CGM, + const RecordDecl *record) { + // FIXME: should we not rely on the standard computation in Sema, just in + // case we want to diverge from the platform ABI (e.g. on targets where + // that uses the MSVC rule)? + return !record->canPassInRegisters(); } static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering, @@ -775,10 +767,8 @@ static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type, auto record = recordType->getDecl(); auto &layout = CGM.getContext().getASTRecordLayout(record); - if (auto cxxRecord = dyn_cast<CXXRecordDecl>(record)) { - if (shouldPassCXXRecordIndirectly(CGM, cxxRecord)) - return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false); - } + if (mustPassRecordIndirectly(CGM, record)) + return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false); SwiftAggLowering lowering(CGM); lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout); diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 4b8006428f8f..fa9b0a27af28 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -140,8 +140,11 @@ bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); - if (!RD) + if (!RD) { + if (!RT->getDecl()->canPassInRegisters()) + return CGCXXABI::RAA_Indirect; return CGCXXABI::RAA_Default; + } return CXXABI.getRecordArgABI(RD); } @@ -153,6 +156,20 @@ static CGCXXABI::RecordArgABI getRecordArgABI(QualType T, return getRecordArgABI(RT, CXXABI); } +static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, + const ABIInfo &Info) { + QualType Ty = FI.getReturnType(); + + if (const auto *RT = Ty->getAs<RecordType>()) + if (!isa<CXXRecordDecl>(RT->getDecl()) && + !RT->getDecl()->canPassInRegisters()) { + FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty); + return true; + } + + return CXXABI.classifyReturnType(FI); +} + /// Pass transparent unions as if they were the type of the first element. Sema /// should ensure that all elements of the union have the same "machine type". static QualType useFirstFieldIfTransparentUnion(QualType Ty) { @@ -201,10 +218,6 @@ bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return false; } -bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { - return false; -} - LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; @@ -682,8 +695,8 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { @@ -697,8 +710,8 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? 
ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } //===----------------------------------------------------------------------===// @@ -734,9 +747,18 @@ class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + llvm::Function *Fn = cast<llvm::Function>(GV); + if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) + Fn->addFnAttr("no-prototype"); + } + } }; -/// \brief Classify argument of given type \p Ty. +/// Classify argument of given type \p Ty. ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); @@ -831,7 +853,7 @@ Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); } -/// \brief Classify argument of given type \p Ty. +/// Classify argument of given type \p Ty. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -845,8 +867,8 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(); } - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { @@ -861,8 +883,8 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } /// IsX86_MMXType - Return true if this is an MMX type. @@ -932,7 +954,7 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { // X86-32 ABI Implementation //===----------------------------------------------------------------------===// -/// \brief Similar to llvm::CCState, but for Clang. +/// Similar to llvm::CCState, but for Clang. struct CCState { CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {} @@ -985,14 +1007,14 @@ class X86_32ABIInfo : public SwiftABIInfo { ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const; - /// \brief Return the alignment to use for the given type on the stack. + /// Return the alignment to use for the given type on the stack. unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; - /// \brief Updates the number of available free registers, returns + /// Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -1002,7 +1024,7 @@ class X86_32ABIInfo : public SwiftABIInfo { bool canExpandIndirectArgument(QualType Ty) const; - /// \brief Rewrite the function info so that all memory arguments use + /// Rewrite the function info so that all memory arguments use /// inalloca. 
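The WebAssembly hook above tags bodiless, unprototyped declarations with a "no-prototype" function attribute. In C terms, a hypothetical example (not taken from the commit):

    void legacy_decl();       /* no prototype, no body: the declaration gets "no-prototype" */
    void modern_decl(void);   /* prototyped: no attribute added */
    void defined_decl() {}    /* has a body: no attribute added */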
void rewriteWithInAlloca(CGFunctionInfo &FI) const; @@ -1028,8 +1050,7 @@ public: IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} - bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type*> scalars, + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { // LLVM's x86-32 lowering currently only assigns up to three // integer registers and three fp registers. Oddly, it'll use up to @@ -1057,8 +1078,7 @@ public: const llvm::Triple &Triple, const CodeGenOptions &Opts); void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. @@ -1404,8 +1424,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } static bool isSSEVectorType(ASTContext &Context, QualType Ty) { @@ -1677,8 +1697,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, if (Ty->isPromotableIntegerType()) { if (InReg) - return ABIArgInfo::getExtendInReg(); - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtendInReg(Ty); + return ABIArgInfo::getExtend(Ty); } if (InReg) @@ -1755,7 +1775,7 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { } else State.FreeRegs = DefaultNumRegisterParameters; - if (!getCXXABI().classifyReturnType(FI)) { + if (!::classifyReturnType(getCXXABI(), FI, *this)) { FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); } else if (FI.getReturnInfo().isIndirect()) { // The C++ ABI is not aware of register usage, so we have to check if the @@ -1925,19 +1945,13 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( } void X86_32TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const { - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. llvm::Function *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttr("stackrealign"); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2121,8 +2135,8 @@ class X86_64ABIInfo : public SwiftABIInfo { /// classify it as INTEGER (for compatibility with older clang compilers). bool classifyIntegerMMXAsSSE() const { // Clang <= 3.8 did not do this. 
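With the x86 changes above, functions that force argument-pointer alignment are tagged with the "stackrealign" string attribute rather than an alignstack(16) attribute. A hedged user-level sketch (the function name is made up for illustration):

    __attribute__((force_align_arg_pointer))
    void callback_from_legacy_code(void) {
      /* the emitted definition now carries the "stackrealign" function attribute
         instead of an alignstack(16) attribute */
    }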
- if (getCodeGenOpts().getClangABICompat() <= - CodeGenOptions::ClangABI::Ver3_8) + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver3_8) return false; const llvm::Triple &Triple = getTarget().getTriple(); @@ -2168,8 +2182,7 @@ public: return Has64BitPointers; } - bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type*> scalars, + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } @@ -2201,8 +2214,7 @@ public: return isX86VectorCallAggregateSmallEnough(NumMembers); } - bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type *> scalars, + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } @@ -2286,19 +2298,13 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override { - if (!IsForDefinition) + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. - auto *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->addFnAttr("stackrealign"); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2346,8 +2352,7 @@ public: Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -2362,26 +2367,24 @@ public: } }; -static void addStackProbeSizeTargetAttribute(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) { - if (D && isa<FunctionDecl>(D)) { - if (CGM.getCodeGenOpts().StackProbeSize != 4096) { - llvm::Function *Fn = cast<llvm::Function>(GV); +static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) { + if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) { + if (CGM.getCodeGenOpts().StackProbeSize != 4096) Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); - } + if (CGM.getCodeGenOpts().NoStackArgProbe) + Fn->addFnAttr("no-stack-arg-probe"); } } void WinX86_32TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const { - X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) return; - addStackProbeSizeTargetAttribute(D, GV, CGM); + addStackProbeTargetAttributes(D, GV, CGM); } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -2391,8 +2394,7 @@ public: : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - 
ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; @@ -2422,20 +2424,14 @@ public: }; void WinX86_64TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const { - TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. - auto *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->addFnAttr("stackrealign"); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2443,7 +2439,7 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes( } } - addStackProbeSizeTargetAttribute(D, GV, CGM); + addStackProbeTargetAttributes(D, GV, CGM); } } @@ -2868,8 +2864,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); @@ -2901,8 +2897,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -3271,7 +3267,7 @@ classifyReturnType(QualType RetTy) const { if (RetTy->isIntegralOrEnumerationType() && RetTy->isPromotableIntegerType()) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(RetTy); } break; @@ -3416,7 +3412,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( if (Ty->isIntegralOrEnumerationType() && Ty->isPromotableIntegerType()) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); } break; @@ -3543,14 +3539,24 @@ ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty, void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; + const unsigned CallingConv = FI.getCallingConvention(); + // It is possible to force Win64 calling convention on any x86_64 target by + // using __attribute__((ms_abi)). In such case to correctly emit Win64 + // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. + if (CallingConv == llvm::CallingConv::Win64) { + WinX86_64ABIInfo Win64ABIInfo(CGT); + Win64ABIInfo.computeInfo(FI); + return; + } + + bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; // Keep track of the number of assigned registers. unsigned FreeIntRegs = IsRegCall ? 11 : 6; unsigned FreeSSERegs = IsRegCall ? 
16 : 8; unsigned NeededInt, NeededSSE; - if (!getCXXABI().classifyReturnType(FI)) { + if (!::classifyReturnType(getCXXABI(), FI, *this)) { if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && !FI.getReturnType()->getTypePtr()->isUnionType()) { FI.getReturnInfo() = @@ -3797,17 +3803,18 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); - llvm::Type *DoubleTy = CGF.DoubleTy; - llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy); + llvm::Type *ST = AI.canHaveCoerceToType() + ? AI.getCoerceToType() + : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); - V = CGF.Builder.CreateLoad( - CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy)); + V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( + RegAddrLo, ST->getStructElementType(0))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); - V = CGF.Builder.CreateLoad( - CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy)); + V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( + RegAddrHi, ST->getStructElementType(1))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8))); @@ -3941,7 +3948,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // extended. const BuiltinType *BT = Ty->getAs<BuiltinType>(); if (BT && BT->getKind() == BuiltinType::Bool) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It // passes them indirectly through memory. @@ -4289,7 +4296,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. -class PPC64_SVR4_ABIInfo : public ABIInfo { +class PPC64_SVR4_ABIInfo : public SwiftABIInfo { public: enum ABIKind { ELFv1 = 0, @@ -4333,7 +4340,7 @@ private: public: PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, bool SoftFloatABI) - : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + : SwiftABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; @@ -4376,6 +4383,15 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + + bool isSwiftErrorInRegister() const override { + return false; + } }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4543,7 +4559,7 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, // For compatibility with GCC, ignore empty bitfields in C++ mode. 
if (getContext().getLangOpts().CPlusPlus && - FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) + FD->isZeroLengthBitField(getContext())) continue; uint64_t FldMembers; @@ -4603,7 +4619,9 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) { + BT->getKind() == BuiltinType::LongDouble || + (getContext().getTargetInfo().hasFloat128Type() && + (BT->getKind() == BuiltinType::Float128))) { if (IsSoftFloatABI) return false; return true; @@ -4618,10 +4636,13 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough( const Type *Base, uint64_t Members) const { - // Vector types require one register, floating point types require one - // or two registers depending on their size. + // Vector and fp128 types require one register, other floating point types + // require one or two registers depending on their size. uint32_t NumRegs = - Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64; + ((getContext().getTargetInfo().hasFloat128Type() && + Base->isFloat128Type()) || + Base->isVectorType()) ? 1 + : (getContext().getTypeSize(Base) + 63) / 64; // Homogeneous Aggregates may occupy at most 8 registers. return Members * NumRegs <= 8; @@ -4694,8 +4715,8 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { /*Realign=*/TyAlign > ABIAlign); } - return (isPromotableTypeForABI(Ty) ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo @@ -4749,8 +4770,8 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { return getNaturalAlignIndirect(RetTy); } - return (isPromotableTypeForABI(RetTy) ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine. @@ -4899,7 +4920,7 @@ private: bool isIllegalVectorType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) + if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &it : FI.arguments()) @@ -4922,8 +4943,7 @@ private: Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type*> scalars, + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } @@ -5002,7 +5022,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() && isDarwinPCS() - ? ABIArgInfo::getExtend() + ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } @@ -5072,7 +5092,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() && isDarwinPCS() - ? ABIArgInfo::getExtend() + ? 
ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } @@ -5521,8 +5541,7 @@ private: llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); - bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type*> scalars, + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } @@ -5565,9 +5584,8 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override { - if (!IsForDefinition) + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) @@ -5610,8 +5628,7 @@ public: : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -5625,17 +5642,16 @@ public: }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const { - ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) return; - addStackProbeSizeTargetAttribute(D, GV, CGM); + addStackProbeTargetAttributes(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) + if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); @@ -5682,18 +5698,6 @@ void ARMABIInfo::setCCs() { llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; - - // AAPCS apparently requires runtime support functions to be soft-float, but - // that's almost certainly for historic reasons (Thumb1 not supporting VFP - // most likely). It's more convenient for AAPCS16_VFP to be hard-float. - - // The Run-time ABI for the ARM Architecture section 4.1.2 requires - // AEABI-complying FP helper functions to use the base AAPCS. - // These AEABI functions are expanded in the ARM llvm backend, all the builtin - // support functions emitted by clang such as the _Complex helpers follow the - // abiCC. - if (abiCC != getLLVMDefaultCC()) - BuiltinCC = abiCC; } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, @@ -5730,10 +5734,11 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } - // __fp16 gets passed as if it were an int or float, but with the top 16 bits - // unspecified. This is not done for OpenCL as it handles the half type - // natively, and does not need to interwork with AAPCS code. - if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { + // _Float16 and __fp16 get passed as if it were an int or float, but with + // the top 16 bits unspecified. This is not done for OpenCL as it handles the + // half type natively, and does not need to interwork with AAPCS code. + if ((Ty->isFloat16Type() || Ty->isHalfType()) && + !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? 
llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5746,7 +5751,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, Ty = EnumTy->getDecl()->getIntegerType(); } - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } @@ -5928,10 +5933,11 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, return getNaturalAlignIndirect(RetTy); } - // __fp16 gets returned as if it were an int or float, but with the top 16 - // bits unspecified. This is not done for OpenCL as it handles the half type - // natively, and does not need to interwork with AAPCS code. - if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { + // _Float16 and __fp16 get returned as if it were an int or float, but with + // the top 16 bits unspecified. This is not done for OpenCL as it handles the + // half type natively, and does not need to interwork with AAPCS code. + if ((RetTy->isFloat16Type() || RetTy->isHalfType()) && + !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5943,7 +5949,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() + return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect(); } @@ -6155,8 +6161,8 @@ public: : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &M) const override; + bool shouldEmitStaticExternCAliases() const override; private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the @@ -6176,8 +6182,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { @@ -6189,8 +6195,8 @@ ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) return getNaturalAlignIndirect(Ty, /* byval */ true); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? 
ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -6212,9 +6218,8 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } void NVPTXTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const { - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -6279,6 +6284,10 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } + +bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { + return false; +} } //===----------------------------------------------------------------------===// @@ -6313,8 +6322,7 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - bool shouldPassIndirectlyForSwift(CharUnits totalSize, - ArrayRef<llvm::Type*> scalars, + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } @@ -6402,7 +6410,7 @@ QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. if (getContext().getLangOpts().CPlusPlus && - FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) + FD->isZeroLengthBitField(getContext())) continue; // Unlike isSingleElementStruct(), arrays do not count. @@ -6586,8 +6594,8 @@ ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); - return (isPromotableIntegerType(RetTy) ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { @@ -6597,7 +6605,7 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // Integers and enums are extended to full register width. if (isPromotableIntegerType(Ty)) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); // Handle vector types and vector-like structure types. 
Note that // as opposed to float-like structure types, we do not allow any @@ -6651,16 +6659,14 @@ public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &M) const override; }; } void MSP430TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const { - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { @@ -6705,7 +6711,7 @@ public: void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - bool shouldSignExtUnsignedType(QualType Ty) const override; + ABIArgInfo extendType(QualType Ty) const; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { @@ -6720,8 +6726,7 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override { + CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); @@ -6732,7 +6737,7 @@ public: Fn->addFnAttr("short-call"); // Other attributes do not have a meaning for declarations. - if (!IsForDefinition) + if (GV->isDeclaration()) return; if (FD->hasAttr<Mips16Attr>()) { @@ -6898,7 +6903,7 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) - return ABIArgInfo::getExtend(); + return extendType(Ty); return ABIArgInfo::getDirect( nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); @@ -6980,8 +6985,8 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -7047,14 +7052,14 @@ Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, return Addr; } -bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const { +ABIArgInfo MipsABIInfo::extendType(QualType Ty) const { int TySize = getContext().getTypeSize(Ty); // MIPS64 ABI requires unsigned 32 bit integers to be sign extended. 
if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) - return true; + return ABIArgInfo::getSignExtend(Ty); - return false; + return ABIArgInfo::getExtend(Ty); } bool @@ -7096,9 +7101,8 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM, - ForDefinition_t IsForDefinition) const override { - if (!IsForDefinition) + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) return; const auto *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -7127,14 +7131,12 @@ public: : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &M) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const { - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -7227,8 +7229,8 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -7265,8 +7267,8 @@ ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? 
ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) @@ -7409,7 +7411,7 @@ ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, if (Ty->isPromotableIntegerType()) { if (InReg) return ABIArgInfo::getDirectInReg(); - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); } if (InReg) return ABIArgInfo::getDirectInReg(); @@ -7639,8 +7641,7 @@ public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const override; + CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, @@ -7658,13 +7659,14 @@ public: createEnqueuedBlockKernel(CodeGenFunction &CGF, llvm::Function *BlockInvokeFunc, llvm::Value *BlockLiteral) const override; + bool shouldEmitStaticExternCAliases() const override; + void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; }; } void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, - ForDefinition_t IsForDefinition) const { - if (!IsForDefinition) + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) @@ -7674,6 +7676,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + + if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() && + (M.getTriple().getOS() == llvm::Triple::AMDHSA)) + F->addFnAttr("amdgpu-implicitarg-num-bytes", "48"); + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); if (ReqdWGS || FlatWGS) { unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; @@ -7785,6 +7792,16 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, return C.getOrInsertSyncScopeID(Name); } +bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { + return false; +} + +void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( + const FunctionType *&FT) const { + FT = getABIInfo().getContext().adjustFunctionType( + FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -7991,7 +8008,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { // Integer types smaller than a register are extended. if (Size < 64 && Ty->isIntegerType()) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); // Other non-aggregates go in registers. if (!isAggregateTypeForABI(Ty)) @@ -8521,7 +8538,7 @@ static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. if (RT->isUnionType()) - std::sort(FE.begin(), FE.end()); + llvm::sort(FE.begin(), FE.end()); // We can now complete the TypeString. 
unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { @@ -8565,7 +8582,7 @@ static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } - std::sort(FE.begin(), FE.end()); + llvm::sort(FE.begin(), FE.end()); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) @@ -8780,6 +8797,203 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D, return false; } +//===----------------------------------------------------------------------===// +// RISCV ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class RISCVABIInfo : public DefaultABIInfo { +private: + unsigned XLen; // Size of the integer ('x') registers in bits. + static const int NumArgGPRs = 8; + +public: + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) + : DefaultABIInfo(CGT), XLen(XLen) {} + + // DefaultABIInfo's classifyReturnType and classifyArgumentType are + // non-virtual, but computeInfo is virtual, so we overload it. + void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, + int &ArgGPRsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; +}; +} // end anonymous namespace + +void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect or if the type size is greater than 2*xlen. e.g. fp128 + // is passed direct in LLVM IR, relying on the backend lowering code to + // rewrite the argument list and pass indirectly on RV32. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || + getContext().getTypeSize(RetTy) > (2 * XLen); + + // We must track the number of GPRs used in order to conform to the RISC-V + // ABI, as integer scalars passed in registers should have signext/zeroext + // when promoted, but are anyext if passed on the stack. As GPR usage is + // different for variadic arguments, we must also track whether we are + // examining a vararg or not. + int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + bool IsFixed = ArgNum < NumFixedArgs; + ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft); + ArgNum++; + } +} + +ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + int &ArgGPRsLeft) const { + assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (ArgGPRsLeft) + ArgGPRsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. 
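The GPR accounting above is easiest to follow on a concrete variadic call. A worked illustration for RV32 (XLen == 32, argument registers a0..a7); the register names are illustrative and assume the usual even-aligned pair placement:

    void f(int fixed, ...);
    // called as: f(1, 2LL, 3);
    //   fixed = 1                        -> a0
    //   2LL (vararg, 64-bit, 2*XLen-aligned)
    //                                    -> the aligned pair a2/a3; a1 is skipped, so
    //                                       three GPRs are counted, matching
    //                                       "may consume 3 registers" above
    //   3   (vararg)                     -> a4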
+ if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + uint64_t NeededAlign = getContext().getTypeAlign(Ty); + bool MustUseStack = false; + // Determine the number of GPRs needed to pass the current argument + // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. + int NeededArgGPRs = 1; + if (!IsFixed && NeededAlign == 2 * XLen) + NeededArgGPRs = 2 + (ArgGPRsLeft % 2); + else if (Size > XLen && Size <= 2 * XLen) + NeededArgGPRs = 2; + + if (NeededArgGPRs > ArgGPRsLeft) { + MustUseStack = true; + NeededArgGPRs = ArgGPRsLeft; + } + + ArgGPRsLeft -= NeededArgGPRs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to XLen width, unless passed on the + // stack. + if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) { + return extendType(Ty); + } + + return ABIArgInfo::getDirect(); + } + + // Aggregates which are <= 2*XLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * XLen) { + unsigned Alignment = getContext().getTypeAlign(Ty); + + // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is + // required, and a 2-element XLen array if only XLen alignment is required. + if (Size <= XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), XLen)); + } else if (Alignment == 2 * XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * XLen)); + } else { + return ABIArgInfo::getDirect(llvm::ArrayType::get( + llvm::IntegerType::get(getVMContext(), XLen), 2)); + } + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + int ArgGPRsLeft = 2; + + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. + return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft); +} + +Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) { + Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); + Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); + return Addr; + } + + std::pair<CharUnits, CharUnits> SizeAndAlign = + getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 2*Xlen bytes are passed indirectly. + bool IsIndirect = SizeAndAlign.first > 2 * SlotSize; + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, SizeAndAlign, + SlotSize, /*AllowHigherAlign=*/true); +} + +ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + // RV64 ABI requires unsigned 32 bit integers to be sign extended. 
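As in the MIPS hunk earlier, a 32-bit unsigned value is widened by sign extension on the 64-bit target. Roughly, as a sketch of the expected IR rather than captured compiler output:

    unsigned int bump(unsigned int x) { return x + 1; }
    // expected RV64 lowering (sketch):
    //   define signext i32 @bump(i32 signext %x)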
+  if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+    return ABIArgInfo::getSignExtend(Ty);
+  return ABIArgInfo::getExtend(Ty);
+}
+
+namespace {
+class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
+      : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {}
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override {
+    const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+    if (!FD) return;
+
+    const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
+    if (!Attr)
+      return;
+
+    const char *Kind;
+    switch (Attr->getInterrupt()) {
+    case RISCVInterruptAttr::user: Kind = "user"; break;
+    case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break;
+    case RISCVInterruptAttr::machine: Kind = "machine"; break;
+    }
+
+    auto *Fn = cast<llvm::Function>(GV);
+
+    Fn->addFnAttr("interrupt", Kind);
+  }
+};
+} // namespace

//===----------------------------------------------------------------------===//
// Driver code
@@ -8894,6 +9108,11 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
  case llvm::Triple::msp430:
    return SetCGInfo(new MSP430TargetCodeGenInfo(Types));

+  case llvm::Triple::riscv32:
+    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32));
+  case llvm::Triple::riscv64:
+    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64));
+
  case llvm::Triple::systemz: {
    bool HasVector = getTarget().getABI() == "vector";
    return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector));
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index d745e420c4a5..b530260ea48f 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -57,8 +57,7 @@ public:
  /// setTargetAttributes - Provides a convenient hook to handle extra
  /// target-specific attributes for the given global.
  virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                                   CodeGen::CodeGenModule &M,
-                                   ForDefinition_t IsForDefinition) const {}
+                                   CodeGen::CodeGenModule &M) const {}

  /// emitTargetMD - Provides a convenient hook to handle extra
  /// target-specific metadata for the given global.
@@ -267,7 +266,7 @@ public:
  virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
                                                 llvm::LLVMContext &C) const;

-  /// Inteface class for filling custom fields of a block literal for OpenCL.
+  /// Interface class for filling custom fields of a block literal for OpenCL.
  class TargetOpenCLBlockHelper {
  public:
    typedef std::pair<llvm::Value *, StringRef> ValueTy;
@@ -297,6 +296,13 @@ public:
  createEnqueuedBlockKernel(CodeGenFunction &CGF,
                            llvm::Function *BlockInvokeFunc,
                            llvm::Value *BlockLiteral) const;
+
+  /// \return true if the target supports alias from the unmangled name to the
+  /// mangled name of functions declared within an extern "C" region and marked
+  /// as 'used', and having internal linkage.
+  virtual bool shouldEmitStaticExternCAliases() const { return true; }
+
+  virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
};

} // namespace CodeGen
diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp
index cfb93d6a9fcc..2f8a591a3e7f 100644
--- a/lib/CodeGen/VarBypassDetector.cpp
+++ b/lib/CodeGen/VarBypassDetector.cpp
@@ -95,7 +95,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S,
  case Stmt::CaseStmtClass:
  case Stmt::DefaultStmtClass:
  case Stmt::LabelStmtClass:
-    llvm_unreachable("the loop bellow handles labels and cases");
+    llvm_unreachable("the loop below handles labels and cases");
    break;
  default:
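
As an illustrative aside (not part of the diff above): a minimal sketch of how the
argument-passing rules added in RISCVABIInfo::classifyArgumentType and
RISCVABIInfo::extendType would classify a few simple types. The struct and function
names are invented for this example; the expected lowerings assume the riscv32/riscv64
targets registered in getTargetCodeGenInfo, fixed (non-variadic) arguments, and that
argument registers are still available.

// example.cpp -- hypothetical declarations, for illustration only.
#include <cstdint>

struct Small { int32_t a; };        // 32 bits
struct Pair  { int32_t a, b; };     // 64 bits, 32-bit aligned
struct Big   { int64_t a, b, c; };  // 192 bits

// Size <= XLen: coerced to a single XLen-wide integer
// (i32 on RV32, i64 on RV64).
void f_small(Small s);

// RV32: XLen < Size <= 2*XLen with only XLen alignment -> [2 x i32].
// RV64: Size <= XLen -> a single i64.
void f_pair(Pair p);

// Size > 2*XLen on both RV32 and RV64 -> passed indirectly
// (getNaturalAlignIndirect with ByVal=false).
void f_big(Big b);

// Scalars narrower than XLen are extended when passed in a register;
// on RV64 an unsigned 32-bit value gets *sign* extension per extendType.
void f_u32(uint32_t x);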