| author    | Dimitry Andric <dim@FreeBSD.org>                      | 2017-04-16 16:02:28 +0000 |
|-----------|-------------------------------------------------------|---------------------------|
| committer | Dimitry Andric <dim@FreeBSD.org>                      | 2017-04-16 16:02:28 +0000 |
| commit    | 7442d6faa2719e4e7d33a7021c406c5a4facd74d (patch)      |                           |
| tree      | c72b9241553fc9966179aba84f90f17bfa9235c3 /lib/CodeGen |                           |
| parent    | b52119637f743680a99710ce5fdb6646da2772af (diff)       |                           |
Diffstat (limited to 'lib/CodeGen')
56 files changed, 6030 insertions(+), 2027 deletions(-)
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index ac31dfdaf3e4b..c0be60ef53bc5 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFO_H #define LLVM_CLANG_LIB_CODEGEN_ABIINFO_H +#include "clang/AST/CharUnits.h" #include "clang/AST/Type.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Type.h" diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index d2ce6ea48e419..855d6795b9d6c 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -61,6 +61,9 @@ using namespace llvm; namespace { +// Default filename used for profile generation. +static constexpr StringLiteral DefaultProfileGenName = "default_%m.profraw"; + class EmitAssemblyHelper { DiagnosticsEngine &Diags; const HeaderSearchOptions &HSOpts; @@ -73,7 +76,6 @@ class EmitAssemblyHelper { std::unique_ptr<raw_pwrite_stream> OS; -private: TargetIRAnalysis getTargetIRAnalysis() const { if (TM) return TM->getTargetIRAnalysis(); @@ -262,7 +264,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, TLII->disableAllFunctions(); else { // Disable individual libc/libm calls in TargetLibraryInfo. - LibFunc::Func F; + LibFunc F; for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs()) if (TLII->getLibFunc(FuncName, F)) TLII->setUnavailable(F); @@ -292,6 +294,140 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } +static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) { + switch (CodeGenOpts.OptimizationLevel) { + default: + llvm_unreachable("Invalid optimization level!"); + case 0: + return CodeGenOpt::None; + case 1: + return CodeGenOpt::Less; + case 2: + return CodeGenOpt::Default; // O2/Os/Oz + case 3: + return CodeGenOpt::Aggressive; + } +} + +static llvm::CodeModel::Model getCodeModel(const CodeGenOptions &CodeGenOpts) { + unsigned CodeModel = + llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) + .Case("small", llvm::CodeModel::Small) + .Case("kernel", llvm::CodeModel::Kernel) + .Case("medium", llvm::CodeModel::Medium) + .Case("large", llvm::CodeModel::Large) + .Case("default", llvm::CodeModel::Default) + .Default(~0u); + assert(CodeModel != ~0u && "invalid code model!"); + return static_cast<llvm::CodeModel::Model>(CodeModel); +} + +static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) { + // Keep this synced with the equivalent code in + // lib/Frontend/CompilerInvocation.cpp + llvm::Optional<llvm::Reloc::Model> RM; + RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) + .Case("static", llvm::Reloc::Static) + .Case("pic", llvm::Reloc::PIC_) + .Case("ropi", llvm::Reloc::ROPI) + .Case("rwpi", llvm::Reloc::RWPI) + .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) + .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); + assert(RM.hasValue() && "invalid PIC model!"); + return *RM; +} + +static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) { + if (Action == Backend_EmitObj) + return TargetMachine::CGFT_ObjectFile; + else if (Action == Backend_EmitMCNull) + return TargetMachine::CGFT_Null; + else { + assert(Action == Backend_EmitAssembly && "Invalid action!"); + return TargetMachine::CGFT_AssemblyFile; + } +} + +static void initTargetOptions(llvm::TargetOptions &Options, + const CodeGenOptions &CodeGenOpts, + const clang::TargetOptions &TargetOpts, + const LangOptions &LangOpts, + const HeaderSearchOptions &HSOpts) { + Options.ThreadModel = + 
llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel) + .Case("posix", llvm::ThreadModel::POSIX) + .Case("single", llvm::ThreadModel::Single); + + // Set float ABI type. + assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" || + CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) && + "Invalid Floating Point ABI!"); + Options.FloatABIType = + llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI) + .Case("soft", llvm::FloatABI::Soft) + .Case("softfp", llvm::FloatABI::Soft) + .Case("hard", llvm::FloatABI::Hard) + .Default(llvm::FloatABI::Default); + + // Set FP fusion mode. + switch (LangOpts.getDefaultFPContractMode()) { + case LangOptions::FPC_Off: + Options.AllowFPOpFusion = llvm::FPOpFusion::Strict; + break; + case LangOptions::FPC_On: + Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; + break; + case LangOptions::FPC_Fast: + Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; + break; + } + + Options.UseInitArray = CodeGenOpts.UseInitArray; + Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; + Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; + Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; + + // Set EABI version. + Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) + .Case("4", llvm::EABI::EABI4) + .Case("5", llvm::EABI::EABI5) + .Case("gnu", llvm::EABI::GNU) + .Default(llvm::EABI::Default); + + if (LangOpts.SjLjExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::SjLj; + + Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; + Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; + Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; + Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; + Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; + Options.FunctionSections = CodeGenOpts.FunctionSections; + Options.DataSections = CodeGenOpts.DataSections; + Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; + Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); + + Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; + Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; + Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; + Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; + Options.MCOptions.MCIncrementalLinkerCompatible = + CodeGenOpts.IncrementalLinkerCompatible; + Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; + Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; + Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; + Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; + Options.MCOptions.ABIName = TargetOpts.ABI; + for (const auto &Entry : HSOpts.UserEntries) + if (!Entry.IsFramework && + (Entry.Group == frontend::IncludeDirGroup::Quoted || + Entry.Group == frontend::IncludeDirGroup::Angled || + Entry.Group == frontend::IncludeDirGroup::System)) + Options.MCOptions.IASSearchPaths.push_back( + Entry.IgnoreSysRoot ? 
Entry.Path : HSOpts.Sysroot + Entry.Path); +} + void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { // Handle disabling of all LLVM passes, where we want to preserve the @@ -316,8 +452,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, !CodeGenOpts.DisableLifetimeMarkers); PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics); } else { + // We do not want to inline hot callsites for SamplePGO module-summary build + // because profile annotation will happen again in ThinLTO backend, and we + // want the IR of the hot path to match the profile. PMBuilder.Inliner = createFunctionInliningPass( - CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize); + CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize, + (!CodeGenOpts.SampleProfileFile.empty() && + CodeGenOpts.EmitSummaryIndex)); } PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; @@ -334,16 +475,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, MPM.add(new TargetLibraryInfoWrapperPass(*TLII)); - // Add target-specific passes that need to run as early as possible. if (TM) - PMBuilder.addExtension( - PassManagerBuilder::EP_EarlyAsPossible, - [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { - TM->addEarlyAsPossiblePasses(PM); - }); + TM->adjustPassManager(PMBuilder); - PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, - addAddDiscriminatorsPass); + if (CodeGenOpts.DebugInfoForProfiling || + !CodeGenOpts.SampleProfileFile.empty()) + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, + addAddDiscriminatorsPass); // In ObjC ARC mode, add the main ARC optimization passes. if (LangOpts.ObjCAutoRefCount) { @@ -454,7 +592,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (!CodeGenOpts.InstrProfileOutput.empty()) PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; else - PMBuilder.PGOInstrGen = "default_%m.profraw"; + PMBuilder.PGOInstrGen = DefaultProfileGenName; } if (CodeGenOpts.hasProfileIRUse()) PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; @@ -495,126 +633,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { return; } - unsigned CodeModel = - llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) - .Case("small", llvm::CodeModel::Small) - .Case("kernel", llvm::CodeModel::Kernel) - .Case("medium", llvm::CodeModel::Medium) - .Case("large", llvm::CodeModel::Large) - .Case("default", llvm::CodeModel::Default) - .Default(~0u); - assert(CodeModel != ~0u && "invalid code model!"); - llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); - + llvm::CodeModel::Model CM = getCodeModel(CodeGenOpts); std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); - - // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp. 
- llvm::Optional<llvm::Reloc::Model> RM; - RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) - .Case("static", llvm::Reloc::Static) - .Case("pic", llvm::Reloc::PIC_) - .Case("ropi", llvm::Reloc::ROPI) - .Case("rwpi", llvm::Reloc::RWPI) - .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) - .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); - assert(RM.hasValue() && "invalid PIC model!"); - - CodeGenOpt::Level OptLevel; - switch (CodeGenOpts.OptimizationLevel) { - default: - llvm_unreachable("Invalid optimization level!"); - case 0: - OptLevel = CodeGenOpt::None; - break; - case 1: - OptLevel = CodeGenOpt::Less; - break; - case 2: - OptLevel = CodeGenOpt::Default; - break; // O2/Os/Oz - case 3: - OptLevel = CodeGenOpt::Aggressive; - break; - } + llvm::Reloc::Model RM = getRelocModel(CodeGenOpts); + CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); llvm::TargetOptions Options; - - Options.ThreadModel = - llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel) - .Case("posix", llvm::ThreadModel::POSIX) - .Case("single", llvm::ThreadModel::Single); - - // Set float ABI type. - assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" || - CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) && - "Invalid Floating Point ABI!"); - Options.FloatABIType = - llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI) - .Case("soft", llvm::FloatABI::Soft) - .Case("softfp", llvm::FloatABI::Soft) - .Case("hard", llvm::FloatABI::Hard) - .Default(llvm::FloatABI::Default); - - // Set FP fusion mode. - switch (CodeGenOpts.getFPContractMode()) { - case CodeGenOptions::FPC_Off: - Options.AllowFPOpFusion = llvm::FPOpFusion::Strict; - break; - case CodeGenOptions::FPC_On: - Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; - break; - case CodeGenOptions::FPC_Fast: - Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; - break; - } - - Options.UseInitArray = CodeGenOpts.UseInitArray; - Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; - Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; - Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; - - // Set EABI version. 
- Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) - .Case("4", llvm::EABI::EABI4) - .Case("5", llvm::EABI::EABI5) - .Case("gnu", llvm::EABI::GNU) - .Default(llvm::EABI::Default); - - if (LangOpts.SjLjExceptions) - Options.ExceptionModel = llvm::ExceptionHandling::SjLj; - - Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; - Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; - Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; - Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; - Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; - Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; - Options.FunctionSections = CodeGenOpts.FunctionSections; - Options.DataSections = CodeGenOpts.DataSections; - Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; - Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); - - Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; - Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; - Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; - Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; - Options.MCOptions.MCIncrementalLinkerCompatible = - CodeGenOpts.IncrementalLinkerCompatible; - Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; - Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; - Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; - Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; - Options.MCOptions.ABIName = TargetOpts.ABI; - for (const auto &Entry : HSOpts.UserEntries) - if (!Entry.IsFramework && - (Entry.Group == frontend::IncludeDirGroup::Quoted || - Entry.Group == frontend::IncludeDirGroup::Angled || - Entry.Group == frontend::IncludeDirGroup::System)) - Options.MCOptions.IASSearchPaths.push_back( - Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); - + initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts); TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel)); } @@ -630,13 +656,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. - TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile; - if (Action == Backend_EmitObj) - CGFT = TargetMachine::CGFT_ObjectFile; - else if (Action == Backend_EmitMCNull) - CGFT = TargetMachine::CGFT_Null; - else - assert(Action == Backend_EmitAssembly && "Invalid action!"); + TargetMachine::CodeGenFileType CGFT = getCodeGenFileType(Action); // Add ObjC ARC final-cleanup optimizations. 
This is done as part of the // "codegen" passes so that it isn't run multiple times when there is @@ -683,14 +703,31 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + std::unique_ptr<raw_fd_ostream> ThinLinkOS; + switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - PerModulePasses.add(createBitcodeWriterPass( - *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex, - CodeGenOpts.EmitSummaryIndex)); + if (CodeGenOpts.EmitSummaryIndex) { + if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { + std::error_code EC; + ThinLinkOS.reset(new llvm::raw_fd_ostream( + CodeGenOpts.ThinLinkBitcodeFile, EC, + llvm::sys::fs::F_None)); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile + << EC.message(); + return; + } + } + PerModulePasses.add( + createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get())); + } + else + PerModulePasses.add( + createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists)); break; case Backend_EmitLL: @@ -779,7 +816,24 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( return; TheModule->setDataLayout(TM->createDataLayout()); - PassBuilder PB(TM.get()); + PGOOptions PGOOpt; + + // -fprofile-generate. + PGOOpt.RunProfileGen = CodeGenOpts.hasProfileIRInstr(); + if (PGOOpt.RunProfileGen) + PGOOpt.ProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() ? + DefaultProfileGenName : CodeGenOpts.InstrProfileOutput; + + // -fprofile-use. + if (CodeGenOpts.hasProfileIRUse()) + PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath; + + // Only pass a PGO options struct if -fprofile-generate or + // -fprofile-use were passed on the cmdline. + PassBuilder PB(TM.get(), + (PGOOpt.RunProfileGen || + !PGOOpt.ProfileUseFile.empty()) ? + Optional<PGOOptions>(PGOOpt) : None); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -861,8 +915,31 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } +Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) { + Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + // The bitcode file may contain multiple modules, we want the one with a + // summary. 
+ for (BitcodeModule &BM : *BMsOrErr) { + Expected<bool> HasSummary = BM.hasSummary(); + if (HasSummary && *HasSummary) + return BM; + } + + return make_error<StringError>("Could not find module summary", + inconvertibleErrorCode()); +} + static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, - std::unique_ptr<raw_pwrite_stream> OS) { + const HeaderSearchOptions &HeaderOpts, + const CodeGenOptions &CGOpts, + const clang::TargetOptions &TOpts, + const LangOptions &LOpts, + std::unique_ptr<raw_pwrite_stream> OS, + std::string SampleProfile, + BackendAction Action) { StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); @@ -897,32 +974,15 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, return; } - Expected<std::vector<BitcodeModule>> BMsOrErr = - getBitcodeModuleList(**MBOrErr); - if (!BMsOrErr) { - handleAllErrors(BMsOrErr.takeError(), [&](ErrorInfoBase &EIB) { + Expected<BitcodeModule> BMOrErr = FindThinLTOModule(**MBOrErr); + if (!BMOrErr) { + handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) { errs() << "Error loading imported file '" << I.first() << "': " << EIB.message() << '\n'; }); return; } - - // The bitcode file may contain multiple modules, we want the one with a - // summary. - bool FoundModule = false; - for (BitcodeModule &BM : *BMsOrErr) { - Expected<bool> HasSummary = BM.hasSummary(); - if (HasSummary && *HasSummary) { - ModuleMap.insert({I.first(), BM}); - FoundModule = true; - break; - } - } - if (!FoundModule) { - errs() << "Error loading imported file '" << I.first() - << "': Could not find module summary\n"; - return; - } + ModuleMap.insert({I.first(), *BMOrErr}); OwnedImports.push_back(std::move(*MBOrErr)); } @@ -930,6 +990,35 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, return llvm::make_unique<lto::NativeObjectStream>(std::move(OS)); }; lto::Config Conf; + Conf.CPU = TOpts.CPU; + Conf.CodeModel = getCodeModel(CGOpts); + Conf.MAttrs = TOpts.Features; + Conf.RelocModel = getRelocModel(CGOpts); + Conf.CGOptLevel = getCGOptLevel(CGOpts); + initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); + Conf.SampleProfile = std::move(SampleProfile); + switch (Action) { + case Backend_EmitNothing: + Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { + return false; + }; + break; + case Backend_EmitLL: + Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists); + return false; + }; + break; + case Backend_EmitBC: + Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists); + return false; + }; + break; + default: + Conf.CGFileType = getCodeGenFileType(Action); + break; + } if (Error E = thinBackend( Conf, 0, AddStream, *M, *CombinedIndex, ImportList, ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) { @@ -965,7 +1054,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // of an error). 
bool DoThinLTOBackend = CombinedIndex != nullptr; if (DoThinLTOBackend) { - runThinLTOBackend(CombinedIndex.get(), M, std::move(OS)); + runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, + LOpts, std::move(OS), CGOpts.SampleProfileFile, Action); return; } } @@ -996,6 +1086,7 @@ static const char* getSectionNameForBitcode(const Triple &T) { return "__LLVM,__bitcode"; case Triple::COFF: case Triple::ELF: + case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmbc"; } @@ -1008,6 +1099,7 @@ static const char* getSectionNameForCommandline(const Triple &T) { return "__LLVM,__cmdline"; case Triple::COFF: case Triple::ELF: + case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmcmd"; } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 9287e46127bd5..28e20b53d6562 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -1181,7 +1181,7 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, if (LVal.isBitField()) return CGF.EmitLoadOfBitfieldLValue( LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(), - LVal.getAlignmentSource())); + LVal.getAlignmentSource()), loc); if (LVal.isVectorElt()) return CGF.EmitLoadOfLValue( LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(), diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index b250b9a32b181..1a57b3e6608dc 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -16,7 +16,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/DeclObjC.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" @@ -266,7 +266,7 @@ static bool isSafeForCXXConstantCapture(QualType type) { static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { - // Return if this is a function paramter. We shouldn't try to + // Return if this is a function parameter. We shouldn't try to // rematerialize default arguments of function parameters. if (isa<ParmVarDecl>(var)) return nullptr; @@ -318,6 +318,19 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, elementTypes.push_back(CGM.getBlockDescriptorType()); } +static QualType getCaptureFieldType(const CodeGenFunction &CGF, + const BlockDecl::Capture &CI) { + const VarDecl *VD = CI.getVariable(); + + // If the variable is captured by an enclosing block or lambda expression, + // use the type of the capture field. + if (CGF.BlockInfo && CI.isNested()) + return CGF.BlockInfo->getCapture(VD).fieldType(); + if (auto *FD = CGF.LambdaCaptureFields.lookup(VD)) + return FD->getType(); + return VD->getType(); +} + /// Compute the layout of the given block. Attempts to lay the block /// out with minimal space requirements. static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, @@ -432,15 +445,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, } } - QualType VT = variable->getType(); - - // If the variable is captured by an enclosing block or lambda expression, - // use the type of the capture field. 
- if (CGF->BlockInfo && CI.isNested()) - VT = CGF->BlockInfo->getCapture(variable).fieldType(); - else if (auto *FD = CGF->LambdaCaptureFields.lookup(variable)) - VT = FD->getType(); - + QualType VT = getCaptureFieldType(*CGF, CI); CharUnits size = C.getTypeSizeInChars(VT); CharUnits align = C.getDeclAlign(variable); @@ -606,8 +611,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { if (capture.isConstant()) continue; // Ignore objects that aren't destructed. - QualType::DestructionKind dtorKind = - variable->getType().isDestructedType(); + QualType VT = getCaptureFieldType(CGF, CI); + QualType::DestructionKind dtorKind = VT.isDestructedType(); if (dtorKind == QualType::DK_none) continue; CodeGenFunction::Destroyer *destroyer; @@ -634,7 +639,7 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { if (useArrayEHCleanup) cleanupKind = InactiveNormalAndEHCleanup; - CGF.pushDestroy(cleanupKind, addr, variable->getType(), + CGF.pushDestroy(cleanupKind, addr, VT, destroyer, useArrayEHCleanup); // Remember where that cleanup was. @@ -718,7 +723,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Otherwise, we have to emit this as a local block. - llvm::Constant *isa = CGM.getNSConcreteStackBlock(); + llvm::Constant *isa = + (!CGM.getContext().getLangOpts().OpenCL) + ? CGM.getNSConcreteStackBlock() + : CGM.getNullPointer(cast<llvm::PointerType>( + CGM.getNSConcreteStackBlock()->getType()), + QualType(getContext().VoidPtrTy)); isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy); // Build the block descriptor. @@ -906,9 +916,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Cast to the converted block-pointer type, which happens (somewhat // unfortunately) to be a pointer to function type. - llvm::Value *result = - Builder.CreateBitCast(blockAddr.getPointer(), - ConvertType(blockInfo.getBlockExpr()->getType())); + llvm::Value *result = Builder.CreatePointerCast( + blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); return result; } @@ -976,21 +985,41 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); // Get a pointer to the generic block literal. + // For OpenCL we generate generic AS void ptr to be able to reuse the same + // block definition for blocks with captures generated as private AS local + // variables and without captures generated as global AS program scope + // variables. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); + llvm::Type *BlockLiteralTy = - llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType()); + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); // Bitcast the callee to a block literal. - BlockPtr = Builder.CreateBitCast(BlockPtr, BlockLiteralTy, "block.literal"); + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); // Get the function pointer from the literal. llvm::Value *FuncPtr = Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - BlockPtr = Builder.CreateBitCast(BlockPtr, VoidPtrTy); // Add the block literal. 
CallArgList Args; - Args.add(RValue::get(BlockPtr), getContext().VoidPtrTy); + + QualType VoidPtrQualTy = getContext().VoidPtrTy; + llvm::Type *GenericVoidPtrTy = VoidPtrTy; + if (getLangOpts().OpenCL) { + GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); + VoidPtrQualTy = + getContext().getPointerType(getContext().getAddrSpaceQualType( + getContext().VoidTy, LangAS::opencl_generic)); + } + + BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); + Args.add(RValue::get(BlockPtr), VoidPtrQualTy); QualType FnType = BPT->getPointeeType(); @@ -1097,7 +1126,12 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, auto fields = builder.beginStruct(); // isa - fields.add(CGM.getNSConcreteGlobalBlock()); + fields.add( + (!CGM.getContext().getLangOpts().OpenCL) + ? CGM.getNSConcreteGlobalBlock() + : CGM.getNullPointer(cast<llvm::PointerType>( + CGM.getNSConcreteGlobalBlock()->getType()), + QualType(CGM.getContext().VoidPtrTy))); // __flags BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; @@ -1114,16 +1148,19 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); - llvm::Constant *literal = - fields.finishAndCreateGlobal("__block_literal_global", - blockInfo.BlockAlign, - /*constant*/ true); + unsigned AddrSpace = 0; + if (CGM.getContext().getLangOpts().OpenCL) + AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + + llvm::Constant *literal = fields.finishAndCreateGlobal( + "__block_literal_global", blockInfo.BlockAlign, + /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace); // Return a constant of the appropriately-casted type. llvm::Type *RequiredType = CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType()); llvm::Constant *Result = - llvm::ConstantExpr::getBitCast(literal, RequiredType); + llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); return Result; } @@ -1155,9 +1192,13 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. - BlockPointer = Builder.CreateBitCast(arg, - BlockInfo->StructureType->getPointerTo(), - "block"); + BlockPointer = Builder.CreatePointerCast( + arg, + BlockInfo->StructureType->getPointerTo( + getContext().getLangOpts().OpenCL + ? getContext().getTargetAddressSpace(LangAS::opencl_generic) + : 0), + "block"); } Address CodeGenFunction::LoadBlockStruct() { @@ -1196,6 +1237,15 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // The first argument is the block pointer. Just take it as a void* // and cast it later. QualType selfTy = getContext().VoidPtrTy; + + // For OpenCL passed block pointer can be private AS local variable or + // global AS program scope variable (for the case with and without captures). + // Generic AS is used therefore to be able to accomodate both private and + // generic AS in one implementation. 
+ if (getLangOpts().OpenCL) + selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType( + getContext().VoidTy, LangAS::opencl_generic)); + IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl), @@ -1323,23 +1373,102 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, return fn; } -/* - notes.push_back(HelperInfo()); - HelperInfo ¬e = notes.back(); - note.index = capture.getIndex(); - note.RequiresCopying = (ci->hasCopyExpr() || BlockRequiresCopying(type)); - note.cxxbar_import = ci->getCopyExpr(); - - if (ci->isByRef()) { - note.flag = BLOCK_FIELD_IS_BYREF; - if (type.isObjCGCWeak()) - note.flag |= BLOCK_FIELD_IS_WEAK; - } else if (type->isBlockPointerType()) { - note.flag = BLOCK_FIELD_IS_BLOCK; - } else { - note.flag = BLOCK_FIELD_IS_OBJECT; - } - */ +namespace { + +/// Represents a type of copy/destroy operation that should be performed for an +/// entity that's captured by a block. +enum class BlockCaptureEntityKind { + CXXRecord, // Copy or destroy + ARCWeak, + ARCStrong, + BlockObject, // Assign or release + None +}; + +/// Represents a captured entity that requires extra operations in order for +/// this entity to be copied or destroyed correctly. +struct BlockCaptureManagedEntity { + BlockCaptureEntityKind Kind; + BlockFieldFlags Flags; + const BlockDecl::Capture &CI; + const CGBlockInfo::Capture &Capture; + + BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags, + const BlockDecl::Capture &CI, + const CGBlockInfo::Capture &Capture) + : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {} +}; + +} // end anonymous namespace + +static std::pair<BlockCaptureEntityKind, BlockFieldFlags> +computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, + const LangOptions &LangOpts) { + if (CI.getCopyExpr()) { + assert(!CI.isByRef()); + // don't bother computing flags + return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); + } + BlockFieldFlags Flags; + if (CI.isByRef()) { + Flags = BLOCK_FIELD_IS_BYREF; + if (T.isObjCGCWeak()) + Flags |= BLOCK_FIELD_IS_WEAK; + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + } + if (!T->isObjCRetainableType()) + // For all other types, the memcpy is fine. + return std::make_pair(BlockCaptureEntityKind::None, Flags); + + Flags = BLOCK_FIELD_IS_OBJECT; + bool isBlockPointer = T->isBlockPointerType(); + if (isBlockPointer) + Flags = BLOCK_FIELD_IS_BLOCK; + + // Special rules for ARC captures: + Qualifiers QS = T.getQualifiers(); + + // We need to register __weak direct captures with the runtime. + if (QS.getObjCLifetime() == Qualifiers::OCL_Weak) + return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); + + // We need to retain the copied value for __strong direct captures. + if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) { + // If it's a block pointer, we have to copy the block and + // assign that to the destination pointer, so we might as + // well use _Block_object_assign. Otherwise we can avoid that. + return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong + : BlockCaptureEntityKind::BlockObject, + Flags); + } + + // Non-ARC captures of retainable pointers are strong and + // therefore require a call to _Block_object_assign. + if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount) + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + + // Otherwise the memcpy is fine. 
+ return std::make_pair(BlockCaptureEntityKind::None, Flags); +} + +/// Find the set of block captures that need to be explicitly copied or destroy. +static void findBlockCapturedManagedEntities( + const CGBlockInfo &BlockInfo, const LangOptions &LangOpts, + SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures, + llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>( + const BlockDecl::Capture &, QualType, const LangOptions &)> + Predicate) { + for (const auto &CI : BlockInfo.getBlockDecl()->captures()) { + const VarDecl *Variable = CI.getVariable(); + const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable); + if (Capture.isConstant()) + continue; + + auto Info = Predicate(CI, Variable->getType(), LangOpts); + if (Info.first != BlockCaptureEntityKind::None) + ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture); + } +} /// Generate the copy-helper function for a block closure object: /// static void block_copy_helper(block_t *dst, block_t *src); @@ -1399,78 +1528,28 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign); dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest"); - const BlockDecl *blockDecl = blockInfo.getBlockDecl(); + SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures; + findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures, + computeCopyInfoForBlockCapture); - for (const auto &CI : blockDecl->captures()) { - const VarDecl *variable = CI.getVariable(); - QualType type = variable->getType(); - - const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - if (capture.isConstant()) continue; - - const Expr *copyExpr = CI.getCopyExpr(); - BlockFieldFlags flags; - - bool useARCWeakCopy = false; - bool useARCStrongCopy = false; - - if (copyExpr) { - assert(!CI.isByRef()); - // don't bother computing flags - - } else if (CI.isByRef()) { - flags = BLOCK_FIELD_IS_BYREF; - if (type.isObjCGCWeak()) - flags |= BLOCK_FIELD_IS_WEAK; - - } else if (type->isObjCRetainableType()) { - flags = BLOCK_FIELD_IS_OBJECT; - bool isBlockPointer = type->isBlockPointerType(); - if (isBlockPointer) - flags = BLOCK_FIELD_IS_BLOCK; - - // Special rules for ARC captures: - Qualifiers qs = type.getQualifiers(); - - // We need to register __weak direct captures with the runtime. - if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) { - useARCWeakCopy = true; - - // We need to retain the copied value for __strong direct captures. - } else if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) { - // If it's a block pointer, we have to copy the block and - // assign that to the destination pointer, so we might as - // well use _Block_object_assign. Otherwise we can avoid that. - if (!isBlockPointer) - useARCStrongCopy = true; - - // Non-ARC captures of retainable pointers are strong and - // therefore require a call to _Block_object_assign. - } else if (!qs.getObjCLifetime() && !getLangOpts().ObjCAutoRefCount) { - // fall through - - // Otherwise the memcpy is fine. - } else { - continue; - } - - // For all other types, the memcpy is fine. 
- } else { - continue; - } + for (const auto &CopiedCapture : CopiedCaptures) { + const BlockDecl::Capture &CI = CopiedCapture.CI; + const CGBlockInfo::Capture &capture = CopiedCapture.Capture; + BlockFieldFlags flags = CopiedCapture.Flags; unsigned index = capture.getIndex(); Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); // If there's an explicit copy expression, we do that. - if (copyExpr) { - EmitSynthesizedCXXCopyCtor(dstField, srcField, copyExpr); - } else if (useARCWeakCopy) { + if (CI.getCopyExpr()) { + assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord); + EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr()); + } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { EmitARCCopyWeak(dstField, srcField); } else { llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); - if (useARCStrongCopy) { + if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { // At -O0, store null into the destination field (so that the // storeStrong doesn't over-release) and then call storeStrong. // This is a workaround to not having an initStrong call. @@ -1491,6 +1570,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent(); } } else { + assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject); srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); llvm::Value *dstAddr = Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy); @@ -1498,6 +1578,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) }; + const VarDecl *variable = CI.getVariable(); bool copyCanThrow = false; if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) { const Expr *copyExpr = @@ -1521,6 +1602,52 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +static std::pair<BlockCaptureEntityKind, BlockFieldFlags> +computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, + const LangOptions &LangOpts) { + BlockFieldFlags Flags; + if (CI.isByRef()) { + Flags = BLOCK_FIELD_IS_BYREF; + if (T.isObjCGCWeak()) + Flags |= BLOCK_FIELD_IS_WEAK; + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + } + + if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) { + if (Record->hasTrivialDestructor()) + return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); + return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); + } + + // Other types don't need to be destroy explicitly. + if (!T->isObjCRetainableType()) + return std::make_pair(BlockCaptureEntityKind::None, Flags); + + Flags = BLOCK_FIELD_IS_OBJECT; + if (T->isBlockPointerType()) + Flags = BLOCK_FIELD_IS_BLOCK; + + // Special rules for ARC captures. + Qualifiers QS = T.getQualifiers(); + + // Use objc_storeStrong for __strong direct captures; the + // dynamic tools really like it when we do this. + if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) + return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags); + + // Support __weak direct captures. + if (QS.getObjCLifetime() == Qualifiers::OCL_Weak) + return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); + + // Non-ARC captures are strong, and we need to use + // _Block_object_dispose. 
+ if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount) + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + + // Otherwise, we have nothing to do. + return std::make_pair(BlockCaptureEntityKind::None, Flags); +} + /// Generate the destroy-helper function for a block closure object: /// static void block_destroy_helper(block_t *theBlock); /// @@ -1570,79 +1697,39 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block"); - const BlockDecl *blockDecl = blockInfo.getBlockDecl(); - CodeGenFunction::RunCleanupsScope cleanups(*this); - for (const auto &CI : blockDecl->captures()) { - const VarDecl *variable = CI.getVariable(); - QualType type = variable->getType(); - - const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - if (capture.isConstant()) continue; - - BlockFieldFlags flags; - const CXXDestructorDecl *dtor = nullptr; + SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures; + findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures, + computeDestroyInfoForBlockCapture); - bool useARCWeakDestroy = false; - bool useARCStrongDestroy = false; - - if (CI.isByRef()) { - flags = BLOCK_FIELD_IS_BYREF; - if (type.isObjCGCWeak()) - flags |= BLOCK_FIELD_IS_WEAK; - } else if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) { - if (record->hasTrivialDestructor()) - continue; - dtor = record->getDestructor(); - } else if (type->isObjCRetainableType()) { - flags = BLOCK_FIELD_IS_OBJECT; - if (type->isBlockPointerType()) - flags = BLOCK_FIELD_IS_BLOCK; - - // Special rules for ARC captures. - Qualifiers qs = type.getQualifiers(); - - // Use objc_storeStrong for __strong direct captures; the - // dynamic tools really like it when we do this. - if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) { - useARCStrongDestroy = true; - - // Support __weak direct captures. - } else if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) { - useARCWeakDestroy = true; - - // Non-ARC captures are strong, and we need to use _Block_object_dispose. - } else if (!qs.hasObjCLifetime() && !getLangOpts().ObjCAutoRefCount) { - // fall through - - // Otherwise, we have nothing to do. - } else { - continue; - } - } else { - continue; - } + for (const auto &DestroyedCapture : DestroyedCaptures) { + const BlockDecl::Capture &CI = DestroyedCapture.CI; + const CGBlockInfo::Capture &capture = DestroyedCapture.Capture; + BlockFieldFlags flags = DestroyedCapture.Flags; Address srcField = Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); - // If there's an explicit copy expression, we do that. - if (dtor) { - PushDestructorCleanup(dtor, srcField); + // If the captured record has a destructor then call it. + if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) { + const auto *Dtor = + CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor(); + PushDestructorCleanup(Dtor, srcField); - // If this is a __weak capture, emit the release directly. - } else if (useARCWeakDestroy) { + // If this is a __weak capture, emit the release directly. + } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { EmitARCDestroyWeak(srcField); // Destroy strong objects with a call if requested. 
- } else if (useARCStrongDestroy) { + } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { EmitARCDestroyStrong(srcField, ARCImpreciseLifetime); // Otherwise we call _Block_object_dispose. It wouldn't be too // hard to just emit this as a cleanup if we wanted to make sure // that things were done in reverse. } else { + assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject); llvm::Value *value = Builder.CreateLoad(srcField); value = Builder.CreateBitCast(value, VoidPtrTy); BuildBlockRelease(value, flags); diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index b3d02f1f51c61..6ea0a325a4296 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -420,10 +420,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType) { + llvm::IntegerType *ResType, + llvm::Value *EmittedE) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) - return emitBuiltinObjectSize(E, Type, ResType); + return emitBuiltinObjectSize(E, Type, ResType, EmittedE); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } @@ -432,9 +433,14 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, /// - A llvm::Argument (if E is a param with the pass_object_size attribute on /// it) /// - A call to the @llvm.objectsize intrinsic +/// +/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null +/// and we wouldn't otherwise try to reference a pass_object_size parameter, +/// we'll call @llvm.objectsize on EmittedE, rather than emitting E. llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType) { + llvm::IntegerType *ResType, + llvm::Value *EmittedE) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { @@ -457,16 +463,20 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't // evaluate E for side-effects. In either case, we shouldn't lower to // @llvm.objectsize. - if (Type == 3 || E->HasSideEffects(getContext())) + if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) return getDefaultBuiltinObjectSizeResult(Type, ResType); - // LLVM only supports 0 and 2, make sure that we pass along that - // as a boolean. - auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1); - // FIXME: Get right address space. - llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)}; - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); - return Builder.CreateCall(F, {EmitScalarExpr(E), CI}); + Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); + assert(Ptr->getType()->isPointerTy() && + "Non-pointer passed to __builtin_object_size?"); + + Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); + + // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. + Value *Min = Builder.getInt1((Type & 2) != 0); + // For GCC compatability, __builtin_object_size treat NULL as unknown size. 
+ Value *NullIsUnknown = Builder.getTrue(); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); } // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we @@ -482,10 +492,12 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, + _interlockedbittestandset, + __fastfail, }; Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, - const CallExpr *E) { + const CallExpr *E) { switch (BuiltinID) { case MSVCIntrin::_BitScanForward: case MSVCIntrin::_BitScanReverse: { @@ -548,6 +560,22 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); + case MSVCIntrin::_interlockedbittestandset: { + llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Or, Addr, + Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), + llvm::AtomicOrdering::SequentiallyConsistent); + // Shift the relevant bit to the least significant position, truncate to + // the result type, and test the low bit. + llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); + llvm::Value *Truncated = + Builder.CreateTrunc(Shifted, ConvertType(E->getType())); + return Builder.CreateAnd(Truncated, + ConstantInt::get(Truncated->getType(), 1)); + } + case MSVCIntrin::_InterlockedDecrement: { llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( @@ -566,6 +594,37 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, llvm::AtomicOrdering::SequentiallyConsistent); return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); } + + case MSVCIntrin::__fastfail: { + // Request immediate process termination from the kernel. The instruction + // sequences to do this are documented on MSDN: + // https://msdn.microsoft.com/en-us/library/dn774154.aspx + llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); + StringRef Asm, Constraints; + switch (ISA) { + default: + ErrorUnsupported(E, "__fastfail call for this architecture"); + break; + case llvm::Triple::x86: + case llvm::Triple::x86_64: + Asm = "int $$0x29"; + Constraints = "{cx}"; + break; + case llvm::Triple::thumb: + Asm = "udf #251"; + Constraints = "{r0}"; + break; + } + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); + CS.setAttributes(NoReturnAttr); + return CS.getInstruction(); + } } llvm_unreachable("Incorrect MSVC intrinsic!"); } @@ -932,7 +991,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. 
- return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType)); + return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, + /*EmittedE=*/nullptr)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -2195,16 +2255,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedXor16: case Builtin::BI_InterlockedXor: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); - case Builtin::BI__readfsdword: { - llvm::Type *IntTy = ConvertType(E->getType()); - Value *IntToPtr = - Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(IntTy, 257)); - LoadInst *Load = Builder.CreateAlignedLoad( - IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType())); - Load->setVolatile(true); - return RValue::get(Load); - } + case Builtin::BI_interlockedbittestandset: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); case Builtin::BI__exception_code: case Builtin::BI_exception_code: @@ -2218,9 +2271,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_setjmpex: { if (getTarget().getTriple().isOSMSVCRT()) { llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; - llvm::AttributeSet ReturnsTwiceAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::ReturnsTwice); + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); @@ -2238,9 +2291,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI_setjmp: { if (getTarget().getTriple().isOSMSVCRT()) { - llvm::AttributeSet ReturnsTwiceAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::ReturnsTwice); + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); llvm::Value *Buf = Builder.CreateBitOrPointerCast( EmitScalarExpr(E->getArg(0)), Int8PtrTy); llvm::CallSite CS; @@ -2276,6 +2329,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } + case Builtin::BI__fastfail: + return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); + case Builtin::BI__builtin_coro_size: { auto & Context = getContext(); auto SizeTy = Context.getSizeType(); @@ -2492,25 +2548,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned NumArgs = E->getNumArgs(); llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); - llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); - llvm::Value *Range = EmitScalarExpr(E->getArg(2)); + LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); + llvm::Value *Range = NDRangeL.getAddress().getPointer(); + llvm::Type *RangeTy = NDRangeL.getAddress().getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; - llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; + 
llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); - return RValue::get(Builder.CreateCall( - CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); + AttrBuilder B; + B.addAttribute(Attribute::ByVal); + llvm::AttributeList ByValAttrSet = + llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); + + auto RTCall = + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), + {Queue, Flags, Range, Block}); + RTCall->setAttributes(ByValAttrSet); + return RValue::get(RTCall); } assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); @@ -2518,14 +2585,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. Name = "__enqueue_kernel_vaargs"; - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, ConstantInt::get(IntTy, NumArgs - 4)}; - std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, - IntTy}; + std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, + GenericVoidPtrTy, IntTy}; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -2555,12 +2622,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); - std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy, - Int32Ty, EventPtrTy, EventPtrTy, - Int8PtrTy}; + std::vector<llvm::Type *> ArgTys = { + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrTy, EventPtrTy, GenericVoidPtrTy}; std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, EventList, ClkEvent, Block}; @@ -2596,26 +2663,30 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. 
case Builtin::BIget_kernel_work_group_size: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreateBitCast(Arg, Int8PtrTy); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, Int8PtrTy, false), - "__get_kernel_work_group_size_impl"), - Arg)); + Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + "__get_kernel_work_group_size_impl"), + Arg)); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, Int8PtrTy, false), + llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), "__get_kernel_preferred_work_group_multiple_impl"), Arg)); } case Builtin::BIprintf: - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) - return EmitCUDADevicePrintfCallExpr(E, ReturnValue); + if (getTarget().getTriple().isNVPTX()) + return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef: @@ -7115,6 +7186,13 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, return EmitX86Select(CGF, Ops[3], Res, Ops[2]); } +static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, + llvm::Type *DstTy) { + unsigned NumberOfElements = DstTy->getVectorNumElements(); + Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); + return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -7321,7 +7399,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: - return UndefValue::get(ConvertType(E->getType())); + // The x86 definition of "undef" is not the same as the LLVM definition + // (PR32176). We leave optimizing away an unnecessary zero constant to the + // IR optimizer and backend. + // TODO: If we had a "freeze" IR instruction to generate a fixed undef + // value, we should use that here instead of a zero. 
+ return llvm::Constant::getNullValue(ConvertType(E->getType())); case X86::BI__builtin_ia32_vec_init_v8qi: case X86::BI__builtin_ia32_vec_init_v4hi: case X86::BI__builtin_ia32_vec_init_v2si: @@ -7408,6 +7491,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + + case X86::BI__builtin_ia32_cvtmask2b128: + case X86::BI__builtin_ia32_cvtmask2b256: + case X86::BI__builtin_ia32_cvtmask2b512: + case X86::BI__builtin_ia32_cvtmask2w128: + case X86::BI__builtin_ia32_cvtmask2w256: + case X86::BI__builtin_ia32_cvtmask2w512: + case X86::BI__builtin_ia32_cvtmask2d128: + case X86::BI__builtin_ia32_cvtmask2d256: + case X86::BI__builtin_ia32_cvtmask2d512: + case X86::BI__builtin_ia32_cvtmask2q128: + case X86::BI__builtin_ia32_cvtmask2q256: + case X86::BI__builtin_ia32_cvtmask2q512: + return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: @@ -7922,6 +8020,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // instruction, but it will create a memset that won't be optimized away. return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); } + case X86::BI__ud2: + // llvm.trap makes a ud2a instruction on x86. + return EmitTrapCall(Intrinsic::trap); + case X86::BI__int2c: { + // This syscall signals a driver assertion failure in x86 NT kernels. + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); + llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + CallSite CS = Builder.CreateCall(IA); + CS.setAttributes(NoReturnAttr); + return CS.getInstruction(); + } + case X86::BI__readfsbyte: + case X86::BI__readfsword: + case X86::BI__readfsdword: + case X86::BI__readfsqword: { + llvm::Type *IntTy = ConvertType(E->getType()); + Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(IntTy, 257)); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); + return Load; + } + case X86::BI__readgsbyte: + case X86::BI__readgsword: + case X86::BI__readgsdword: + case X86::BI__readgsqword: { + llvm::Type *IntTy = ConvertType(E->getType()); + Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(IntTy, 256)); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); + return Load; + } } } @@ -8326,6 +8463,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp: { + llvm::SmallVector<llvm::Value *, 5> Args; + for (unsigned I = 0; I != 5; ++I) + Args.push_back(EmitScalarExpr(E->getArg(I))); + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, + Args[0]->getType()); + return Builder.CreateCall(F, Args); + } case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: @@ -8391,7 +8536,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case 
AMDGPU::BI__builtin_amdgcn_classf: case AMDGPU::BI__builtin_amdgcn_classh: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); - + case AMDGPU::BI__builtin_amdgcn_fmed3f: + case AMDGPU::BI__builtin_amdgcn_fmed3h: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 83febcb4af8cf..813cd74001867 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -15,7 +15,7 @@ #include "CGCUDARuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 59010f4407c29..0f3141ab76d0d 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -256,7 +256,7 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor( return GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, - /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeSet(), IsForDefinition); + /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); } static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index d53fd4cb63b2f..7b912e3aca576 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -291,11 +291,26 @@ public: /// Emit constructor variants required by this ABI. virtual void EmitCXXConstructors(const CXXConstructorDecl *D) = 0; + /// Notes how many arguments were added to the beginning (Prefix) and ending + /// (Suffix) of an arg list. + /// + /// Note that Prefix actually refers to the number of args *after* the first + /// one: `this` arguments always come first. + struct AddedStructorArgs { + unsigned Prefix = 0; + unsigned Suffix = 0; + AddedStructorArgs() = default; + AddedStructorArgs(unsigned P, unsigned S) : Prefix(P), Suffix(S) {} + static AddedStructorArgs prefix(unsigned N) { return {N, 0}; } + static AddedStructorArgs suffix(unsigned N) { return {0, N}; } + }; + /// Build the signature of the given constructor or destructor variant by /// adding any required parameters. For convenience, ArgTys has been /// initialized with the type of 'this'. - virtual void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl<CanQualType> &ArgTys) = 0; + virtual AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl<CanQualType> &ArgTys) = 0; /// Returns true if the given destructor type should be emitted as a linkonce /// delegating thunk, regardless of whether the dtor is defined in this TU or @@ -355,9 +370,9 @@ public: /// Add any ABI-specific implicit arguments needed to call a constructor. /// - /// \return The number of args added to the call, which is typically zero or - /// one. - virtual unsigned + /// \return The number of arguments added at the beginning and end of the + /// call, which is typically zero or one. 
+ virtual AddedStructorArgs addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) = 0; @@ -377,7 +392,7 @@ public: isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) = 0; - /// Checks if ABI requires to initilize vptrs for given dynamic class. + /// Checks if ABI requires to initialize vptrs for given dynamic class. virtual bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) = 0; /// Get the address point of the vtable for the given base subobject. diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index c7c61e0c8ecb1..8af32055fc4c2 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -101,39 +101,64 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { FTNP->getExtInfo(), {}, RequiredArgs(0)); } +static void addExtParameterInfosForCall( + llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + const FunctionProtoType *proto, + unsigned prefixArgs, + unsigned totalArgs) { + assert(proto->hasExtParameterInfos()); + assert(paramInfos.size() <= prefixArgs); + assert(proto->getNumParams() + prefixArgs <= totalArgs); + + paramInfos.reserve(totalArgs); + + // Add default infos for any prefix args that don't already have infos. + paramInfos.resize(prefixArgs); + + // Add infos for the prototype. + for (const auto &ParamInfo : proto->getExtParameterInfos()) { + paramInfos.push_back(ParamInfo); + // pass_object_size params have no parameter info. + if (ParamInfo.hasPassObjectSize()) + paramInfos.emplace_back(); + } + + assert(paramInfos.size() <= totalArgs && + "Did we forget to insert pass_object_size args?"); + // Add default infos for the variadic and/or suffix arguments. + paramInfos.resize(totalArgs); +} + /// Adds the formal paramaters in FPT to the given prefix. If any parameter in /// FPT has pass_object_size attrs, then we'll add parameters for those, too. static void appendParameterTypes(const CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, - CanQual<FunctionProtoType> FPT, - const FunctionDecl *FD) { - // Fill out paramInfos. - if (FPT->hasExtParameterInfos() || !paramInfos.empty()) { - assert(paramInfos.size() <= prefix.size()); - auto protoParamInfos = FPT->getExtParameterInfos(); - paramInfos.reserve(prefix.size() + protoParamInfos.size()); - paramInfos.resize(prefix.size()); - paramInfos.append(protoParamInfos.begin(), protoParamInfos.end()); - } - - // Fast path: unknown target. - if (FD == nullptr) { + CanQual<FunctionProtoType> FPT) { + // Fast path: don't touch param info if we don't need to. + if (!FPT->hasExtParameterInfos()) { + assert(paramInfos.empty() && + "We have paramInfos, but the prototype doesn't?"); prefix.append(FPT->param_type_begin(), FPT->param_type_end()); return; } - // In the vast majority cases, we'll have precisely FPT->getNumParams() + unsigned PrefixSize = prefix.size(); + // In the vast majority of cases, we'll have precisely FPT->getNumParams() // parameters; the only thing that can change this is the presence of // pass_object_size. So, we preallocate for the common case. 
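For reference (not from the patch), this is the source-level construct behind the extra getSizeType() slot discussed in the comment above; the function and parameter names are made up.

#include <cstddef>

// A pass_object_size parameter is lowered as two IR arguments: the pointer
// plus an implicit size computed from __builtin_object_size at each call site.
int fill(char *buf __attribute__((pass_object_size(0))), char v);

// Callers are arranged roughly as if the prototype were
//   int fill(char *buf, size_t buf_object_size, char v);
// which is the extra size_t that appendParameterTypes pushes immediately
// after the annotated pointer parameter.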
prefix.reserve(prefix.size() + FPT->getNumParams()); - assert(FD->getNumParams() == FPT->getNumParams()); + auto ExtInfos = FPT->getExtParameterInfos(); + assert(ExtInfos.size() == FPT->getNumParams()); for (unsigned I = 0, E = FPT->getNumParams(); I != E; ++I) { prefix.push_back(FPT->getParamType(I)); - if (FD->getParamDecl(I)->hasAttr<PassObjectSizeAttr>()) + if (ExtInfos[I].hasPassObjectSize()) prefix.push_back(CGT.getContext().getSizeType()); } + + addExtParameterInfosForCall(paramInfos, FPT.getTypePtr(), PrefixSize, + prefix.size()); } /// Arrange the LLVM function layout for a value of the given function @@ -147,7 +172,7 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); // FIXME: Kill copy. - appendParameterTypes(CGT, prefix, paramInfos, FTP, FD); + appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, @@ -286,9 +311,19 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, // Add the formal parameters. if (PassParams) - appendParameterTypes(*this, argTypes, paramInfos, FTP, MD); - - TheCXXABI.buildStructorSignature(MD, Type, argTypes); + appendParameterTypes(*this, argTypes, paramInfos, FTP); + + CGCXXABI::AddedStructorArgs AddedArgs = + TheCXXABI.buildStructorSignature(MD, Type, argTypes); + if (!paramInfos.empty()) { + // Note: prefix implies after the first param. + if (AddedArgs.Prefix) + paramInfos.insert(paramInfos.begin() + 1, AddedArgs.Prefix, + FunctionProtoType::ExtParameterInfo{}); + if (AddedArgs.Suffix) + paramInfos.append(AddedArgs.Suffix, + FunctionProtoType::ExtParameterInfo{}); + } RequiredArgs required = (PassParams && MD->isVariadic() ? RequiredArgs(argTypes.size()) @@ -321,26 +356,6 @@ getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) { return argTypes; } -static void addExtParameterInfosForCall( - llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, - const FunctionProtoType *proto, - unsigned prefixArgs, - unsigned totalArgs) { - assert(proto->hasExtParameterInfos()); - assert(paramInfos.size() <= prefixArgs); - assert(proto->getNumParams() + prefixArgs <= totalArgs); - - // Add default infos for any prefix args that don't already have infos. - paramInfos.resize(prefixArgs); - - // Add infos for the prototype. - auto protoInfos = proto->getExtParameterInfos(); - paramInfos.append(protoInfos.begin(), protoInfos.end()); - - // Add default infos for the variadic arguments. - paramInfos.resize(totalArgs); -} - static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> getExtParameterInfosForCall(const FunctionProtoType *proto, unsigned prefixArgs, unsigned totalArgs) { @@ -352,18 +367,31 @@ getExtParameterInfosForCall(const FunctionProtoType *proto, } /// Arrange a call to a C++ method, passing the given arguments. +/// +/// ExtraPrefixArgs is the number of ABI-specific args passed after the `this` +/// parameter. +/// ExtraSuffixArgs is the number of ABI-specific args passed at the end of +/// args. +/// PassProtoArgs indicates whether `args` has args for the parameters in the +/// given CXXConstructorDecl. const CGFunctionInfo & CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, const CXXConstructorDecl *D, CXXCtorType CtorKind, - unsigned ExtraArgs) { + unsigned ExtraPrefixArgs, + unsigned ExtraSuffixArgs, + bool PassProtoArgs) { // FIXME: Kill copy. 
SmallVector<CanQualType, 16> ArgTypes; for (const auto &Arg : args) ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); + // +1 for implicit this, which should always be args[0]. + unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs; + CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D); + RequiredArgs Required = + RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D); GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() @@ -372,8 +400,14 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, : Context.VoidTy; FunctionType::ExtInfo Info = FPT->getExtInfo(); - auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs, - ArgTypes.size()); + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> ParamInfos; + // If the prototype args are elided, we should only have ABI-specific args, + // which never have param info. + if (PassProtoArgs && FPT->hasExtParameterInfos()) { + // ABI-specific suffix arguments are treated the same as variadic arguments. + addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs, + ArgTypes.size()); + } return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, /*chainCall=*/false, ArgTypes, Info, ParamInfos, Required); @@ -617,15 +651,20 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, } /// Arrange a call to a C++ method, passing the given arguments. +/// +/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It +/// does not count `this`. const CGFunctionInfo & CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *proto, - RequiredArgs required) { - unsigned numRequiredArgs = - (proto->isVariadic() ? required.getNumRequiredArgs() : args.size()); - unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams(); + RequiredArgs required, + unsigned numPrefixArgs) { + assert(numPrefixArgs + 1 <= args.size() && + "Emitting a call with less args than the required prefix?"); + // Add one to account for `this`. It's a bit awkward here, but we don't count + // `this` in similar places elsewhere. auto paramInfos = - getExtParameterInfosForCall(proto, numPrefixArgs, args.size()); + getExtParameterInfosForCall(proto, numPrefixArgs + 1, args.size()); // FIXME: Kill copy. auto argTypes = getArgTypesForCall(Context, args); @@ -680,7 +719,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { assert(std::all_of(argTypes.begin(), argTypes.end(), - std::mem_fun_ref(&CanQualType::isCanonicalAsParam))); + [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. llvm::FoldingSetNodeID ID; @@ -1620,15 +1659,113 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } +void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. 
+ if (!HasOptnone) { + if (CodeGenOpts.OptimizeSize) + FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); + if (CodeGenOpts.OptimizeSize == 2) + FuncAttrs.addAttribute(llvm::Attribute::MinSize); + } + + if (CodeGenOpts.DisableRedZone) + FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); + if (CodeGenOpts.NoImplicitFloat) + FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); + + if (AttrOnCallSite) { + // Attributes that should go on the call site only. + if (!CodeGenOpts.SimplifyLibCalls || + CodeGenOpts.isNoBuiltinFunc(Name.data())) + FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); + if (!CodeGenOpts.TrapFuncName.empty()) + FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); + } else { + // Attributes that should go on the function, but not the call site. + if (!CodeGenOpts.DisableFPElim) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + } else if (CodeGenOpts.OmitLeafFramePointer) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + } else { + FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + } + + FuncAttrs.addAttribute("less-precise-fpmad", + llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); + + if (!CodeGenOpts.FPDenormalMode.empty()) + FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode); + + FuncAttrs.addAttribute("no-trapping-math", + llvm::toStringRef(CodeGenOpts.NoTrappingMath)); + + // TODO: Are these all needed? + // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. + FuncAttrs.addAttribute("no-infs-fp-math", + llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); + FuncAttrs.addAttribute("no-nans-fp-math", + llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); + FuncAttrs.addAttribute("unsafe-fp-math", + llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); + FuncAttrs.addAttribute("use-soft-float", + llvm::toStringRef(CodeGenOpts.SoftFloat)); + FuncAttrs.addAttribute("stack-protector-buffer-size", + llvm::utostr(CodeGenOpts.SSPBufferSize)); + FuncAttrs.addAttribute("no-signed-zeros-fp-math", + llvm::toStringRef(CodeGenOpts.NoSignedZeros)); + FuncAttrs.addAttribute( + "correctly-rounded-divide-sqrt-fp-math", + llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); + + // TODO: Reciprocal estimate codegen options should apply to instructions? + std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals; + if (!Recips.empty()) + FuncAttrs.addAttribute("reciprocal-estimates", + llvm::join(Recips.begin(), Recips.end(), ",")); + + if (CodeGenOpts.StackRealignment) + FuncAttrs.addAttribute("stackrealign"); + if (CodeGenOpts.Backchain) + FuncAttrs.addAttribute("backchain"); + } + + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Exceptions aren't supported in CUDA device code. + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + + // Respect -fcuda-flush-denormals-to-zero. 
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); + } +} + +void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { + llvm::AttrBuilder FuncAttrs; + ConstructDefaultFnAttrList(F.getName(), + F.hasFnAttribute(llvm::Attribute::OptimizeNone), + /* AttrOnCallsite = */ false, FuncAttrs); + llvm::AttributeList AS = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); + F.addAttributes(llvm::AttributeList::FunctionIndex, AS); +} + void CodeGenModule::ConstructAttributeList( StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo, AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) { llvm::AttrBuilder FuncAttrs; llvm::AttrBuilder RetAttrs; - bool HasOptnone = false; CallingConv = FI.getEffectiveCallingConvention(); - if (FI.isNoReturn()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); @@ -1639,7 +1776,7 @@ void CodeGenModule::ConstructAttributeList( const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); - bool HasAnyX86InterruptAttr = false; + bool HasOptnone = false; // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) @@ -1679,7 +1816,6 @@ void CodeGenModule::ConstructAttributeList( if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) RetAttrs.addAttribute(llvm::Attribute::NonNull); - HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>(); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { Optional<unsigned> NumElemsParam; @@ -1691,86 +1827,19 @@ void CodeGenModule::ConstructAttributeList( } } - // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. - if (!HasOptnone) { - if (CodeGenOpts.OptimizeSize) - FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); - if (CodeGenOpts.OptimizeSize == 2) - FuncAttrs.addAttribute(llvm::Attribute::MinSize); - } + ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); - if (CodeGenOpts.DisableRedZone) - FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); - if (CodeGenOpts.NoImplicitFloat) - FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); if (CodeGenOpts.EnableSegmentedStacks && !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) FuncAttrs.addAttribute("split-stack"); - if (AttrOnCallSite) { - // Attributes that should go on the call site only. - if (!CodeGenOpts.SimplifyLibCalls || - CodeGenOpts.isNoBuiltinFunc(Name.data())) - FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); - if (!CodeGenOpts.TrapFuncName.empty()) - FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); - } else { - // Attributes that should go on the function, but not the call site. 
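As a rough usage sketch of the AddDefaultFnAttrs/ConstructDefaultFnAttrList split introduced above (not part of the patch), target-independent string attributes land on a function like this; the specific attribute values chosen here are assumptions.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Illustrative only: mirrors how the patch attaches default function
// attributes through an AttrBuilder and the new AttributeList API.
void addDefaultFrameAttrs(Function &F, bool KeepFramePointer) {
  AttrBuilder B;
  B.addAttribute("no-frame-pointer-elim", KeepFramePointer ? "true" : "false");
  B.addAttribute("less-precise-fpmad", "false");
  F.addAttributes(AttributeList::FunctionIndex,
                  AttributeList::get(F.getContext(),
                                     AttributeList::FunctionIndex, B));
}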
- if (!CodeGenOpts.DisableFPElim) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - } else if (CodeGenOpts.OmitLeafFramePointer) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } else { - FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } - + if (!AttrOnCallSite) { bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr || - (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>()); - FuncAttrs.addAttribute( - "disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); - - FuncAttrs.addAttribute("less-precise-fpmad", - llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); - - if (!CodeGenOpts.FPDenormalMode.empty()) - FuncAttrs.addAttribute("denormal-fp-math", - CodeGenOpts.FPDenormalMode); - - FuncAttrs.addAttribute("no-trapping-math", - llvm::toStringRef(CodeGenOpts.NoTrappingMath)); - - // TODO: Are these all needed? - // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. - FuncAttrs.addAttribute("no-infs-fp-math", - llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); - FuncAttrs.addAttribute("no-nans-fp-math", - llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); - FuncAttrs.addAttribute("unsafe-fp-math", - llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); - FuncAttrs.addAttribute("use-soft-float", - llvm::toStringRef(CodeGenOpts.SoftFloat)); - FuncAttrs.addAttribute("stack-protector-buffer-size", - llvm::utostr(CodeGenOpts.SSPBufferSize)); - FuncAttrs.addAttribute("no-signed-zeros-fp-math", - llvm::toStringRef(CodeGenOpts.NoSignedZeros)); - FuncAttrs.addAttribute( - "correctly-rounded-divide-sqrt-fp-math", - llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); - - // TODO: Reciprocal estimate codegen options should apply to instructions? - std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals; - if (!Recips.empty()) - FuncAttrs.addAttribute("reciprocal-estimates", - llvm::join(Recips.begin(), Recips.end(), ",")); - - if (CodeGenOpts.StackRealignment) - FuncAttrs.addAttribute("stackrealign"); - if (CodeGenOpts.Backchain) - FuncAttrs.addAttribute("backchain"); + CodeGenOpts.DisableTailCalls || + (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() || + TargetDecl->hasAttr<AnyX86InterruptAttr>())); + FuncAttrs.addAttribute("disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then @@ -1819,21 +1888,6 @@ void CodeGenModule::ConstructAttributeList( } } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Conservatively, mark all functions and calls in CUDA as convergent - // (meaning, they may call an intrinsically convergent op, such as - // __syncthreads(), and so can't have certain optimizations applied around - // them). LLVM will remove this attribute where it safely can. - FuncAttrs.addAttribute(llvm::Attribute::Convergent); - - // Exceptions aren't supported in CUDA device code. - FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); - - // Respect -fcuda-flush-denormals-to-zero. - if (getLangOpts().CUDADeviceFlushDenormalsToZero) - FuncAttrs.addAttribute("nvptx-f32ftz", "true"); - } - ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); @@ -1878,8 +1932,8 @@ void CodeGenModule::ConstructAttributeList( // Attach return attributes. 
if (RetAttrs.hasAttributes()) { - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); + PAL.push_back(llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::ReturnIndex, RetAttrs)); } bool hasUsedSRet = false; @@ -1891,7 +1945,7 @@ void CodeGenModule::ConstructAttributeList( hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); - PAL.push_back(llvm::AttributeSet::get( + PAL.push_back(llvm::AttributeList::get( getLLVMContext(), IRFunctionArgs.getSRetArgNo() + 1, SRETAttrs)); } @@ -1899,7 +1953,7 @@ void CodeGenModule::ConstructAttributeList( if (IRFunctionArgs.hasInallocaArg()) { llvm::AttrBuilder Attrs; Attrs.addAttribute(llvm::Attribute::InAlloca); - PAL.push_back(llvm::AttributeSet::get( + PAL.push_back(llvm::AttributeList::get( getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs)); } @@ -1914,7 +1968,7 @@ void CodeGenModule::ConstructAttributeList( // Add attribute for padding argument, if necessary. if (IRFunctionArgs.hasPaddingArg(ArgNo)) { if (AI.getPaddingInReg()) - PAL.push_back(llvm::AttributeSet::get( + PAL.push_back(llvm::AttributeList::get( getLLVMContext(), IRFunctionArgs.getPaddingArgNo(ArgNo) + 1, llvm::Attribute::InReg)); } @@ -2031,17 +2085,15 @@ void CodeGenModule::ConstructAttributeList( unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); for (unsigned i = 0; i < NumIRArgs; i++) - PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), - FirstIRArg + i + 1, Attrs)); + PAL.push_back(llvm::AttributeList::get(getLLVMContext(), + FirstIRArg + i + 1, Attrs)); } } assert(ArgNo == FI.arg_size()); if (FuncAttrs.hasAttributes()) - PAL.push_back(llvm:: - AttributeSet::get(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - FuncAttrs)); + PAL.push_back(llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs)); } /// An argument came in as a promoted argument; demote it back to its @@ -2152,8 +2204,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (IRFunctionArgs.hasSRetArg()) { auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, - llvm::Attribute::NoAlias)); + AI->addAttr(llvm::AttributeList::get(getLLVMContext(), AI->getArgNo() + 1, + llvm::Attribute::NoAlias)); } // Track if we received the parameter as a pointer (indirect, byval, or @@ -2244,9 +2296,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), PVD->getFunctionScopeIndex())) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::AttributeList::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NonNull)); QualType OTy = PVD->getOriginalType(); if (const auto *ArrTy = @@ -2263,12 +2315,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::AttrBuilder Attrs; Attrs.addDereferenceableAttr( getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, Attrs)); + AI->addAttr(llvm::AttributeList::get( + getLLVMContext(), AI->getArgNo() + 1, Attrs)); } else if (getContext().getTargetAddressSpace(ETy) == 0) { - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() 
+ 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::AttributeList::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NonNull)); } } } else if (const auto *ArrTy = @@ -2278,9 +2330,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // we know that it must be nonnull. if (ArrTy->getSizeModifier() == VariableArrayType::Static && !getContext().getTargetAddressSpace(ArrTy->getElementType())) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::AttributeList::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NonNull)); } const auto *AVAttr = PVD->getAttr<AlignValueAttr>(); @@ -2298,15 +2350,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::AttrBuilder Attrs; Attrs.addAlignmentAttr(Alignment); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, Attrs)); + AI->addAttr(llvm::AttributeList::get(getLLVMContext(), + AI->getArgNo() + 1, Attrs)); } } if (Arg->getType().isRestrictQualified()) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NoAlias)); + AI->addAttr(llvm::AttributeList::get( + getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias)); // LLVM expects swifterror parameters to be used in very restricted // ways. Copy the value into a less-restricted temporary. @@ -2858,19 +2909,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, llvm::Instruction *Ret; if (RV) { - if (CurCodeDecl && SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) { - if (auto RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>()) { - SanitizerScope SanScope(this); - llvm::Value *Cond = Builder.CreateICmpNE( - RV, llvm::Constant::getNullValue(RV->getType())); - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(EndLoc), - EmitCheckSourceLocation(RetNNAttr->getLocation()), - }; - EmitCheck(std::make_pair(Cond, SanitizerKind::ReturnsNonnullAttribute), - SanitizerHandler::NonnullReturn, StaticData, None); - } - } + EmitReturnValueCheck(RV, EndLoc); Ret = Builder.CreateRet(RV); } else { Ret = Builder.CreateRetVoid(); @@ -2880,6 +2919,63 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, Ret->setDebugLoc(std::move(RetDbgLoc)); } +void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV, + SourceLocation EndLoc) { + // A current decl may not be available when emitting vtable thunks. + if (!CurCodeDecl) + return; + + ReturnsNonNullAttr *RetNNAttr = nullptr; + if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) + RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>(); + + if (!RetNNAttr && !requiresReturnValueNullabilityCheck()) + return; + + // Prefer the returns_nonnull attribute if it's present. 
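For context (not from the patch), this is the kind of function the new EmitReturnValueCheck guards when -fsanitize=nullability-return is enabled; the function name is illustrative.

// Illustrative source, not part of the patch.
int *_Nonnull pick(int *a, int *b) {
  // With -fsanitize=nullability-return, the emitted epilogue compares the
  // return value against null and calls the nullability-return handler when
  // both a and b are null; with a returns_nonnull attribute the same path
  // uses the nonnull-return handler instead.
  return a ? a : b;
}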
+ SourceLocation AttrLoc; + SanitizerMask CheckKind; + SanitizerHandler Handler; + if (RetNNAttr) { + assert(!requiresReturnValueNullabilityCheck() && + "Cannot check nullability and the nonnull attribute"); + AttrLoc = RetNNAttr->getLocation(); + CheckKind = SanitizerKind::ReturnsNonnullAttribute; + Handler = SanitizerHandler::NonnullReturn; + } else { + if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl)) + if (auto *TSI = DD->getTypeSourceInfo()) + if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>()) + AttrLoc = FTL.getReturnLoc().findNullabilityLoc(); + CheckKind = SanitizerKind::NullabilityReturn; + Handler = SanitizerHandler::NullabilityReturn; + } + + SanitizerScope SanScope(this); + + llvm::BasicBlock *Check = nullptr; + llvm::BasicBlock *NoCheck = nullptr; + if (requiresReturnValueNullabilityCheck()) { + // Before doing the nullability check, make sure that the preconditions for + // the check are met. + Check = createBasicBlock("nullcheck"); + NoCheck = createBasicBlock("no.nullcheck"); + Builder.CreateCondBr(RetValNullabilityPrecondition, Check, NoCheck); + EmitBlock(Check); + } + + // Now do the null check. If the returns_nonnull attribute is present, this + // is done unconditionally. + llvm::Value *Cond = Builder.CreateIsNotNull(RV); + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(EndLoc), EmitCheckSourceLocation(AttrLoc), + }; + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None); + + if (requiresReturnValueNullabilityCheck()) + EmitBlock(NoCheck); +} + static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) { const CXXRecordDecl *RD = type->getAsCXXRecordDecl(); return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory; @@ -3188,50 +3284,63 @@ void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, - const FunctionDecl *FD, + AbstractCallee AC, unsigned ParmNum) { - if (!SanOpts.has(SanitizerKind::NonnullAttribute) || !FD) + if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) || + SanOpts.has(SanitizerKind::NullabilityArg))) return; - auto PVD = ParmNum < FD->getNumParams() ? FD->getParamDecl(ParmNum) : nullptr; + + // The param decl may be missing in a variadic function. + auto PVD = ParmNum < AC.getNumParams() ? AC.getParamDecl(ParmNum) : nullptr; unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum; - auto NNAttr = getNonNullAttr(FD, PVD, ArgType, ArgNo); - if (!NNAttr) + + // Prefer the nonnull attribute if it's present. 
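Similarly (again not from the patch), the argument-side counterpart covered by the extended EmitNonNullArgCheck under -fsanitize=nullability-arg, with no nonnull attribute required; names are illustrative.

// Illustrative source, not part of the patch.
void use(int *_Nonnull p);

void caller(int *maybe_null) {
  // The call is preceded by an icmp against null plus a call to the
  // nullability-arg handler when maybe_null turns out to be null at runtime.
  use(maybe_null);
}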
+ const NonNullAttr *NNAttr = nullptr; + if (SanOpts.has(SanitizerKind::NonnullAttribute)) + NNAttr = getNonNullAttr(AC.getDecl(), PVD, ArgType, ArgNo); + + bool CanCheckNullability = false; + if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) { + auto Nullability = PVD->getType()->getNullability(getContext()); + CanCheckNullability = Nullability && + *Nullability == NullabilityKind::NonNull && + PVD->getTypeSourceInfo(); + } + + if (!NNAttr && !CanCheckNullability) return; + + SourceLocation AttrLoc; + SanitizerMask CheckKind; + SanitizerHandler Handler; + if (NNAttr) { + AttrLoc = NNAttr->getLocation(); + CheckKind = SanitizerKind::NonnullAttribute; + Handler = SanitizerHandler::NonnullArg; + } else { + AttrLoc = PVD->getTypeSourceInfo()->getTypeLoc().findNullabilityLoc(); + CheckKind = SanitizerKind::NullabilityArg; + Handler = SanitizerHandler::NullabilityArg; + } + SanitizerScope SanScope(this); assert(RV.isScalar()); llvm::Value *V = RV.getScalarVal(); llvm::Value *Cond = Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType())); llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(ArgLoc), - EmitCheckSourceLocation(NNAttr->getLocation()), + EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc), llvm::ConstantInt::get(Int32Ty, ArgNo + 1), }; - EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute), - SanitizerHandler::NonnullArg, StaticData, None); + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None); } void CodeGenFunction::EmitCallArgs( CallArgList &Args, ArrayRef<QualType> ArgTypes, llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange, - const FunctionDecl *CalleeDecl, unsigned ParamsToSkip, - EvaluationOrder Order) { + AbstractCallee AC, unsigned ParamsToSkip, EvaluationOrder Order) { assert((int)ArgTypes.size() == (ArgRange.end() - ArgRange.begin())); - auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg) { - if (CalleeDecl == nullptr || I >= CalleeDecl->getNumParams()) - return; - auto *PS = CalleeDecl->getParamDecl(I)->getAttr<PassObjectSizeAttr>(); - if (PS == nullptr) - return; - - const auto &Context = getContext(); - auto SizeTy = Context.getSizeType(); - auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T); - Args.add(RValue::get(V), SizeTy); - }; - // We *have* to evaluate arguments from right to left in the MS C++ ABI, // because arguments are destroyed left to right in the callee. As a special // case, there are certain language constructs that require left-to-right @@ -3242,6 +3351,27 @@ void CodeGenFunction::EmitCallArgs( ? Order == EvaluationOrder::ForceLeftToRight : Order != EvaluationOrder::ForceRightToLeft; + auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg, + RValue EmittedArg) { + if (!AC.hasFunctionDecl() || I >= AC.getNumParams()) + return; + auto *PS = AC.getParamDecl(I)->getAttr<PassObjectSizeAttr>(); + if (PS == nullptr) + return; + + const auto &Context = getContext(); + auto SizeTy = Context.getSizeType(); + auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); + assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?"); + llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T, + EmittedArg.getScalarVal()); + Args.add(RValue::get(V), SizeTy); + // If we're emitting args in reverse, be sure to do so with + // pass_object_size, as well. 
+ if (!LeftToRight) + std::swap(Args.back(), *(&Args.back() - 1)); + }; + // Insert a stack save if we're going to need any inalloca args. bool HasInAllocaArgs = false; if (CGM.getTarget().getCXXABI().isMicrosoft()) { @@ -3259,11 +3389,20 @@ void CodeGenFunction::EmitCallArgs( for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) { unsigned Idx = LeftToRight ? I : E - I - 1; CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx; - if (!LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg); + unsigned InitialArgSize = Args.size(); EmitCallArg(Args, *Arg, ArgTypes[Idx]); - EmitNonNullArgCheck(Args.back().RV, ArgTypes[Idx], (*Arg)->getExprLoc(), - CalleeDecl, ParamsToSkip + Idx); - if (LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg); + // In particular, we depend on it being the last arg in Args, and the + // objectsize bits depend on there only being one arg if !LeftToRight. + assert(InitialArgSize + 1 == Args.size() && + "The code below depends on only adding one arg per EmitCallArg"); + (void)InitialArgSize; + RValue RVArg = Args.back().RV; + EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC, + ParamsToSkip + Idx); + // @llvm.objectsize should never have side-effects and shouldn't need + // destruction/cleanups, so we can safely "emit" it after its arg, + // regardless of right-to-leftness + MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg); } if (!LeftToRight) { @@ -3571,12 +3710,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address ArgMemory = Address::invalid(); const llvm::StructLayout *ArgMemoryLayout = nullptr; if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { - ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct); + const llvm::DataLayout &DL = CGM.getDataLayout(); + ArgMemoryLayout = DL.getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); llvm::AllocaInst *AI; if (IP) { IP = IP->getNextNode(); - AI = new llvm::AllocaInst(ArgStruct, "argmem", IP); + AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(), + "argmem", IP); } else { AI = CreateTempAlloca(ArgStruct, "argmem"); } @@ -3977,8 +4118,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Callee.getAbstractInfo(), AttributeList, CallingConv, /*AttrOnCallSite=*/true); - llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(), - AttributeList); + llvm::AttributeList Attrs = + llvm::AttributeList::get(getLLVMContext(), AttributeList); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -3989,15 +4130,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, !(Callee.getAbstractInfo().getCalleeDecl() && Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) { Attrs = - Attrs.addAttribute(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); } // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoInline); } @@ -4014,7 +4154,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = true; } else { // Otherwise, nounwind call sites will never throw. 
- CannotThrow = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex, + CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); } llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); @@ -4210,6 +4350,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(), OffsetValue); + } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { + llvm::Value *ParamVal = + CallArgs[AA->getParamIndex() - 1].RV.getScalarVal(); + EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal); } } diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index 031ce831cb375..97221e20c1959 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -25,10 +25,10 @@ #include "ABIInfo.h" namespace llvm { - class AttributeSet; - class Function; - class Type; - class Value; +class AttributeList; +class Function; +class Type; +class Value; } namespace clang { @@ -39,28 +39,28 @@ namespace clang { class VarDecl; namespace CodeGen { - typedef SmallVector<llvm::AttributeSet, 8> AttributeListType; - - /// Abstract information about a function or function prototype. - class CGCalleeInfo { - /// \brief The function prototype of the callee. - const FunctionProtoType *CalleeProtoTy; - /// \brief The function declaration of the callee. - const Decl *CalleeDecl; - - public: - explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} - CGCalleeInfo(const Decl *calleeDecl) - : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} - - const FunctionProtoType *getCalleeFunctionProtoType() const { - return CalleeProtoTy; - } - const Decl *getCalleeDecl() const { return CalleeDecl; } +typedef SmallVector<llvm::AttributeList, 8> AttributeListType; + +/// Abstract information about a function or function prototype. +class CGCalleeInfo { + /// \brief The function prototype of the callee. + const FunctionProtoType *CalleeProtoTy; + /// \brief The function declaration of the callee. + const Decl *CalleeDecl; + +public: + explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} + CGCalleeInfo(const Decl *calleeDecl) + : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} + + const FunctionProtoType *getCalleeFunctionProtoType() const { + return CalleeProtoTy; + } + const Decl *getCalleeDecl() const { return CalleeDecl; } }; /// All available information about a concrete callee. diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 05d056739524d..7ba03a0d42dd4 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -309,8 +309,10 @@ Address CodeGenFunction::GetAddressOfBaseClass( // just do a bitcast; null checks are unnecessary. 
if (NonVirtualOffset.isZero() && !VBase) { if (sanitizePerformTypeCheck()) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, !NullCheckValue); EmitTypeCheck(TCK_Upcast, Loc, Value.getPointer(), - DerivedTy, DerivedAlign, !NullCheckValue); + DerivedTy, DerivedAlign, SkippedChecks); } return Builder.CreateBitCast(Value, BasePtrTy); } @@ -331,8 +333,10 @@ Address CodeGenFunction::GetAddressOfBaseClass( } if (sanitizePerformTypeCheck()) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck(VBase ? TCK_UpcastToVirtualBase : TCK_Upcast, Loc, - Value.getPointer(), DerivedTy, DerivedAlign, true); + Value.getPointer(), DerivedTy, DerivedAlign, SkippedChecks); } // Compute the virtual offset. @@ -685,7 +689,8 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, /// complete-to-base constructor delegation optimization, i.e. /// emitting the complete constructor as a simple call to the base /// constructor. -static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor) { +bool CodeGenFunction::IsConstructorDelegationValid( + const CXXConstructorDecl *Ctor) { // Currently we disable the optimization for classes with virtual // bases because (1) the addresses of parameter variables need to be @@ -1131,10 +1136,11 @@ namespace { RHS = EC->getSubExpr(); if (!RHS) return nullptr; - MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS); - if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field) - return nullptr; - return Field; + if (MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS)) { + if (ME2->getMemberDecl() == Field) + return Field; + } + return nullptr; } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) { CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl()); if (!(MD && isMemcpyEquivalentSpecialMember(MD))) @@ -1384,6 +1390,20 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CurGD.getDecl()); CXXDtorType DtorType = CurGD.getDtorType(); + // For an abstract class, non-base destructors are never used (and can't + // be emitted in general, because vbase dtors may not have been validated + // by Sema), but the Itanium ABI doesn't make them optional and Clang may + // in fact emit references to them from other compilations, so emit them + // as functions containing a trap instruction. + if (DtorType != Dtor_Base && Dtor->getParent()->isAbstract()) { + llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); + return; + } + Stmt *Body = Dtor->getBody(); if (Body) incrementProfileCounter(Body); @@ -1416,9 +1436,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // we'd introduce *two* handler blocks. In the Microsoft ABI, we // always delegate because we might not have a definition in this TU. switch (DtorType) { - case Dtor_Comdat: - llvm_unreachable("not expecting a COMDAT"); - + case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); case Dtor_Deleting: llvm_unreachable("already handled deleting case"); case Dtor_Complete: @@ -1433,7 +1451,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { /*Delegating=*/false, LoadCXXThisAddress()); break; } + // Fallthrough: act like we're in the base variant. 
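For illustration (not from the patch), the situation the new trap-body path in EmitDestructorBody covers; the class is a made-up example.

// Illustrative source, not part of the patch.
struct Abstract {
  virtual ~Abstract();
  virtual void f() = 0;
};
// Only the base-object destructor can ever run. The complete and deleting
// variants may still be referenced from other TUs, so they are now emitted
// as bodies that simply contain llvm.trap followed by unreachable.
Abstract::~Abstract() {}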
+ LLVM_FALLTHROUGH; case Dtor_Base: assert(Body); @@ -1950,7 +1970,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, // Add the rest of the user-supplied arguments. const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + EvaluationOrder Order = E->isListInitialization() + ? EvaluationOrder::ForceLeftToRight + : EvaluationOrder::Default; + EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(), + /*ParamsToSkip*/ 0, Order); EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); } @@ -1970,7 +1994,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, // Likewise if they're inalloca. const CGFunctionInfo &Info = - CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0); + CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0, 0); if (Info.usesInAlloca()) return false; } @@ -2012,10 +2036,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, return; } + bool PassPrototypeArgs = true; // Check whether we can actually emit the constructor before trying to do so. if (auto Inherited = D->getInheritedConstructor()) { - if (getTypes().inheritingCtorHasParams(Inherited, Type) && - !canEmitDelegateCallArgs(*this, D, Type, Args)) { + PassPrototypeArgs = getTypes().inheritingCtorHasParams(Inherited, Type); + if (PassPrototypeArgs && !canEmitDelegateCallArgs(*this, D, Type, Args)) { EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase, Delegating, Args); return; @@ -2023,14 +2048,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, } // Insert any ABI-specific implicit constructor arguments. - unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs( - *this, D, Type, ForVirtualBase, Delegating, Args); + CGCXXABI::AddedStructorArgs ExtraArgs = + CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase, + Delegating, Args); // Emit the call. llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); - const CGFunctionInfo &Info = - CGM.getTypes().arrangeCXXConstructorCall(Args, D, Type, ExtraArgs); + const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( + Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); CGCallee Callee = CGCallee::forDirect(CalleePtr, D); EmitCall(Info, Callee, ReturnValueSlot(), Args); @@ -2102,7 +2128,9 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall( void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase, bool Delegating, CallArgList &Args) { - InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType)); + GlobalDecl GD(Ctor, CtorType); + InlinedInheritingConstructorScope Scope(*this, GD); + ApplyInlineDebugLocation DebugScope(*this, GD); // Save the arguments to be passed to the inherited constructor. CXXInheritedCtorInitExprArgs = Args; diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 3666858e63d24..437ab7dd46493 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -418,11 +418,15 @@ void CodeGenFunction::ResolveBranchFixups(llvm::BasicBlock *Block) { } /// Pops cleanup blocks until the given savepoint is reached. 
-void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) { +void CodeGenFunction::PopCleanupBlocks( + EHScopeStack::stable_iterator Old, + std::initializer_list<llvm::Value **> ValuesToReload) { assert(Old.isValid()); + bool HadBranches = false; while (EHStack.stable_begin() != Old) { EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.begin()); + HadBranches |= Scope.hasBranches(); // As long as Old strictly encloses the scope's enclosing normal // cleanup, we're going to emit another normal cleanup which @@ -432,14 +436,41 @@ void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) { PopCleanupBlock(FallThroughIsBranchThrough); } + + // If we didn't have any branches, the insertion point before cleanups must + // dominate the current insertion point and we don't need to reload any + // values. + if (!HadBranches) + return; + + // Spill and reload all values that the caller wants to be live at the current + // insertion point. + for (llvm::Value **ReloadedValue : ValuesToReload) { + auto *Inst = dyn_cast_or_null<llvm::Instruction>(*ReloadedValue); + if (!Inst) + continue; + Address Tmp = + CreateDefaultAlignTempAlloca(Inst->getType(), "tmp.exprcleanup"); + + // Find an insertion point after Inst and spill it to the temporary. + llvm::BasicBlock::iterator InsertBefore; + if (auto *Invoke = dyn_cast<llvm::InvokeInst>(Inst)) + InsertBefore = Invoke->getNormalDest()->getFirstInsertionPt(); + else + InsertBefore = std::next(Inst->getIterator()); + CGBuilderTy(CGM, &*InsertBefore).CreateStore(Inst, Tmp); + + // Reload the value at the current insertion point. + *ReloadedValue = Builder.CreateLoad(Tmp); + } } /// Pops cleanup blocks until the given savepoint is reached, then add the /// cleanups from the given savepoint in the lifetime-extended cleanups stack. -void -CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old, - size_t OldLifetimeExtendedSize) { - PopCleanupBlocks(Old); +void CodeGenFunction::PopCleanupBlocks( + EHScopeStack::stable_iterator Old, size_t OldLifetimeExtendedSize, + std::initializer_list<llvm::Value **> ValuesToReload) { + PopCleanupBlocks(Old, ValuesToReload); // Move our deferred cleanups onto the EH stack. for (size_t I = OldLifetimeExtendedSize, @@ -578,7 +609,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF, llvm::SwitchInst *si = cast<llvm::SwitchInst>(use.getUser()); if (si->getNumCases() == 1 && si->getDefaultDest() == unreachableBB) { // Replace the switch with a branch. - llvm::BranchInst::Create(si->case_begin().getCaseSuccessor(), si); + llvm::BranchInst::Create(si->case_begin()->getCaseSuccessor(), si); // The switch operand is a load from the cleanup-dest alloca. 
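A generic sketch of the spill-and-reload idiom applied by the new ValuesToReload handling above (not from the patch; the helper name and builder positions are assumptions).

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Illustrative only: spill an SSA value to a stack temporary right after its
// definition, then reload it at the current insertion point once the
// intervening cleanup blocks have been emitted.
Value *spillAndReload(IRBuilder<> &AfterDef, IRBuilder<> &AtUse,
                      Instruction *V) {
  AllocaInst *Tmp =
      AfterDef.CreateAlloca(V->getType(), nullptr, "tmp.exprcleanup");
  AfterDef.CreateStore(V, Tmp);
  return AtUse.CreateLoad(Tmp);
}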
llvm::LoadInst *condition = cast<llvm::LoadInst>(si->getCondition()); diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 2fdb1279ece97..0ef680ef66092 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -12,28 +12,58 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" +#include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" using namespace clang; using namespace CodeGen; -namespace clang { -namespace CodeGen { +using llvm::Value; +using llvm::BasicBlock; + +namespace { +enum class AwaitKind { Init, Normal, Yield, Final }; +static constexpr llvm::StringLiteral AwaitKindStr[] = {"init", "await", "yield", + "final"}; +} + +struct clang::CodeGen::CGCoroData { + // What is the current await expression kind and how many + // await/yield expressions were encountered so far. + // These are used to generate pretty labels for await expressions in LLVM IR. + AwaitKind CurrentAwaitKind = AwaitKind::Init; + unsigned AwaitNum = 0; + unsigned YieldNum = 0; + + // How many co_return statements are in the coroutine. Used to decide whether + // we need to add co_return; equivalent at the end of the user authored body. + unsigned CoreturnCount = 0; + + // A branch to this block is emitted when coroutine needs to suspend. + llvm::BasicBlock *SuspendBB = nullptr; + + // Stores the jump destination just before the coroutine memory is freed. + // This is the destination that every suspend point jumps to for the cleanup + // branch. + CodeGenFunction::JumpDest CleanupJD; + + // Stores the jump destination just before the final suspend. The co_return + // statements jumps to this point after calling return_xxx promise member. + CodeGenFunction::JumpDest FinalJD; -struct CGCoroData { // Stores the llvm.coro.id emitted in the function so that we can supply it // as the first argument to coro.begin, coro.alloc and coro.free intrinsics. // Note: llvm.coro.id returns a token that cannot be directly expressed in a // builtin. llvm::CallInst *CoroId = nullptr; + // If coro.id came from the builtin, remember the expression to give better // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by // EmitCoroutineBody. CallExpr const *CoroIdExpr = nullptr; }; -} -} +// Defining these here allows to keep CGCoroData private to this file. clang::CodeGen::CodeGenFunction::CGCoroInfo::CGCoroInfo() {} CodeGenFunction::CGCoroInfo::~CGCoroInfo() {} @@ -59,19 +89,250 @@ static void createCoroData(CodeGenFunction &CGF, CurCoro.Data->CoroIdExpr = CoroIdExpr; } +// Synthesize a pretty name for a suspend point. 
+static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) { + unsigned No = 0; + switch (Kind) { + case AwaitKind::Init: + case AwaitKind::Final: + break; + case AwaitKind::Normal: + No = ++Coro.AwaitNum; + break; + case AwaitKind::Yield: + No = ++Coro.YieldNum; + break; + } + SmallString<32> Prefix(AwaitKindStr[static_cast<unsigned>(Kind)]); + if (No > 1) { + Twine(No).toVector(Prefix); + } + return Prefix; +} + +// Emit suspend expression which roughly looks like: +// +// auto && x = CommonExpr(); +// if (!x.await_ready()) { +// llvm_coro_save(); +// x.await_suspend(...); (*) +// llvm_coro_suspend(); (**) +// } +// x.await_resume(); +// +// where the result of the entire expression is the result of x.await_resume() +// +// (*) If x.await_suspend return type is bool, it allows to veto a suspend: +// if (x.await_suspend(...)) +// llvm_coro_suspend(); +// +// (**) llvm_coro_suspend() encodes three possible continuations as +// a switch instruction: +// +// %where-to = call i8 @llvm.coro.suspend(...) +// switch i8 %where-to, label %coro.ret [ ; jump to epilogue to suspend +// i8 0, label %yield.ready ; go here when resumed +// i8 1, label %yield.cleanup ; go here when destroyed +// ] +// +// See llvm's docs/Coroutines.rst for more details. +// +static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro, + CoroutineSuspendExpr const &S, + AwaitKind Kind, AggValueSlot aggSlot, + bool ignoreResult) { + auto *E = S.getCommonExpr(); + auto Binder = + CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); + auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); + + auto Prefix = buildSuspendPrefixStr(Coro, Kind); + BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready")); + BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend")); + BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup")); + + // If expression is ready, no need to suspend. + CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0); + + // Otherwise, emit suspend logic. + CGF.EmitBlock(SuspendBlock); + + auto &Builder = CGF.Builder; + llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save); + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); + auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); + + auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); + if (SuspendRet != nullptr) { + // Veto suspension if requested by bool returning await_suspend. + assert(SuspendRet->getType()->isIntegerTy(1) && + "Sema should have already checked that it is void or bool"); + BasicBlock *RealSuspendBlock = + CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); + CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); + SuspendBlock = RealSuspendBlock; + CGF.EmitBlock(RealSuspendBlock); + } + + // Emit the suspend point. + const bool IsFinalSuspend = (Kind == AwaitKind::Final); + llvm::Function *CoroSuspend = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend); + auto *SuspendResult = Builder.CreateCall( + CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)}); + + // Create a switch capturing three possible continuations. + auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2); + Switch->addCase(Builder.getInt8(0), ReadyBlock); + Switch->addCase(Builder.getInt8(1), CleanupBlock); + + // Emit cleanup for this suspend point. 
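For readers less familiar with the coroutines-TS constructs this lowering handles, the following is an illustrative sketch, not part of this commit, written against <experimental/coroutine> as it existed at the time; the names task and awaiter are made up. Each co_await becomes one suspend point named by buildSuspendPrefixStr ("await", "await2", ...), and a bool-returning await_suspend corresponds to the veto branch described in the comment above.

#include <experimental/coroutine>

struct task {
  struct promise_type {
    task get_return_object() { return {}; }
    // These correspond to the "init" and "final" suspend kinds.
    std::experimental::suspend_never initial_suspend() { return {}; }
    std::experimental::suspend_always final_suspend() { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

struct awaiter {
  bool await_ready() { return false; }
  // Returning bool lets the awaiter veto suspension (the ".suspend.bool" block).
  bool await_suspend(std::experimental::coroutine_handle<>) { return true; }
  void await_resume() {}
};

task f() {
  co_await awaiter{};  // suspend point labelled "await"
  co_await awaiter{};  // suspend point labelled "await2" (AwaitNum == 2)
  co_return;           // bumps CoreturnCount and branches to FinalJD
}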
+ CGF.EmitBlock(CleanupBlock); + CGF.EmitBranchThroughCleanup(Coro.CleanupJD); + + // Emit await_resume expression. + CGF.EmitBlock(ReadyBlock); + return CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); +} + +RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpression(*this, *CurCoro.Data, E, + CurCoro.Data->CurrentAwaitKind, aggSlot, + ignoreResult); +} +RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpression(*this, *CurCoro.Data, E, AwaitKind::Yield, + aggSlot, ignoreResult); +} + +void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { + ++CurCoro.Data->CoreturnCount; + EmitStmt(S.getPromiseCall()); + EmitBranchThroughCleanup(CurCoro.Data->FinalJD); +} + +// For WinEH exception representation backend need to know what funclet coro.end +// belongs to. That information is passed in a funclet bundle. +static SmallVector<llvm::OperandBundleDef, 1> +getBundlesForCoroEnd(CodeGenFunction &CGF) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; + + if (llvm::Instruction *EHPad = CGF.CurrentFuncletPad) + BundleList.emplace_back("funclet", EHPad); + + return BundleList; +} + +namespace { +// We will insert coro.end to cut any of the destructors for objects that +// do not need to be destroyed once the coroutine is resumed. +// See llvm/docs/Coroutines.rst for more details about coro.end. +struct CallCoroEnd final : public EHScopeStack::Cleanup { + void Emit(CodeGenFunction &CGF, Flags flags) override { + auto &CGM = CGF.CGM; + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); + llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end); + // See if we have a funclet bundle to associate coro.end with. (WinEH) + auto Bundles = getBundlesForCoroEnd(CGF); + auto *CoroEnd = CGF.Builder.CreateCall( + CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles); + if (Bundles.empty()) { + // Otherwise, (landingpad model), create a conditional branch that leads + // either to a cleanup block or a block with EH resume instruction. + auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true); + auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont"); + CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB); + CGF.EmitBlock(CleanupContBB); + } + } +}; +} + +namespace { +// Make sure to call coro.delete on scope exit. +struct CallCoroDelete final : public EHScopeStack::Cleanup { + Stmt *Deallocate; + + // TODO: Wrap deallocate in if(coro.free(...)) Deallocate. + void Emit(CodeGenFunction &CGF, Flags) override { + // Note: That deallocation will be emitted twice: once for a normal exit and + // once for exceptional exit. This usage is safe because Deallocate does not + // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() + // builds a single call to a deallocation function which is safe to emit + // multiple times. 
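The allocation-failure branch handled just below in EmitCoroutineBody (the coro.ret.on.failure block) corresponds, at the source level, to a promise that declares get_return_object_on_allocation_failure together with a non-throwing operator new. A hypothetical sketch, reusing the shape of the earlier task example:

#include <experimental/coroutine>
#include <cstddef>
#include <new>

struct task {
  struct promise_type {
    // If frame allocation returns null, codegen returns this object instead.
    static task get_return_object_on_allocation_failure() { return {}; }
    task get_return_object() { return {}; }
    std::experimental::suspend_never initial_suspend() { return {}; }
    std::experimental::suspend_never final_suspend() { return {}; }
    void return_void() {}
    void unhandled_exception() {}
    // Non-throwing frame allocation, so failure is reported as nullptr.
    void *operator new(std::size_t Size) noexcept {
      return ::operator new(Size, std::nothrow);
    }
    void operator delete(void *Ptr) noexcept { ::operator delete(Ptr); }
  };
};

task g() { co_return; }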
+ CGF.EmitStmt(Deallocate); + } + explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {} +}; +} + void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); auto &TI = CGM.getContext().getTargetInfo(); unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *FinalBB = createBasicBlock("coro.final"); + auto *RetBB = createBasicBlock("coro.ret"); + auto *CoroId = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_id), {Builder.getInt32(NewAlign), NullPtr, NullPtr, NullPtr}); createCoroData(*this, CurCoro, CoroId); + CurCoro.Data->SuspendBB = RetBB; + + auto *AllocateCall = EmitScalarExpr(S.getAllocate()); + + // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. + if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { + auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); + auto *InitBB = createBasicBlock("coro.init"); + + // See if allocation was successful. + auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); + auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); + Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); + + // If not, return OnAllocFailure object. + EmitBlock(RetOnFailureBB); + EmitStmt(RetOnAllocFailure); + + EmitBlock(InitBB); + } + + CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB); + { + CodeGenFunction::RunCleanupsScope ResumeScope(*this); + EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate()); + + EmitStmt(S.getPromiseDeclStmt()); + + EHStack.pushCleanup<CallCoroEnd>(EHCleanup); + + CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); + + // FIXME: Emit initial suspend and more before the body. + + CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal; + EmitStmt(S.getBody()); + + // See if we need to generate final suspend. + const bool CanFallthrough = Builder.GetInsertBlock(); + const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; + if (CanFallthrough || HasCoreturns) { + EmitBlock(FinalBB); + // FIXME: Emit final suspend. + } + } + + EmitBlock(RetBB); + llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end); + Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()}); - EmitScalarExpr(S.getAllocate()); - // FIXME: Emit the rest of the coroutine. - EmitStmt(S.getDeallocate()); + // FIXME: Emit return for the coroutine return object. } // Emit coroutine intrinsic and patch up arguments of the token type. diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 12a68036b09c6..818b51543d30a 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -107,8 +107,8 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation, // Construct a location that has a valid scope, but no line info. 
assert(!DI->LexicalBlockStack.empty()); - CGF->Builder.SetCurrentDebugLocation( - llvm::DebugLoc::get(0, 0, DI->LexicalBlockStack.back())); + CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( + 0, 0, DI->LexicalBlockStack.back(), DI->getInlinedAt())); } ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E) @@ -134,6 +134,30 @@ ApplyDebugLocation::~ApplyDebugLocation() { CGF->Builder.SetCurrentDebugLocation(std::move(OriginalLocation)); } +ApplyInlineDebugLocation::ApplyInlineDebugLocation(CodeGenFunction &CGF, + GlobalDecl InlinedFn) + : CGF(&CGF) { + if (!CGF.getDebugInfo()) { + this->CGF = nullptr; + return; + } + auto &DI = *CGF.getDebugInfo(); + SavedLocation = DI.getLocation(); + assert((DI.getInlinedAt() == + CGF.Builder.getCurrentDebugLocation()->getInlinedAt()) && + "CGDebugInfo and IRBuilder are out of sync"); + + DI.EmitInlineFunctionStart(CGF.Builder, InlinedFn); +} + +ApplyInlineDebugLocation::~ApplyInlineDebugLocation() { + if (!CGF) + return; + auto &DI = *CGF->getDebugInfo(); + DI.EmitInlineFunctionEnd(CGF->Builder); + DI.EmitLocation(CGF->Builder, SavedLocation); +} + void CGDebugInfo::setLocation(SourceLocation Loc) { // If the new location isn't valid return. if (Loc.isInvalid()) @@ -249,8 +273,8 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { << OC->getIdentifier()->getNameStart() << ')'; } } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { - OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '(' - << OCD->getIdentifier()->getNameStart() << ')'; + OS << OCD->getClassInterface()->getName() << '(' + << OCD->getName() << ')'; } else if (isa<ObjCProtocolDecl>(DC)) { // We can extract the type of the class from the self pointer. if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) { @@ -509,7 +533,8 @@ void CGDebugInfo::CreateCompileUnit() { Checksum), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */, - CGM.getCodeGenOpts().SplitDwarfInlining); + CGM.getCodeGenOpts().SplitDwarfInlining, + CGM.getCodeGenOpts().DebugInfoForProfiling); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -581,8 +606,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return getOrCreateStructPtrType("opencl_clk_event_t", OCLClkEventDITy); case BuiltinType::OCLQueue: return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy); - case BuiltinType::OCLNDRange: - return getOrCreateStructPtrType("opencl_ndrange_t", OCLNDRangeDITy); case BuiltinType::OCLReserveID: return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy); @@ -793,17 +816,19 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here // because that does not return the correct value for references. 
- unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy); - uint64_t Size = CGM.getTarget().getPointerWidth(AS); + unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy); + uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace); auto Align = getTypeAlignIfRequired(Ty, CGM.getContext()); + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(AddressSpace); if (Tag == llvm::dwarf::DW_TAG_reference_type || Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit), - Size, Align); + Size, Align, DWARFAddressSpace); else return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size, - Align); + Align, DWARFAddressSpace); } llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, @@ -1608,8 +1633,13 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy); llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements); unsigned Size = Context.getTypeSize(Context.VoidPtrTy); + unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); + llvm::DIType *vtbl_ptr_type = - DBuilder.createPointerType(SubTy, Size, 0, "__vtbl_ptr_type"); + DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace, + "__vtbl_ptr_type"); VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size); return VTablePtrType; } @@ -1648,10 +1678,14 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, unsigned VSlotCount = VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData; unsigned VTableWidth = PtrWidth * VSlotCount; + unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); // Create a very wide void* type and insert it directly in the element list. llvm::DIType *VTableType = - DBuilder.createPointerType(nullptr, VTableWidth, 0, "__vtbl_ptr_type"); + DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace, + "__vtbl_ptr_type"); EltTys.push_back(VTableType); // The vptr is a pointer to this special vtable type. @@ -1714,7 +1748,27 @@ void CGDebugInfo::completeType(const RecordDecl *RD) { completeRequiredType(RD); } +/// Return true if the class or any of its methods are marked dllimport. +static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { + if (RD->hasAttr<DLLImportAttr>()) + return true; + for (const CXXMethodDecl *MD : RD->methods()) + if (MD->hasAttr<DLLImportAttr>()) + return true; + return false; +} + void CGDebugInfo::completeClassData(const RecordDecl *RD) { + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->isDynamicClass() && + CGM.getVTableLinkage(CXXRD) == + llvm::GlobalValue::AvailableExternallyLinkage && + !isClassOrMethodDLLImport(CXXRD)) + return; + completeClass(RD); +} + +void CGDebugInfo::completeClass(const RecordDecl *RD) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getRecordType(RD); @@ -1760,22 +1814,16 @@ static bool isDefinedInClangModule(const RecordDecl *RD) { return true; } -/// Return true if the class or any of its methods are marked dllimport. 
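An illustrative case for the new early return in completeClassData above (not taken from the commit): a dynamic class whose key function is defined in another translation unit, so any vtable this TU emits has available_externally linkage and the full class debug info can be left to the TU that owns the vtable, unless the class or one of its methods is dllimport.

// S::key() is defined elsewhere, so this TU only references S's vtable.
struct S {
  virtual void key();  // key function, defined in another TU
  int member = 0;
};

S *make();
int use() { return make()->member; }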
-static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { - if (RD->hasAttr<DLLImportAttr>()) - return true; - for (const CXXMethodDecl *MD : RD->methods()) - if (MD->hasAttr<DLLImportAttr>()) - return true; - return false; -} - static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) return true; + if (auto *ES = RD->getASTContext().getExternalSource()) + if (ES->hasExternalDefinitions(RD) == ExternalASTSource::EK_Always) + return true; + if (DebugKind > codegenoptions::LimitedDebugInfo) return false; @@ -2009,7 +2057,11 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, if (CreateSkeletonCU && IsRootModule) { // PCH files don't have a signature field in the control block, // but LLVM detects skeleton CUs by looking for a non-zero DWO id. - uint64_t Signature = Mod.getSignature() ? Mod.getSignature() : ~1ULL; + // We use the lower 64 bits for debug info. + uint64_t Signature = + Mod.getSignature() + ? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0] + : ~1ULL; llvm::DIBuilder DIB(CGM.getModule()); DIB.createCompileUnit(TheCU->getSourceLanguage(), DIB.createFile(Mod.getModuleName(), Mod.getPath()), @@ -2408,6 +2460,21 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { FullName); } +llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent, + unsigned MType, SourceLocation LineLoc, + StringRef Name, StringRef Value) { + unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc); + return DBuilder.createMacro(Parent, Line, MType, Name, Value); +} + +llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, + SourceLocation LineLoc, + SourceLocation FileLoc) { + llvm::DIFile *FName = getOrCreateFile(FileLoc); + unsigned Line = LineLoc.isInvalid() ? 
0 : getLineNumber(LineLoc); + return DBuilder.createTempMacroFile(Parent, Line, FName); +} + static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { Qualifiers Quals; do { @@ -2451,8 +2518,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { case Type::SubstTemplateTypeParm: T = cast<SubstTemplateTypeParmType>(T)->getReplacementType(); break; - case Type::Auto: { - QualType DT = cast<AutoType>(T)->getDeducedType(); + case Type::Auto: + case Type::DeducedTemplateSpecialization: { + QualType DT = cast<DeducedType>(T)->getDeducedType(); assert(!DT.isNull() && "Undeduced types shouldn't reach here."); T = DT; break; @@ -2488,11 +2556,17 @@ void CGDebugInfo::completeTemplateDefinition( const ClassTemplateSpecializationDecl &SD) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; + completeUnusedClass(SD); +} + +void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { + if (DebugKind <= codegenoptions::DebugLineTablesOnly) + return; - completeClassData(&SD); + completeClassData(&D); // In case this type has no member function definitions being emitted, ensure // it is retained - RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr()); + RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr()); } llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { @@ -2618,6 +2692,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Attributed: case Type::Adjusted: case Type::Decayed: + case Type::DeducedTemplateSpecialization: case Type::Elaborated: case Type::Paren: case Type::SubstTemplateTypeParm: @@ -2774,9 +2849,10 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, } // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or - // debug is set to more than just line tables. + // debug is set to more than just line tables or extra debug info is needed. if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && !CGM.getCodeGenOpts().EmitGcovNotes && + !CGM.getCodeGenOpts().DebugInfoForProfiling && DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); @@ -2844,28 +2920,40 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU); } -llvm::DISubprogram * -CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { +llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, + bool Stub) { llvm::DINodeArray TParamsArray; StringRef Name, LinkageName; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; - SourceLocation Loc = FD->getLocation(); + SourceLocation Loc = GD.getDecl()->getLocation(); llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; unsigned Line = getLineNumber(Loc); - - collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext, + collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray, Flags); + auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); + // Build function type. 
SmallVector<QualType, 16> ArgTypes; - for (const ParmVarDecl *Parm: FD->parameters()) - ArgTypes.push_back(Parm->getType()); + if (FD) + for (const ParmVarDecl *Parm : FD->parameters()) + ArgTypes.push_back(Parm->getType()); CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); + if (Stub) { + return DBuilder.createFunction( + DContext, Name, LinkageName, Unit, Line, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), + !FD->isExternallyVisible(), + /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get(), getFunctionDeclaration(FD)); + } + llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(), + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), + !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); @@ -2875,6 +2963,16 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { return SP; } +llvm::DISubprogram * +CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) { + return getFunctionFwdDeclOrStub(GD, /* Stub = */ false); +} + +llvm::DISubprogram * +CGDebugInfo::getFunctionStub(GlobalDecl GD) { + return getFunctionFwdDeclOrStub(GD, /* Stub = */ true); +} + llvm::DIGlobalVariable * CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { QualType T; @@ -3146,6 +3244,27 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, TParamsArray.get(), getFunctionDeclaration(D))); } +void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + // If there is a subprogram for this function available then use it. 
+ auto FI = SPCache.find(FD->getCanonicalDecl()); + llvm::DISubprogram *SP = nullptr; + if (FI != SPCache.end()) + SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second); + if (!SP) + SP = getFunctionStub(GD); + FnBeginRegionCount.push_back(LexicalBlockStack.size()); + LexicalBlockStack.emplace_back(SP); + setInlinedAt(Builder.getCurrentDebugLocation()); + EmitLocation(Builder, FD->getLocation()); +} + +void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) { + assert(CurInlinedAt && "unbalanced inline scope stack"); + EmitFunctionEnd(Builder); + setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt()); +} + void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { // Update our current location setLocation(Loc); @@ -3155,7 +3274,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { llvm::MDNode *Scope = LexicalBlockStack.back(); Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( - getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope)); + getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope, CurInlinedAt)); } void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { @@ -3167,14 +3286,29 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { getColumnNumber(CurLoc))); } +void CGDebugInfo::AppendAddressSpaceXDeref( + unsigned AddressSpace, + SmallVectorImpl<int64_t> &Expr) const { + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(AddressSpace); + if (!DWARFAddressSpace) + return; + + Expr.push_back(llvm::dwarf::DW_OP_constu); + Expr.push_back(DWARFAddressSpace.getValue()); + Expr.push_back(llvm::dwarf::DW_OP_swap); + Expr.push_back(llvm::dwarf::DW_OP_xderef); +} + void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, SourceLocation Loc) { // Set our current location. setLocation(Loc); // Emit a line table change for the current location inside the new scope. - Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( - getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back())); + Builder.SetCurrentDebugLocation( + llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), + LexicalBlockStack.back(), CurInlinedAt)); if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; @@ -3316,13 +3450,16 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Line = getLineNumber(VD->getLocation()); Column = getColumnNumber(VD->getLocation()); } - SmallVector<int64_t, 9> Expr; + SmallVector<int64_t, 13> Expr; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; if (VD->isImplicit()) Flags |= llvm::DINode::FlagArtificial; auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); + unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType()); + AppendAddressSpaceXDeref(AddressSpace, Expr); + // If this is the first argument and it is implicit then // give it an object pointer flag. // FIXME: There has to be a better way to do this, but for static @@ -3360,9 +3497,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Line, Ty, Align); // Insert an llvm.dbg.declare into the current block. 
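A minimal sketch of how the inlined locations used above are assembled; the helper name is made up and this is not code from the commit. As of this LLVM version, llvm::DebugLoc::get takes the scope plus an optional inlinedAt node, which is exactly what CurInlinedAt supplies.

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"

// Build a location for Line/Col inside InlinedFn while recording that the code
// was inlined at CallSiteLoc (the role played by CurInlinedAt above).
static llvm::DebugLoc makeInlinedLoc(unsigned Line, unsigned Col,
                                     llvm::DISubprogram *InlinedFn,
                                     llvm::DILocation *CallSiteLoc) {
  return llvm::DebugLoc::get(Line, Col, InlinedFn, CallSiteLoc);
}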
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), - Builder.GetInsertBlock()); + DBuilder.insertDeclare( + Storage, D, DBuilder.createExpression(Expr), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); return; } else if (isa<VariableArrayType>(VD->getType())) Expr.push_back(llvm::dwarf::DW_OP_deref); @@ -3393,9 +3531,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Flags | llvm::DINode::FlagArtificial, FieldAlign); // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), - Builder.GetInsertBlock()); + DBuilder.insertDeclare( + Storage, D, DBuilder.createExpression(Expr), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); } } } @@ -3411,7 +3550,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // Insert an llvm.dbg.declare into the current block. DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3492,7 +3631,8 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( Line, Ty, false, llvm::DINode::FlagZero, Align); // Insert an llvm.dbg.declare into the current block. - auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back()); + auto DL = + llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt); if (InsertPoint) DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL, InsertPoint); @@ -3660,12 +3800,13 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Insert an llvm.dbg.value into the current block. DBuilder.insertDbgValueIntrinsic( LocalAddr, 0, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope), Builder.GetInsertBlock()); + llvm::DebugLoc::get(line, column, scope, CurInlinedAt), + Builder.GetInsertBlock()); } // Insert an llvm.dbg.declare into the current block. DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope), + llvm::DebugLoc::get(line, column, scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3747,9 +3888,16 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, GVE = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext); } else { auto Align = getDeclAlignIfRequired(D, CGM.getContext()); + + SmallVector<int64_t, 4> Expr; + unsigned AddressSpace = + CGM.getContext().getTargetAddressSpace(D->getType()); + AppendAddressSpaceXDeref(AddressSpace, Expr); + GVE = DBuilder.createGlobalVariableExpression( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasLocalLinkage(), /*Expr=*/nullptr, + Var->hasLocalLinkage(), + Expr.empty() ? 
nullptr : DBuilder.createExpression(Expr), getOrCreateStaticDataMemberDeclarationOrNull(D), Align); Var->addDebugInfo(GVE); } diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index ac2e8dd2e0a45..5050ca0ad3fa0 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -61,6 +61,7 @@ class CGDebugInfo { ModuleMap *ClangModuleMap = nullptr; ExternalASTSource::ASTSourceDescriptor PCHDescriptor; SourceLocation CurLoc; + llvm::MDNode *CurInlinedAt = nullptr; llvm::DIType *VTablePtrType = nullptr; llvm::DIType *ClassTy = nullptr; llvm::DICompositeType *ObjTy = nullptr; @@ -292,6 +293,15 @@ class CGDebugInfo { /// Create a new lexical block node and push it on the stack. void CreateLexicalBlock(SourceLocation Loc); + /// If target-specific LLVM \p AddressSpace directly maps to target-specific + /// DWARF address space, appends extended dereferencing mechanism to complex + /// expression \p Expr. Otherwise, does nothing. + /// + /// Extended dereferencing mechanism is has the following format: + /// DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef + void AppendAddressSpaceXDeref(unsigned AddressSpace, + SmallVectorImpl<int64_t> &Expr) const; + public: CGDebugInfo(CodeGenModule &CGM); ~CGDebugInfo(); @@ -320,6 +330,17 @@ public: /// ignored. void setLocation(SourceLocation Loc); + /// Return the current source location. This does not necessarily correspond + /// to the IRBuilder's current DebugLoc. + SourceLocation getLocation() const { return CurLoc; } + + /// Update the current inline scope. All subsequent calls to \p EmitLocation + /// will create a location with this inlinedAt field. + void setInlinedAt(llvm::MDNode *InlinedAt) { CurInlinedAt = InlinedAt; } + + /// \return the current inline scope. + llvm::MDNode *getInlinedAt() const { return CurInlinedAt; } + // Converts a SourceLocation to a DebugLoc llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Loc); @@ -336,6 +357,11 @@ public: SourceLocation ScopeLoc, QualType FnType, llvm::Function *Fn, CGBuilderTy &Builder); + /// Start a new scope for an inlined function. + void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD); + /// End an inlined function scope. + void EmitInlineFunctionEnd(CGBuilderTy &Builder); + /// Emit debug info for a function declaration. void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType); @@ -409,9 +435,21 @@ public: void completeType(const RecordDecl *RD); void completeRequiredType(const RecordDecl *RD); void completeClassData(const RecordDecl *RD); + void completeClass(const RecordDecl *RD); void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD); - + void completeUnusedClass(const CXXRecordDecl &D); + + /// Create debug info for a macro defined by a #define directive or a macro + /// undefined by a #undef directive. + llvm::DIMacro *CreateMacro(llvm::DIMacroFile *Parent, unsigned MType, + SourceLocation LineLoc, StringRef Name, + StringRef Value); + + /// Create debug info for a file referenced by an #include directive. + llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent, + SourceLocation LineLoc, + SourceLocation FileLoc); private: /// Emit call to llvm.dbg.declare for a variable declaration. void EmitDeclare(const VarDecl *decl, llvm::Value *AI, @@ -491,11 +529,18 @@ private: llvm::DIDerivedType * getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D); + /// Helper that either creates a forward declaration or a stub. 
+ llvm::DISubprogram *getFunctionFwdDeclOrStub(GlobalDecl GD, bool Stub); + /// Create a subprogram describing the forward declaration - /// represented in the given FunctionDecl. - llvm::DISubprogram *getFunctionForwardDeclaration(const FunctionDecl *FD); + /// represented in the given FunctionDecl wrapped in a GlobalDecl. + llvm::DISubprogram *getFunctionForwardDeclaration(GlobalDecl GD); + + /// Create a DISubprogram describing the function + /// represented in the given FunctionDecl wrapped in a GlobalDecl. + llvm::DISubprogram *getFunctionStub(GlobalDecl GD); - /// Create a global variable describing the forward decalration + /// Create a global variable describing the forward declaration /// represented in the given VarDecl. llvm::DIGlobalVariable * getGlobalVariableForwardDeclaration(const VarDecl *VD); @@ -622,6 +667,20 @@ public: }; +/// A scoped helper to set the current debug location to an inlined location. +class ApplyInlineDebugLocation { + SourceLocation SavedLocation; + CodeGenFunction *CGF; + +public: + /// Set up the CodeGenFunction's DebugInfo to produce inline locations for the + /// function \p InlinedFn. The current debug location becomes the inlined call + /// site of the inlined function. + ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn); + /// Restore everything back to the orginial state. + ~ApplyInlineDebugLocation(); +}; + } // namespace CodeGen } // namespace clang diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 0a88b2310beb4..0f959043a22ed 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -50,6 +50,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::TemplateTypeParm: case Decl::UnresolvedUsingValue: case Decl::NonTypeTemplateParm: + case Decl::CXXDeductionGuide: case Decl::CXXMethod: case Decl::CXXConstructor: case Decl::CXXDestructor: @@ -671,6 +672,27 @@ static void drillIntoBlockVariable(CodeGenFunction &CGF, lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var)); } +void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, + SourceLocation Loc) { + if (!SanOpts.has(SanitizerKind::NullabilityAssign)) + return; + + auto Nullability = LHS.getType()->getNullability(getContext()); + if (!Nullability || *Nullability != NullabilityKind::NonNull) + return; + + // Check if the right hand side of the assignment is nonnull, if the left + // hand side must be nonnull. + SanitizerScope SanScope(this); + llvm::Value *IsNotNull = Builder.CreateIsNotNull(RHS); + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(LHS.getType()), + llvm::ConstantInt::get(Int8Ty, 0), // The LogAlignment info is unused. 
+ llvm::ConstantInt::get(Int8Ty, TCK_NonnullAssign)}; + EmitCheck({{IsNotNull, SanitizerKind::NullabilityAssign}}, + SanitizerHandler::TypeMismatch, StaticData, RHS); +} + void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit) { Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime(); @@ -678,6 +700,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, llvm::Value *value = EmitScalarExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); + EmitNullabilityCheck(lvalue, value, init->getExprLoc()); EmitStoreThroughLValue(RValue::get(value), lvalue, true); return; } @@ -766,6 +789,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); + EmitNullabilityCheck(lvalue, value, init->getExprLoc()); + // If the variable might have been accessed by its initializer, we // might have to initialize with a barrier. We have to do this for // both __weak and __strong, but __weak got filtered out above. @@ -1022,11 +1047,21 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit a lifetime intrinsic if meaningful. There's no point in doing this // if we don't have a valid insertion point (?). if (HaveInsertPoint() && !IsMSCatchParam) { - // goto or switch-case statements can break lifetime into several - // regions which need more efforts to handle them correctly. PR28267 - // This is rare case, but it's better just omit intrinsics than have - // them incorrectly placed. - if (!Bypasses.IsBypassed(&D)) { + // If there's a jump into the lifetime of this variable, its lifetime + // gets broken up into several regions in IR, which requires more work + // to handle correctly. For now, just omit the intrinsics; this is a + // rare case, and it's better to just be conservatively correct. + // PR28267. + // + // We have to do this in all language modes if there's a jump past the + // declaration. We also have to do it in C if there's a jump to an + // earlier point in the current block because non-VLA lifetimes begin as + // soon as the containing block is entered, not when its variables + // actually come into scope; suppressing the lifetime annotations + // completely in this case is unnecessarily pessimistic, but again, this + // is rare. + if (!Bypasses.IsBypassed(&D) && + !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy); emission.SizeForLifetimeMarkers = EmitLifetimeStart(size, address.getPointer()); @@ -1083,6 +1118,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, address.getPointer()); + // Make sure we call @llvm.lifetime.end. + if (emission.useLifetimeMarkers()) + EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, + emission.getAllocatedAddress(), + emission.getSizeForLifetimeMarkers()); + return emission; } @@ -1373,13 +1414,6 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; - // Make sure we call @llvm.lifetime.end. This needs to happen - // *last*, so the cleanup needs to be pushed *first*. - if (emission.useLifetimeMarkers()) - EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, - emission.getAllocatedAddress(), - emission.getSizeForLifetimeMarkers()); - // Check the type for a cleanup. 
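A small illustration, not part of the diff, of what the EmitNullabilityCheck call added to EmitScalarInit guards: under the nullability sanitizer (-fsanitize=nullability-assign, matching SanitizerKind::NullabilityAssign above), initializing a _Nonnull pointer from a possibly-null value is preceded by a runtime non-null check on the stored value.

void capture(int *maybe_null) {
  int *_Nonnull required = maybe_null;  // checked: maybe_null must be non-null here
  (void)required;
}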
if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) emitAutoVarTypeCleanup(emission, dtorKind); @@ -1691,17 +1725,19 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, /// Lazily declare the @llvm.lifetime.start intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { - if (LifetimeStartFn) return LifetimeStartFn; + if (LifetimeStartFn) + return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), - llvm::Intrinsic::lifetime_start); + llvm::Intrinsic::lifetime_start, Int8PtrTy); return LifetimeStartFn; } /// Lazily declare the @llvm.lifetime.end intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { - if (LifetimeEndFn) return LifetimeEndFn; + if (LifetimeEndFn) + return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), - llvm::Intrinsic::lifetime_end); + llvm::Intrinsic::lifetime_end, Int8PtrTy); return LifetimeEndFn; } @@ -1869,6 +1905,19 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, DeclPtr.getPointer()); + + // We can only check return value nullability if all arguments to the + // function satisfy their nullability preconditions. This makes it necessary + // to emit null checks for args in the function body itself. + if (requiresReturnValueNullabilityCheck()) { + auto Nullability = Ty->getNullability(getContext()); + if (Nullability && *Nullability == NullabilityKind::NonNull) { + SanitizerScope SanScope(this); + RetValNullabilityPrecondition = + Builder.CreateAnd(RetValNullabilityPrecondition, + Builder.CreateIsNotNull(Arg.getAnyValue())); + } + } } void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index f56e182169311..f61d60a63a6a2 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -237,7 +237,7 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, llvm::FunctionType::get(IntTy, dtorStub->getType(), false); llvm::Constant *atexit = - CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeSet(), + CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) atexitFn->setDoesNotThrow(); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index f908bf2b3b0ac..ca1535182ec1b 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -180,8 +180,8 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, // The GCC runtime's personality function inherently doesn't support // mixed EH. Use the C++ personality just to avoid returning null. case ObjCRuntime::GCC: - case ObjCRuntime::ObjFW: // XXX: this will change soon - return EHPersonality::GNU_ObjC; + case ObjCRuntime::ObjFW: + return getObjCPersonality(T, L); case ObjCRuntime::GNUstep: return EHPersonality::GNU_ObjCXX; } @@ -231,7 +231,7 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true), Personality.PersonalityFn, - llvm::AttributeSet(), /*Local=*/true); + llvm::AttributeList(), /*Local=*/true); } static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, @@ -1666,23 +1666,12 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, QualType RetTy = IsFilter ? 
getContext().LongTy : getContext().VoidTy; - llvm::Function *ParentFn = ParentCGF.CurFn; const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( FnTy, llvm::GlobalValue::InternalLinkage, Name.str(), &CGM.getModule()); - // The filter is either in the same comdat as the function, or it's internal. - if (llvm::Comdat *C = ParentFn->getComdat()) { - Fn->setComdat(C); - } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) { - llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName()); - ParentFn->setComdat(C); - Fn->setComdat(C); - } else { - Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - } IsOutlinedSEHHelper = true; diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index e5e34a5f3ed60..265ef27a46b0f 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -71,7 +71,8 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, /// block. llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name) { - return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); + return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), + nullptr, Name, AllocaInsertPt); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -534,7 +535,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const { void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Ptr, QualType Ty, - CharUnits Alignment, bool SkipNullCheck) { + CharUnits Alignment, + SanitizerSet SkippedChecks) { if (!sanitizePerformTypeCheck()) return; @@ -552,7 +554,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast || TCK == TCK_UpcastToVirtualBase; if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) && - !SkipNullCheck) { + !SkippedChecks.has(SanitizerKind::Null)) { // The glvalue must not be an empty glvalue. llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr); @@ -568,7 +570,9 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, } } - if (SanOpts.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) { + if (SanOpts.has(SanitizerKind::ObjectSize) && + !SkippedChecks.has(SanitizerKind::ObjectSize) && + !Ty->isIncompleteType()) { uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity(); // The glvalue must refer to a large enough storage region. 
@@ -578,22 +582,24 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); llvm::Value *Min = Builder.getFalse(); + llvm::Value *NullIsUnknown = Builder.getFalse(); llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); - llvm::Value *LargeEnough = - Builder.CreateICmpUGE(Builder.CreateCall(F, {CastAddr, Min}), - llvm::ConstantInt::get(IntPtrTy, Size)); + llvm::Value *LargeEnough = Builder.CreateICmpUGE( + Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}), + llvm::ConstantInt::get(IntPtrTy, Size)); Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); } uint64_t AlignVal = 0; - if (SanOpts.has(SanitizerKind::Alignment)) { + if (SanOpts.has(SanitizerKind::Alignment) && + !SkippedChecks.has(SanitizerKind::Alignment)) { AlignVal = Alignment.getQuantity(); if (!Ty->isIncompleteType() && !AlignVal) AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity(); // The glvalue must be suitably aligned. - if (AlignVal) { + if (AlignVal > 1) { llvm::Value *Align = Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy), llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); @@ -624,6 +630,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // or call a non-static member function CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); if (SanOpts.has(SanitizerKind::Vptr) && + !SkippedChecks.has(SanitizerKind::Vptr) && (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || TCK == TCK_UpcastToVirtualBase) && @@ -947,15 +954,47 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, E->getType()); } +bool CodeGenFunction::IsWrappedCXXThis(const Expr *Obj) { + const Expr *Base = Obj; + while (!isa<CXXThisExpr>(Base)) { + // The result of a dynamic_cast can be null. 
+ if (isa<CXXDynamicCastExpr>(Base)) + return false; + + if (const auto *CE = dyn_cast<CastExpr>(Base)) { + Base = CE->getSubExpr(); + } else if (const auto *PE = dyn_cast<ParenExpr>(Base)) { + Base = PE->getSubExpr(); + } else if (const auto *UO = dyn_cast<UnaryOperator>(Base)) { + if (UO->getOpcode() == UO_Extension) + Base = UO->getSubExpr(); + else + return false; + } else { + return false; + } + } + return true; +} + LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { LValue LV; if (SanOpts.has(SanitizerKind::ArrayBounds) && isa<ArraySubscriptExpr>(E)) LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true); else LV = EmitLValue(E); - if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) + if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) { + SanitizerSet SkippedChecks; + if (const auto *ME = dyn_cast<MemberExpr>(E)) { + bool IsBaseCXXThis = IsWrappedCXXThis(ME->getBase()); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa<DeclRefExpr>(ME->getBase())) + SkippedChecks.set(SanitizerKind::Null, true); + } EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), - E->getType(), LV.getAlignment()); + E->getType(), LV.getAlignment(), SkippedChecks); + } return LV; } @@ -1033,7 +1072,19 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { const auto *cleanups = cast<ExprWithCleanups>(E); enterFullExpression(cleanups); RunCleanupsScope Scope(*this); - return EmitLValue(cleanups->getSubExpr()); + LValue LV = EmitLValue(cleanups->getSubExpr()); + if (LV.isSimple()) { + // Defend against branches out of gnu statement expressions surrounded by + // cleanups. + llvm::Value *V = LV.getPointer(); + Scope.ForceCleanup({&V}); + return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), + getContext(), LV.getAlignmentSource(), + LV.getTBAAInfo()); + } + // FIXME: Is it possible to create an ExprWithCleanups that produces a + // bitfield lvalue or some other non-simple lvalue? + return LV; } case Expr::CXXDefaultArgExprClass: @@ -1265,6 +1316,53 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { return MDHelper.createRange(Min, End); } +bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, + SourceLocation Loc) { + bool HasBoolCheck = SanOpts.has(SanitizerKind::Bool); + bool HasEnumCheck = SanOpts.has(SanitizerKind::Enum); + if (!HasBoolCheck && !HasEnumCheck) + return false; + + bool IsBool = hasBooleanRepresentation(Ty) || + NSAPI(CGM.getContext()).isObjCBOOLType(Ty); + bool NeedsBoolCheck = HasBoolCheck && IsBool; + bool NeedsEnumCheck = HasEnumCheck && Ty->getAs<EnumType>(); + if (!NeedsBoolCheck && !NeedsEnumCheck) + return false; + + // Single-bit booleans don't need to be checked. Special-case this to avoid + // a bit width mismatch when handling bitfield values. This is handled by + // EmitFromMemory for the non-bitfield case. 
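Illustration (not from the commit) of the accesses the IsWrappedCXXThis helper above recognizes: member accesses whose base is 'this', possibly wrapped in parentheses, casts, or __extension__, now skip the -fsanitize=null and alignment checks, on the grounds that 'this' has already been validated on entry to the member function.

struct Widget {
  int value;
  int read() const {
    // Both bases below reduce to 'this', so the SkippedChecks logic drops the
    // null and alignment checks that -fsanitize=null,alignment would emit.
    return this->value + (static_cast<const Widget *>(this))->value;
  }
};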
+ if (IsBool && + cast<llvm::IntegerType>(Value->getType())->getBitWidth() == 1) + return false; + + llvm::APInt Min, End; + if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) + return true; + + SanitizerScope SanScope(this); + llvm::Value *Check; + --End; + if (!Min) { + Check = Builder.CreateICmpULE( + Value, llvm::ConstantInt::get(getLLVMContext(), End)); + } else { + llvm::Value *Upper = Builder.CreateICmpSLE( + Value, llvm::ConstantInt::get(getLLVMContext(), End)); + llvm::Value *Lower = Builder.CreateICmpSGE( + Value, llvm::ConstantInt::get(getLLVMContext(), Min)); + Check = Builder.CreateAnd(Upper, Lower); + } + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc), + EmitCheckTypeDescriptor(Ty)}; + SanitizerMask Kind = + NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool; + EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue, + StaticArgs, EmitCheckValue(Value)); + return true; +} + llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, @@ -1273,26 +1371,28 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, QualType TBAABaseType, uint64_t TBAAOffset, bool isNontemporal) { - // For better performance, handle vector loads differently. - if (Ty->isVectorType()) { - const llvm::Type *EltTy = Addr.getElementType(); - - const auto *VTy = cast<llvm::VectorType>(EltTy); - - // Handle vectors of size 3 like size 4 for better performance. - if (VTy->getNumElements() == 3) { - - // Bitcast to vec4 type. - llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(), - 4); - Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); - // Now load value. - llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); - - // Shuffle vector to get vec3. - V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), - {0, 1, 2}, "extractVec"); - return EmitFromMemory(V, Ty); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // For better performance, handle vector loads differently. + if (Ty->isVectorType()) { + const llvm::Type *EltTy = Addr.getElementType(); + + const auto *VTy = cast<llvm::VectorType>(EltTy); + + // Handle vectors of size 3 like size 4 for better performance. + if (VTy->getNumElements() == 3) { + + // Bitcast to vec4 type. + llvm::VectorType *vec4Ty = + llvm::VectorType::get(VTy->getElementType(), 4); + Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); + // Now load value. + llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); + + // Shuffle vector to get vec3. 
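Illustration (not from the commit) for the EmitScalarRangeCheck helper factored out above: under -fsanitize=bool and -fsanitize=enum, a load whose in-memory value can fall outside the type's value range gets a runtime check, and because the logic is now a reusable helper it is also applied to bitfield loads later in this patch.

enum Color { Red, Green, Blue };  // checked range derived from the enumerators

struct Packed {
  Color c : 4;     // wide enough to hold values outside Red..Blue
  bool flag : 2;   // multi-bit bool bitfield, not covered by the 1-bit fast path
};

int read(const Packed &p) {
  // Each bitfield load below is followed by a range check under
  // -fsanitize=enum (for c) and -fsanitize=bool (for flag).
  return static_cast<int>(p.c) + (p.flag ? 1 : 0);
}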
+ V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), + {0, 1, 2}, "extractVec"); + return EmitFromMemory(V, Ty); + } } } @@ -1317,35 +1417,9 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, false /*ConvertTypeToTag*/); } - bool IsBool = hasBooleanRepresentation(Ty) || - NSAPI(CGM.getContext()).isObjCBOOLType(Ty); - bool NeedsBoolCheck = SanOpts.has(SanitizerKind::Bool) && IsBool; - bool NeedsEnumCheck = - SanOpts.has(SanitizerKind::Enum) && Ty->getAs<EnumType>(); - if (NeedsBoolCheck || NeedsEnumCheck) { - SanitizerScope SanScope(this); - llvm::APInt Min, End; - if (getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) { - --End; - llvm::Value *Check; - if (!Min) - Check = Builder.CreateICmpULE( - Load, llvm::ConstantInt::get(getLLVMContext(), End)); - else { - llvm::Value *Upper = Builder.CreateICmpSLE( - Load, llvm::ConstantInt::get(getLLVMContext(), End)); - llvm::Value *Lower = Builder.CreateICmpSGE( - Load, llvm::ConstantInt::get(getLLVMContext(), Min)); - Check = Builder.CreateAnd(Upper, Lower); - } - llvm::Constant *StaticArgs[] = { - EmitCheckSourceLocation(Loc), - EmitCheckTypeDescriptor(Ty) - }; - SanitizerMask Kind = NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool; - EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue, - StaticArgs, EmitCheckValue(Load)); - } + if (EmitScalarRangeCheck(Load, Ty, Loc)) { + // In order to prevent the optimizer from throwing away the check, don't + // attach range metadata to the load. } else if (CGM.getCodeGenOpts().OptimizationLevel > 0) if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); @@ -1386,24 +1460,25 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, uint64_t TBAAOffset, bool isNontemporal) { - // Handle vectors differently to get better performance. - if (Ty->isVectorType()) { - llvm::Type *SrcTy = Value->getType(); - auto *VecTy = cast<llvm::VectorType>(SrcTy); - // Handle vec3 special. - if (VecTy->getNumElements() == 3) { - // Our source is a vec3, do a shuffle vector to make it a vec4. - llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), - Builder.getInt32(2), - llvm::UndefValue::get(Builder.getInt32Ty())}; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Value = Builder.CreateShuffleVector(Value, - llvm::UndefValue::get(VecTy), - MaskV, "extractVec"); - SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); - } - if (Addr.getElementType() != SrcTy) { - Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // Handle vectors differently to get better performance. + if (Ty->isVectorType()) { + llvm::Type *SrcTy = Value->getType(); + auto *VecTy = cast<llvm::VectorType>(SrcTy); + // Handle vec3 special. + if (VecTy->getNumElements() == 3) { + // Our source is a vec3, do a shuffle vector to make it a vec4. 
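Illustration (not from the commit) of the vec3 handling that becomes optional here: loads and stores of 3-element vectors are, by default, widened to 4-element accesses plus a shuffle (the castToVec4/extractVec code above), and the new PreserveVec3Type codegen option keeps the <3 x float> accesses as written. Expressed with Clang's ext_vector_type extension so the example stays in C++:

typedef float float3 __attribute__((ext_vector_type(3)));

// By default the load of *src and the store to *dst go through a float4
// temporary plus a shuffle; with PreserveVec3Type enabled they stay vec3.
void copy3(float3 *dst, const float3 *src) {
  *dst = *src;
}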
+ llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), + Builder.getInt32(2), + llvm::UndefValue::get(Builder.getInt32Ty())}; + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), + MaskV, "extractVec"); + SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + } + if (Addr.getElementType() != SrcTy) { + Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + } } } @@ -1487,10 +1562,11 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { return EmitLoadOfGlobalRegLValue(LV); assert(LV.isBitField() && "Unknown LValue type!"); - return EmitLoadOfBitfieldLValue(LV); + return EmitLoadOfBitfieldLValue(LV, Loc); } -RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { +RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, + SourceLocation Loc) { const CGBitFieldInfo &Info = LV.getBitFieldInfo(); // Get the output type. @@ -1515,7 +1591,7 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { "bf.clear"); } Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); - + EmitScalarRangeCheck(Val, LV.getType(), Loc); return RValue::get(Val); } @@ -2545,8 +2621,8 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction( FnType, FnName, - llvm::AttributeSet::get(CGF.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, B), + llvm::AttributeList::get(CGF.getLLVMContext(), + llvm::AttributeList::FunctionIndex, B), /*Local=*/true); llvm::CallInst *HandlerCall = CGF.EmitNounwindRuntimeCall(Fn, FnArgs); if (!MayReturn) { @@ -2709,6 +2785,24 @@ void CodeGenFunction::EmitCfiSlowPathCheck( EmitBlock(Cont); } +// Emit a stub for __cfi_check function so that the linker knows about this +// symbol in LTO mode. +void CodeGenFunction::EmitCfiCheckStub() { + llvm::Module *M = &CGM.getModule(); + auto &Ctx = M->getContext(); + llvm::Function *F = llvm::Function::Create( + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false), + llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M); + llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F); + // FIXME: consider emitting an intrinsic call like + // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2) + // which can be lowered in CrossDSOCFI pass to the actual contents of + // __cfi_check. This would allow inlining of __cfi_check calls. + llvm::CallInst::Create( + llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB); + llvm::ReturnInst::Create(Ctx, nullptr, BB); +} + // This function is basically a switch over the CFI failure kind, which is // extracted from CFICheckFailData (1st function argument). 
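EmitCfiCheckStub above materializes a weak __cfi_check symbol whose body is just a trap, to be rewritten by the CrossDSOCFI pass at LTO time. Rendered as C++ purely for illustration (the real stub is built directly as IR, and the parameter names here are invented), it is roughly:

#include <cstdint>

extern "C" void __cfi_check(std::uint64_t CallSiteTypeId, void *Addr,
                            void *DiagData) {
  // Placeholder body; cross-DSO CFI replaces this with the real type checks.
  __builtin_trap();
}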
Each case is either // llvm.trap or a call to one of the two runtime handlers, based on @@ -2821,7 +2915,7 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) { if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeSet::FunctionIndex, A); + TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); } return TrapCall; @@ -3335,7 +3429,14 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { AlignmentSource AlignSource; Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource); QualType PtrTy = BaseExpr->getType()->getPointeeType(); - EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy); + SanitizerSet SkippedChecks; + bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa<DeclRefExpr>(BaseExpr)) + SkippedChecks.set(SanitizerKind::Null, true); + EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, + /*Alignment=*/CharUnits::Zero(), SkippedChecks); BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource); } else BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 009244784e504..49bbb4808eaa2 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -111,6 +111,13 @@ public: void VisitGenericSelectionExpr(GenericSelectionExpr *GE) { Visit(GE->getResultExpr()); } + void VisitCoawaitExpr(CoawaitExpr *E) { + CGF.EmitCoawaitExpr(*E, Dest, IsResultUnused); + } + void VisitCoyieldExpr(CoyieldExpr *E) { + CGF.EmitCoyieldExpr(*E, Dest, IsResultUnused); + } + void VisitUnaryCoawait(UnaryOperator *E) { Visit(E->getSubExpr()); } void VisitUnaryExtension(UnaryOperator *E) { Visit(E->getSubExpr()); } void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) { return Visit(E->getReplacement()); @@ -1267,7 +1274,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Store the initializer into the field. EmitInitializationToLValue(E->getInit(curInitIndex++), LV); } else { - // We're out of initalizers; default-initialize to null + // We're out of initializers; default-initialize to null EmitNullInitializationToLValue(LV); } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 71c8fb8b7ae35..d02f074dd605f 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -24,7 +24,15 @@ using namespace clang; using namespace CodeGen; -static RequiredArgs +namespace { +struct MemberCallInfo { + RequiredArgs ReqArgs; + // Number of prefix arguments for the call. Ignores the `this` pointer. + unsigned PrefixSize; +}; +} + +static MemberCallInfo commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, @@ -48,6 +56,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); + unsigned PrefixSize = Args.size() - 1; // And the rest of the call args. 
if (RtlArgs) { @@ -65,7 +74,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, FPT->getNumParams() == 0 && "No CallExpr specified for function with non-zero number of arguments"); } - return required; + return {required, PrefixSize}; } RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( @@ -75,9 +84,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( const CallExpr *CE, CallArgList *RtlArgs) { const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); CallArgList Args; - RequiredArgs required = commonEmitCXXMemberOrOperatorCall( + MemberCallInfo CallInfo = commonEmitCXXMemberOrOperatorCall( *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs); - auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required); + auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall( + Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize); return EmitCall(FnInfo, Callee, ReturnValue, Args); } @@ -290,10 +300,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (CE) CallLoc = CE->getExprLoc(); - EmitTypeCheck(isa<CXXConstructorDecl>(CalleeDecl) - ? CodeGenFunction::TCK_ConstructorCall - : CodeGenFunction::TCK_MemberCall, - CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent())); + SanitizerSet SkippedChecks; + if (const auto *CMCE = dyn_cast<CXXMemberCallExpr>(CE)) { + auto *IOA = CMCE->getImplicitObjectArgument(); + bool IsImplicitObjectCXXThis = IsWrappedCXXThis(IOA); + if (IsImplicitObjectCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) + SkippedChecks.set(SanitizerKind::Null, true); + } + EmitTypeCheck( + isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall + : CodeGenFunction::TCK_MemberCall, + CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()), + /*Alignment=*/CharUnits::Zero(), SkippedChecks); // FIXME: Uses of 'MD' past this point need to be audited. We may need to use // 'CalleeDecl' instead. @@ -420,7 +440,8 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // And the rest of the call args EmitCallArgs(Args, FPT, E->arguments()); - return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), + return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required, + /*PrefixSize=*/0), Callee, ReturnValue, Args); } @@ -659,7 +680,10 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // Emit the array size expression. // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. - numElements = CGF.EmitScalarExpr(e->getArraySize()); + numElements = CGF.CGM.EmitConstantExpr(e->getArraySize(), + CGF.getContext().getSizeType(), &CGF); + if (!numElements) + numElements = CGF.EmitScalarExpr(e->getArraySize()); assert(isa<llvm::IntegerType>(numElements->getType())); // The number of elements can be have an arbitrary integer type; @@ -1256,10 +1280,10 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { // FIXME: Add addAttribute to CallSite. 
if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke)) - CI->addAttribute(llvm::AttributeSet::FunctionIndex, + CI->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Builtin); else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke)) - II->addAttribute(llvm::AttributeSet::FunctionIndex, + II->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Builtin); else llvm_unreachable("unexpected kind of call instruction"); @@ -1560,7 +1584,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // FIXME: Why do we not pass a CalleeDecl here? EmitCallArgs(allocatorArgs, allocatorType, E->placement_arguments(), - /*CalleeDecl*/nullptr, /*ParamsToSkip*/ParamsToSkip); + /*AC*/AbstractCallee(), /*ParamsToSkip*/ParamsToSkip); RValue RV = EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs); diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 59bc9cdbc0567..980972370dc2b 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -110,6 +110,16 @@ public: VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) { return Visit(PE->getReplacement()); } + ComplexPairTy VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.EmitCoawaitExpr(*S).getComplexVal(); + } + ComplexPairTy VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.EmitCoyieldExpr(*S).getComplexVal(); + } + ComplexPairTy VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + // l-values. ComplexPairTy VisitDeclRefExpr(DeclRefExpr *E) { @@ -198,7 +208,11 @@ public: ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) { CGF.enterFullExpression(E); CodeGenFunction::RunCleanupsScope Scope(CGF); - return Visit(E->getSubExpr()); + ComplexPairTy Vals = Visit(E->getSubExpr()); + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + Scope.ForceCleanup({&Vals.first, &Vals.second}); + return Vals; } ComplexPairTy VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) { assert(E->getType()->isAnyComplexType() && "Expected complex type!"); diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 1b85c45cd4be1..a64303831171d 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" +#include "CGCleanup.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" @@ -24,6 +25,7 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/ADT/Optional.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -47,7 +49,7 @@ struct BinOpInfo { Value *RHS; QualType Ty; // Computation Type. BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform - bool FPContractable; + FPOptions FPFeatures; const Expr *E; // Entire expr, for error unsupported. May not be binop. }; @@ -58,6 +60,75 @@ static bool MustVisitNullValue(const Expr *E) { return E->getType()->isNullPtrType(); } +/// If \p E is a widened promoted integer, get its base (unpromoted) type. 
+static llvm::Optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx, + const Expr *E) { + const Expr *Base = E->IgnoreImpCasts(); + if (E == Base) + return llvm::None; + + QualType BaseTy = Base->getType(); + if (!BaseTy->isPromotableIntegerType() || + Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType())) + return llvm::None; + + return BaseTy; +} + +/// Check if \p E is a widened promoted integer. +static bool IsWidenedIntegerOp(const ASTContext &Ctx, const Expr *E) { + return getUnwidenedIntegerType(Ctx, E).hasValue(); +} + +/// Check if we can skip the overflow check for \p Op. +static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { + assert((isa<UnaryOperator>(Op.E) || isa<BinaryOperator>(Op.E)) && + "Expected a unary or binary operator"); + + if (const auto *UO = dyn_cast<UnaryOperator>(Op.E)) + return IsWidenedIntegerOp(Ctx, UO->getSubExpr()); + + const auto *BO = cast<BinaryOperator>(Op.E); + auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); + if (!OptionalLHSTy) + return false; + + auto OptionalRHSTy = getUnwidenedIntegerType(Ctx, BO->getRHS()); + if (!OptionalRHSTy) + return false; + + QualType LHSTy = *OptionalLHSTy; + QualType RHSTy = *OptionalRHSTy; + + // We usually don't need overflow checks for binary operations with widened + // operands. Multiplication with promoted unsigned operands is a special case. + if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) || + !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType()) + return true; + + // The overflow check can be skipped if either one of the unpromoted types + // are less than half the size of the promoted type. + unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType()); + return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize || + (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize; +} + +/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions. +static void updateFastMathFlags(llvm::FastMathFlags &FMF, + FPOptions FPFeatures) { + FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement()); +} + +/// Propagate fast-math flags from \p Op to the instruction in \p V. +static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) { + if (auto *I = dyn_cast<llvm::Instruction>(V)) { + llvm::FastMathFlags FMF = I->getFastMathFlags(); + updateFastMathFlags(FMF, Op.FPFeatures); + I->setFastMathFlags(FMF); + } + return V; +} + class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, Value*> { CodeGenFunction &CGF; @@ -221,6 +292,15 @@ public: Value *VisitGenericSelectionExpr(GenericSelectionExpr *GE) { return Visit(GE->getResultExpr()); } + Value *VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.EmitCoawaitExpr(*S).getScalarVal(); + } + Value *VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.EmitCoyieldExpr(*S).getScalarVal(); + } + Value *VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } // Leaves. Value *VisitIntegerLiteral(const IntegerLiteral *E) { @@ -300,6 +380,24 @@ public: return V; } + Value *VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) { + VersionTuple Version = E->getVersion(); + + // If we're checking for a platform older than our minimum deployment + // target, we can fold the check away. 
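A minimal worked example of the elision logic in CanElideOverflowCheck above (assuming 16-bit short and 32-bit int; the function and variable names are placeholders):

    // Compiled with -fsanitize=signed-integer-overflow,unsigned-integer-overflow.
    int f(short a, short b, unsigned short c, unsigned short d) {
      int s = a + b;  // both operands widened from short: the sum always fits
                      // in int, so no overflow check is emitted
      int p = a * b;  // signed short * short also fits in int: check elided
      int q = c * d;  // 0xFFFF * 0xFFFF does not fit in int, and neither
                      // operand's original type is narrower than half of int,
                      // so the check is kept
      return s ^ p ^ q;
    }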
+ if (Version <= CGF.CGM.getTarget().getPlatformMinVersion()) + return llvm::ConstantInt::get(Builder.getInt1Ty(), 1); + + Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor(); + llvm::Value *Args[] = { + llvm::ConstantInt::get(CGF.CGM.Int32Ty, Version.getMajor()), + llvm::ConstantInt::get(CGF.CGM.Int32Ty, Min ? *Min : 0), + llvm::ConstantInt::get(CGF.CGM.Int32Ty, SMin ? *SMin : 0), + }; + + return CGF.EmitBuiltinAvailable(Args); + } + Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E); Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E); Value *VisitConvertVectorExpr(ConvertVectorExpr *E); @@ -405,11 +503,7 @@ public: return CGF.LoadCXXThis(); } - Value *VisitExprWithCleanups(ExprWithCleanups *E) { - CGF.enterFullExpression(E); - CodeGenFunction::RunCleanupsScope Scope(CGF); - return Visit(E->getSubExpr()); - } + Value *VisitExprWithCleanups(ExprWithCleanups *E); Value *VisitCXXNewExpr(const CXXNewExpr *E) { return CGF.EmitCXXNewExpr(E); } @@ -464,16 +558,21 @@ public: return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); // Fall through. case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), Ops)) + return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); return EmitOverflowCheckedBinOp(Ops); } } if (Ops.Ty->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), Ops)) return EmitOverflowCheckedBinOp(Ops); - if (Ops.LHS->getType()->isFPOrFPVectorTy()) - return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); + if (Ops.LHS->getType()->isFPOrFPVectorTy()) { + Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); + return propagateFMFlags(V, Ops); + } return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); } /// Create a binary op that checks for overflow. @@ -1616,6 +1715,16 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) { E->getExprLoc()); } +Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { + CGF.enterFullExpression(E); + CodeGenFunction::RunCleanupsScope Scope(CGF); + Value *V = Visit(E->getSubExpr()); + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + Scope.ForceCleanup({&V}); + return V; +} + //===----------------------------------------------------------------------===// // Unary Operators //===----------------------------------------------------------------------===// @@ -1627,7 +1736,7 @@ static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E, BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false); BinOp.Ty = E->getType(); BinOp.Opcode = IsInc ? BO_Add : BO_Sub; - BinOp.FPContractable = false; + // FIXME: once UnaryOperator carries FPFeatures, copy it here. BinOp.E = E; return BinOp; } @@ -1645,6 +1754,8 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( return Builder.CreateNSWAdd(InVal, Amount, Name); // Fall through. 
case LangOptions::SOB_Trapping: + if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr())) + return Builder.CreateNSWAdd(InVal, Amount, Name); return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc)); } llvm_unreachable("Unknown SignedOverflowBehaviorTy"); @@ -1891,7 +2002,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); BinOp.Ty = E->getType(); BinOp.Opcode = BO_Sub; - BinOp.FPContractable = false; + // FIXME: once UnaryOperator carries FPFeatures, copy it here. BinOp.E = E; return EmitSub(BinOp); } @@ -2112,7 +2223,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { Result.RHS = Visit(E->getRHS()); Result.Ty = E->getType(); Result.Opcode = E->getOpcode(); - Result.FPContractable = E->isFPContractable(); + Result.FPFeatures = E->getFPFeatures(); Result.E = E; return Result; } @@ -2132,7 +2243,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( OpInfo.RHS = Visit(E->getRHS()); OpInfo.Ty = E->getComputationResultType(); OpInfo.Opcode = E->getOpcode(); - OpInfo.FPContractable = E->isFPContractable(); + OpInfo.FPFeatures = E->getFPFeatures(); OpInfo.E = E; // Load/convert the LHS. LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); @@ -2263,8 +2374,10 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( SanitizerKind::IntegerDivideByZero)); } + const auto *BO = cast<BinaryOperator>(Ops.E); if (CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow) && - Ops.Ty->hasSignedIntegerRepresentation()) { + Ops.Ty->hasSignedIntegerRepresentation() && + !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS())) { llvm::IntegerType *Ty = cast<llvm::IntegerType>(Zero->getType()); llvm::Value *IntMin = @@ -2324,12 +2437,12 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { // Rem in C can't be a floating point type: C99 6.5.5p2. - if (CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero)) { + if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || + CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && + Ops.Ty->isIntegerType()) { CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); - - if (Ops.Ty->isIntegerType()) - EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); + EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); } if (Ops.Ty->hasUnsignedIntegerRepresentation()) @@ -2577,12 +2690,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, "Only fadd/fsub can be the root of an fmuladd."); // Check whether this op is marked as fusable. - if (!op.FPContractable) - return nullptr; - - // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is - // either disabled, or handled entirely by the LLVM backend). - if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On) + if (!op.FPFeatures.allowFPContractWithinStatement()) return nullptr; // We have a potentially fusable op. Look for a mul on one of the operands. @@ -2616,12 +2724,15 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); // Fall through. 
case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), op)) + return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); return EmitOverflowCheckedBinOp(op); } } if (op.Ty->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), op)) return EmitOverflowCheckedBinOp(op); if (op.LHS->getType()->isFPOrFPVectorTy()) { @@ -2629,7 +2740,8 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) return FMulAdd; - return Builder.CreateFAdd(op.LHS, op.RHS, "add"); + Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add"); + return propagateFMFlags(V, op); } return Builder.CreateAdd(op.LHS, op.RHS, "add"); @@ -2647,19 +2759,23 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); // Fall through. case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), op)) + return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); return EmitOverflowCheckedBinOp(op); } } if (op.Ty->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), op)) return EmitOverflowCheckedBinOp(op); if (op.LHS->getType()->isFPOrFPVectorTy()) { // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) return FMulAdd; - return Builder.CreateFSub(op.LHS, op.RHS, "sub"); + Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub"); + return propagateFMFlags(V, op); } return Builder.CreateSub(op.LHS, op.RHS, "sub"); @@ -2751,8 +2867,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { isa<llvm::IntegerType>(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); SmallVector<std::pair<Value *, SanitizerMask>, 2> Checks; - llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS); - llvm::Value *ValidExponent = Builder.CreateICmpULE(RHS, WidthMinusOne); + llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, Ops.RHS); + llvm::Value *ValidExponent = Builder.CreateICmpULE(Ops.RHS, WidthMinusOne); if (SanitizeExponent) { Checks.push_back( @@ -2767,12 +2883,14 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); llvm::BasicBlock *CheckShiftBase = CGF.createBasicBlock("check"); Builder.CreateCondBr(ValidExponent, CheckShiftBase, Cont); + llvm::Value *PromotedWidthMinusOne = + (RHS == Ops.RHS) ? WidthMinusOne + : GetWidthMinusOneValue(Ops.LHS, RHS); CGF.EmitBlock(CheckShiftBase); - llvm::Value *BitsShiftedOff = - Builder.CreateLShr(Ops.LHS, - Builder.CreateSub(WidthMinusOne, RHS, "shl.zeros", - /*NUW*/true, /*NSW*/true), - "shl.check"); + llvm::Value *BitsShiftedOff = Builder.CreateLShr( + Ops.LHS, Builder.CreateSub(PromotedWidthMinusOne, RHS, "shl.zeros", + /*NUW*/ true, /*NSW*/ true), + "shl.check"); if (CGF.getLangOpts().CPlusPlus) { // In C99, we are not permitted to shift a 1 bit into the sign bit. // Under C++11's rules, shifting a 1 bit into the sign bit is @@ -3038,10 +3156,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after // the assignment...'. 
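Since the contraction decision now comes from the per-expression FPOptions rather than a global codegen flag, a small sketch of the source patterns tryEmitFMulAdd distinguishes (assuming contraction within statements is enabled, e.g. via -ffp-contract=on or #pragma STDC FP_CONTRACT ON):

    double mad(double a, double b, double c) {
      return a * b + c;   // multiply and add in one statement: may be emitted
                          // as a single llvm.fmuladd intrinsic
    }

    double no_mad(double a, double b, double c) {
      double t = a * b;   // the multiply belongs to a different statement,
      return t + c;       // so the frontend does not form fmuladd here
    }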
- if (LHS.isBitField()) + if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); - else + } else { + CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); + } } // If the result is clearly ignored, return now. @@ -3327,9 +3447,11 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // safe to evaluate the LHS and RHS unconditionally. if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) && isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) { - CGF.incrementProfileCounter(E); - llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr); + llvm::Value *StepV = Builder.CreateZExtOrBitCast(CondV, CGF.Int64Ty); + + CGF.incrementProfileCounter(E, StepV); + llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); if (!LHS) { @@ -3491,8 +3613,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // vector to get a vec4, then a bitcast if the target type is different. if (NumElementsSrc == 3 && NumElementsDst != 3) { Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - DstTy); + + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + DstTy); + } + Src->setName("astype"); return Src; } @@ -3501,9 +3627,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // to vec4 if the original type is not vec4, then a shuffle vector to // get a vec3. if (NumElementsSrc != 3 && NumElementsDst == 3) { - auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - Vec4Ty); + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + Vec4Ty); + } + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); Src->setName("astype"); return Src; diff --git a/lib/CodeGen/CGCUDABuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp index 44dd003757adf..48156b1b26b78 100644 --- a/lib/CodeGen/CGCUDABuiltin.cpp +++ b/lib/CodeGen/CGGPUBuiltin.cpp @@ -1,4 +1,4 @@ -//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===// +//===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// Generates code for built-in CUDA calls which are not runtime-specific. -// (Runtime-specific codegen lives in CGCUDARuntime.) +// Generates code for built-in GPU calls which are not runtime-specific. +// (Runtime-specific codegen lives in programming model specific files.) // //===----------------------------------------------------------------------===// @@ -67,10 +67,9 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { // Note that by the time this function runs, E's args have already undergone the // standard C vararg promotion (short -> int, float -> double, etc.). 
RValue -CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue) { - assert(getLangOpts().CUDA); - assert(getLangOpts().CUDAIsDevice); +CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getTarget().getTriple().isNVPTX()); assert(E->getBuiltinCallee() == Builtin::BIprintf); assert(E->getNumArgs() >= 1); // printf always has at least one arg. diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 932b8a129e6e8..3a09a15dbc152 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1,4 +1,4 @@ -//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// +//===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===// // // The LLVM Compiler Infrastructure // @@ -117,10 +117,22 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E); if (!ALE) DLE = cast<ObjCDictionaryLiteral>(E); - - // Compute the type of the array we're initializing. + + // Optimize empty collections by referencing constants, when available. uint64_t NumElements = ALE ? ALE->getNumElements() : DLE->getNumElements(); + if (NumElements == 0 && CGM.getLangOpts().ObjCRuntime.hasEmptyCollections()) { + StringRef ConstantName = ALE ? "__NSArray0__" : "__NSDictionary0__"; + QualType IdTy(CGM.getContext().getObjCIdType()); + llvm::Constant *Constant = + CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName); + Address Addr(Constant, Context.getTypeAlignInChars(IdTy)); + LValue LV = MakeAddrLValue(Addr, IdTy); + return Builder.CreateBitCast(EmitLoadOfScalar(LV, E->getLocStart()), + ConvertType(E->getType())); + } + + // Compute the type of the array we're initializing. llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()), NumElements); QualType ElementType = Context.getObjCIdType().withConst(); @@ -427,7 +439,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, QualType ResultType = method ? method->getReturnType() : E->getType(); CallArgList Args; - EmitCallArgs(Args, method, E->arguments()); + EmitCallArgs(Args, method, E->arguments(), /*AC*/AbstractCallee(method)); // For delegate init calls in ARC, do an unsafe store of null into // self. This represents the call taking direct ownership of that @@ -1316,7 +1328,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, BinaryOperator assign(&ivarRef, finalArg, BO_Assign, ivarRef.getType(), VK_RValue, OK_Ordinary, - SourceLocation(), false); + SourceLocation(), FPOptions()); EmitStmt(&assign); } @@ -1469,6 +1481,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ if (DI) DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin()); + RunCleanupsScope ForScope(*this); + // The local variable comes into scope immediately. AutoVarEmission variable = AutoVarEmission::invalid(); if (const DeclStmt *SD = dyn_cast<DeclStmt>(S.getElement())) @@ -1499,8 +1513,6 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); - RunCleanupsScope ForScope(*this); - // Emit the collection pointer. In ARC, we do a retain. 
llvm::Value *Collection; if (getLangOpts().ObjCAutoRefCount) { @@ -1802,26 +1814,49 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { } +static bool IsForwarding(StringRef Name) { + return llvm::StringSwitch<bool>(Name) + .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV + "objc_autorelease", // ARCInstKind::Autorelease + "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV + "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV + "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease + "objc_retainedObject", // ARCInstKind::NoopCast + "objc_retain", // ARCInstKind::Retain + "objc_unretainedObject", // ARCInstKind::NoopCast + "objc_unretainedPointer", // ARCInstKind::NoopCast + "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV + true) + .Default(false); +} + static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, - llvm::FunctionType *type, - StringRef fnName) { - llvm::Constant *fn = CGM.CreateRuntimeFunction(type, fnName); + llvm::FunctionType *FTy, + StringRef Name) { + llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name); - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) { + if (auto *F = dyn_cast<llvm::Function>(RTF)) { // If the target runtime doesn't naturally support ARC, emit weak // references to the runtime support library. We don't really // permit this to fail, but we need a particular relocation style. if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() && !CGM.getTriple().isOSBinFormatCOFF()) { - f->setLinkage(llvm::Function::ExternalWeakLinkage); - } else if (fnName == "objc_retain" || fnName == "objc_release") { + F->setLinkage(llvm::Function::ExternalWeakLinkage); + } else if (Name == "objc_retain" || Name == "objc_release") { // If we have Native ARC, set nonlazybind attribute for these APIs for // performance. 
- f->addFnAttr(llvm::Attribute::NonLazyBind); + F->addFnAttr(llvm::Attribute::NonLazyBind); + } + + if (IsForwarding(Name)) { + llvm::AttrBuilder B; + B.addAttribute(llvm::Attribute::Returned); + + F->arg_begin()->addAttr(llvm::AttributeList::get(F->getContext(), 1, B)); } } - return fn; + return RTF; } /// Perform an operation having the signature @@ -1832,7 +1867,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, llvm::Constant *&fn, StringRef fnName, bool isTailCall = false) { - if (isa<llvm::ConstantPointerNull>(value)) return value; + if (isa<llvm::ConstantPointerNull>(value)) + return value; if (!fn) { llvm::FunctionType *fnType = @@ -3239,7 +3275,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment()); CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(), - VK_LValue, SourceLocation(), false); + VK_LValue, SourceLocation(), FPOptions()); EmitStmt(&TheCall); @@ -3375,5 +3411,54 @@ CodeGenFunction::EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty) { return Val; } +llvm::Value * +CodeGenFunction::EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args) { + assert(Args.size() == 3 && "Expected 3 argument here!"); + + if (!CGM.IsOSVersionAtLeastFn) { + llvm::FunctionType *FTy = + llvm::FunctionType::get(Int32Ty, {Int32Ty, Int32Ty, Int32Ty}, false); + CGM.IsOSVersionAtLeastFn = + CGM.CreateRuntimeFunction(FTy, "__isOSVersionAtLeast"); + } + + llvm::Value *CallRes = + EmitNounwindRuntimeCall(CGM.IsOSVersionAtLeastFn, Args); + + return Builder.CreateICmpNE(CallRes, llvm::Constant::getNullValue(Int32Ty)); +} + +void CodeGenModule::emitAtAvailableLinkGuard() { + if (!IsOSVersionAtLeastFn) + return; + // @available requires CoreFoundation only on Darwin. + if (!Target.getTriple().isOSDarwin()) + return; + // Add -framework CoreFoundation to the linker commands. We still want to + // emit the core foundation reference down below because otherwise if + // CoreFoundation is not used in the code, the linker won't link the + // framework. + auto &Context = getLLVMContext(); + llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), + llvm::MDString::get(Context, "CoreFoundation")}; + LinkerOptionsMetadata.push_back(llvm::MDNode::get(Context, Args)); + // Emit a reference to a symbol from CoreFoundation to ensure that + // CoreFoundation is linked into the final binary. 
+ llvm::FunctionType *FTy = + llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false); + llvm::Constant *CFFunc = + CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber"); + + llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); + llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction( + CheckFTy, "__clang_at_available_requires_core_foundation_framework")); + CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); + CodeGenFunction CGF(*this); + CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); + CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy)); + CGF.Builder.CreateUnreachable(); + addCompilerUsedGlobal(CFLinkCheckFunc); +} CGObjCRuntime::~CGObjCRuntime() {} diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index fa2b3d81e29a4..9f6ccb4b5d266 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -18,7 +18,7 @@ #include "CGCleanup.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" @@ -2207,7 +2207,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { IvarNames.push_back(MakeConstantString(IVD->getNameAsString())); // Get the type encoding for this ivar std::string TypeStr; - Context.getObjCEncodingForType(IVD->getType(), TypeStr); + Context.getObjCEncodingForType(IVD->getType(), TypeStr, IVD); IvarTypes.push_back(MakeConstantString(TypeStr)); // Get the offset uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 7219592fffcd7..43b347ce353f0 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -17,7 +17,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" @@ -64,13 +64,11 @@ private: // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; - return - CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, - params, true), - "objc_msgSend", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NonLazyBind)); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind)); } /// void objc_msgSend_stret (id, SEL, ...) @@ -589,13 +587,11 @@ public: llvm::Constant *getSetJmpFn() { // This is specifically the prototype for x86. 
llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() }; - return - CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, - params, false), - "_setjmp", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NonLazyBind)); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind)); } public: diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 9062936fdd149..db02c631c9e64 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -58,9 +58,6 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { case BuiltinType::OCLQueue: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.queue_t"), 0); - case BuiltinType::OCLNDRange: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.ndrange_t"), 0); case BuiltinType::OCLReserveID: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 40252171368b8..874b6a69e513f 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -15,7 +15,7 @@ #include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" @@ -842,12 +842,12 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); } -llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { +static llvm::Value *emitParallelOrTeamsOutlinedFunction( + CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); - const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); bool HasCancel = false; if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) @@ -857,11 +857,27 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, - HasCancel, getOutlinedHelperName()); + HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } +llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); + return emitParallelOrTeamsOutlinedFunction( + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); +} + +llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy 
&CodeGen) { + const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); + return emitParallelOrTeamsOutlinedFunction( + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); +} + llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, @@ -2958,7 +2974,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create the offloading info metadata node. llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - // Auxiliar methods to create metadata values and strings. + // Auxiliary methods to create metadata values and strings. auto getMDInt = [&](unsigned v) { return llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); @@ -3561,7 +3577,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, SharedRefLValue.getAddress(), Type); } else { // Initialize firstprivate array using element-by-element - // intialization. + // initialization. CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, @@ -3764,9 +3780,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Emit initial values for private copies (if any). llvm::Value *TaskPrivatesMap = nullptr; auto *TaskPrivatesMapTy = - std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), - 3) - ->getType(); + std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( @@ -4006,8 +4020,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, - NumDependencies, &TaskArgs, + auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, + &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { if (!Data.Tied) { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); @@ -4241,12 +4255,10 @@ static void emitReductionCombiner(CodeGenFunction &CGF, CGF.EmitIgnoredExpr(ReductionOp); } -static llvm::Value *emitReductionFunction(CodeGenModule &CGM, - llvm::Type *ArgsType, - ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, - ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps) { +llvm::Value *CGOpenMPRuntime::emitReductionFunction( + CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { auto &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); @@ -4329,11 +4341,11 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, return Fn; } -static void emitSingleReductionCombiner(CodeGenFunction &CGF, - const Expr *ReductionOp, - const Expr *PrivateRef, - const DeclRefExpr *LHS, - const DeclRefExpr *RHS) { +void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS) { if (PrivateRef->getType()->isArrayType()) { // Emit reduction for array section. 
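For reference, a sketch of a construct that exercises the reduction support being refactored into CGOpenMPRuntime here; each list item in the clause is combined by the generated void reduction_func(void *LHSArg, void *RHSArg) (the array and loop bound are placeholders):

    int sum = 0;
    #pragma omp parallel for reduction(+ : sum)
    for (int i = 0; i < n; ++i)
      sum += a[i];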
auto *LHSVar = cast<VarDecl>(LHS->getDecl()); @@ -4353,9 +4365,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps, - bool WithNowait, bool SimpleReduction) { + ReductionOptionsTy Options) { if (!CGF.HaveInsertPoint()) return; + + bool WithNowait = Options.WithNowait; + bool SimpleReduction = Options.SimpleReduction; + // Next code should be emitted for reduction: // // static kmp_critical_name lock = { 0 }; @@ -4497,12 +4513,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, }; auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &RT = CGF.CGM.getOpenMPRuntime(); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), - cast<DeclRefExpr>(*IRHS)); + RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); ++IPriv; ++ILHS; ++IRHS; @@ -4562,7 +4579,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } if (XExpr) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto &&AtomicRedGen = [BO, VD, IPriv, + auto &&AtomicRedGen = [BO, VD, Loc](CodeGenFunction &CGF, const Expr *XExpr, const Expr *EExpr, const Expr *UpExpr) { LValue X = CGF.EmitLValue(XExpr); @@ -4572,7 +4589,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitOMPAtomicSimpleUpdateExpr( X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::AtomicOrdering::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { + [&CGF, UpExpr, VD, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate( VD, [&CGF, VD, XRValue, Loc]() -> Address { @@ -4874,25 +4891,45 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) { return Body; } -/// \brief Emit the num_teams clause of an enclosed teams directive at the -/// target region scope. If there is no teams directive associated with the -/// target directive, or if there is no num_teams clause associated with the -/// enclosed teams directive, return nullptr. +/// Emit the number of teams for a target directive. Inspect the num_teams +/// clause associated with a teams construct combined or closely nested +/// with the target directive. +/// +/// Emit a team of size one for directives such as 'target parallel' that +/// have no associated teams construct. +/// +/// Otherwise, return nullptr. static llvm::Value * -emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - // FIXME: For the moment we do not support combined directives with target and - // teams, so we do not expect to get any num_teams clause in the provided - // directive. Once we support that, this assertion can be replaced by the - // actual emission of the clause expression. 
- assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && - "Not expecting clause in directive."); + auto &Bld = CGF.Builder; + + // If the target directive is combined with a teams directive: + // Return the value in the num_teams clause, if any. + // Otherwise, return 0 to denote the runtime default. + if (isOpenMPTeamsDirective(D.getDirectiveKind())) { + if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { + CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); + auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // The default value is 0. + return Bld.getInt32(0); + } + + // If the target directive is combined with a parallel directive but not a + // teams directive, start one team. + if (isOpenMPParallelDirective(D.getDirectiveKind())) + return Bld.getInt32(1); // If the current target region has a teams region enclosed, we need to get // the number of teams to pass to the runtime function call. This is done @@ -4910,38 +4947,92 @@ emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); } // If we have an enclosed teams directive but no num_teams clause we use // the default value 0. - return CGF.Builder.getInt32(0); + return Bld.getInt32(0); } // No teams associated with the directive. return nullptr; } -/// \brief Emit the thread_limit clause of an enclosed teams directive at the -/// target region scope. If there is no teams directive associated with the -/// target directive, or if there is no thread_limit clause associated with the -/// enclosed teams directive, return nullptr. +/// Emit the number of threads for a target directive. Inspect the +/// thread_limit clause associated with a teams construct combined or closely +/// nested with the target directive. +/// +/// Emit the num_threads clause for directives such as 'target parallel' that +/// have no associated teams construct. +/// +/// Otherwise, return nullptr. static llvm::Value * -emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - // FIXME: For the moment we do not support combined directives with target and - // teams, so we do not expect to get any thread_limit clause in the provided - // directive. Once we support that, this assertion can be replaced by the - // actual emission of the clause expression. - assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && - "Not expecting clause in directive."); + auto &Bld = CGF.Builder; + + // + // If the target directive is combined with a teams directive: + // Return the value in the thread_limit clause, if any. + // + // If the target directive is combined with a parallel directive: + // Return the value in the num_threads clause, if any. + // + // If both clauses are set, select the minimum of the two. 
+ // + // If neither teams or parallel combined directives set the number of threads + // in a team, return 0 to denote the runtime default. + // + // If this is not a teams directive return nullptr. + + if (isOpenMPTeamsDirective(D.getDirectiveKind()) || + isOpenMPParallelDirective(D.getDirectiveKind())) { + llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); + llvm::Value *NumThreadsVal = nullptr; + llvm::Value *ThreadLimitVal = nullptr; + + if (const auto *ThreadLimitClause = + D.getSingleClause<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), + /*IgnoreResultAssign*/ true); + ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } + + if (const auto *NumThreadsClause = + D.getSingleClause<OMPNumThreadsClause>()) { + CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + llvm::Value *NumThreads = + CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign*/ true); + NumThreadsVal = + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); + } + + // Select the lesser of thread_limit and num_threads. + if (NumThreadsVal) + ThreadLimitVal = ThreadLimitVal + ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, + ThreadLimitVal), + NumThreadsVal, ThreadLimitVal) + : NumThreadsVal; + + // Set default value passed to the runtime if either teams or a target + // parallel type directive is found but no clause is specified. + if (!ThreadLimitVal) + ThreadLimitVal = DefaultThreadLimitVal; + + return ThreadLimitVal; + } // If the current target region has a teams region enclosed, we need to get // the thread limit to pass to the runtime function call. This is done @@ -5984,8 +6075,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFnID, OffloadError, OffloadErrorQType, + auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFnID, OffloadError, &D](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); // Emit the offloading arrays. @@ -6021,24 +6112,50 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Return value of the runtime offloading call. llvm::Value *Return; - auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); - auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); + auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); + auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); - // If we have NumTeams defined this means that we have an enclosed teams - // region. Therefore we also expect to have ThreadLimit defined. These two - // values should be defined in the presence of a teams directive, regardless - // of having any clauses associated. If the user is using teams but no - // clauses, these two values will be the default that should be passed to - // the runtime library - a 32-bit integer with the value zero. + // The target region is an outlined function launched by the runtime + // via calls __tgt_target() or __tgt_target_teams(). + // + // __tgt_target() launches a target region with one team and one thread, + // executing a serial region. This master thread may in turn launch + // more threads within its team upon encountering a parallel region, + // however, no additional teams can be launched on the device. 
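To make this selection concrete, a sketch of the constructs involved and the runtime entry point each would use (clause values are arbitrary and work() is a placeholder); for combined constructs that carry both thread_limit and num_threads, the smaller of the two values is what gets passed:

    void f(int n) {
      #pragma omp target                        // one team, one initial thread:
      work(n);                                  // launched via __tgt_target()

      #pragma omp target teams num_teams(4) thread_limit(64)
      work(n);                                  // __tgt_target_teams(): 4 teams,
                                                // at most 64 threads per team

      #pragma omp target parallel num_threads(128)
      work(n);                                  // __tgt_target_teams(): one team
                                                // of up to 128 threads
    }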
+ // + // __tgt_target_teams() launches a target region with one or more teams, + // each with one or more threads. This call is required for target + // constructs such as: + // 'target teams' + // 'target' / 'teams' + // 'target teams distribute parallel for' + // 'target parallel' + // and so on. + // + // Note that on the host and CPU targets, the runtime implementation of + // these calls simply call the outlined function without forking threads. + // The outlined functions themselves have runtime calls to + // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by + // the compiler in emitTeamsCall() and emitParallelCall(). + // + // In contrast, on the NVPTX target, the implementation of + // __tgt_target_teams() launches a GPU kernel with the requested number + // of teams and threads so no additional calls to the runtime are required. if (NumTeams) { - assert(ThreadLimit && "Thread limit expression should be available along " - "with number of teams."); + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have NumThreads defined. These two + // values should be defined in the presence of a teams directive, + // regardless of having any clauses associated. If the user is using teams + // but no clauses, these two values will be the default that should be + // passed to the runtime library - a 32-bit integer with the value zero. + assert(NumThreads && "Thread limit expression should be available along " + "with number of teams."); llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, NumTeams, - ThreadLimit}; + NumThreads}; Return = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); } else { @@ -6095,17 +6212,18 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (!S) return; - // If we find a OMP target directive, codegen the outline function and - // register the result. - // FIXME: Add other directives with target when they become supported. - bool isTargetDirective = isa<OMPTargetDirective>(S); + // Codegen OMP target directives that offload compute to the device. + bool requiresDeviceCodegen = + isa<OMPExecutableDirective>(S) && + isOpenMPTargetExecutionDirective( + cast<OMPExecutableDirective>(S)->getDirectiveKind()); - if (isTargetDirective) { - auto *E = cast<OMPExecutableDirective>(S); + if (requiresDeviceCodegen) { + auto &E = *cast<OMPExecutableDirective>(S); unsigned DeviceID; unsigned FileID; unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, + getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, FileID, Line); // Is this a target region that should not be emitted as an entry point? 
If @@ -6114,13 +6232,22 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, ParentName, Line)) return; - llvm::Function *Fn; - llvm::Constant *Addr; - std::tie(Fn, Addr) = - CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( - CGM, cast<OMPTargetDirective>(*E), ParentName, - /*isOffloadEntry=*/true); - assert(Fn && Addr && "Target region emission failed."); + switch (S->getStmtClass()) { + case Stmt::OMPTargetDirectiveClass: + CodeGenFunction::EmitOMPTargetDeviceFunction( + CGM, ParentName, cast<OMPTargetDirective>(*S)); + break; + case Stmt::OMPTargetParallelDirectiveClass: + CodeGenFunction::EmitOMPTargetParallelDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); + break; + case Stmt::OMPTargetTeamsDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); + break; + default: + llvm_unreachable("Unknown target directive for OpenMP device codegen."); + } return; } @@ -6271,8 +6398,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. - auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction]( - CodeGenFunction &CGF, PrePostActionTy &) { + auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, + PrePostActionTy &) { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; @@ -6318,8 +6445,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( }; // Generate code for the closing of the data region. - auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; @@ -6397,7 +6523,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( "Expecting either target enter, exit data, or update directives."); // Generate the code for the opening of the data environment. - auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 61ddc702ed24a..7901a6b7a8fce 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -527,6 +527,7 @@ public: /// Get combiner/initializer for the specified user-defined reduction, if any. virtual std::pair<llvm::Function *, llvm::Function *> getUserDefinedReduction(const OMPDeclareReductionDecl *D); + /// \brief Emits outlined function for the specified OpenMP parallel directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). @@ -535,7 +536,19 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- virtual llvm::Value *emitParallelOrTeamsOutlinedFunction( + virtual llvm::Value *emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + + /// \brief Emits outlined function for the specified OpenMP teams directive + /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, + /// kmp_int32 BoundID, struct context_vars*). + /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. + virtual llvm::Value *emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); @@ -880,6 +893,32 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel = false); + + /// Emits reduction function. + /// \param ArgsType Array type containing pointers to reduction variables. + /// \param Privates List of private copies for original reduction arguments. + /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. + /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' + /// or 'operator binop(LHS, RHS)'. + llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps); + + /// Emits single reduction combiner + void emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS); + + struct ReductionOptionsTy { + bool WithNowait; + bool SimpleReduction; + OpenMPDirectiveKind ReductionKind; + }; /// \brief Emit a code for reduction clause. Next code should be emitted for /// reduction: /// \code @@ -916,14 +955,18 @@ public: /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. - /// \param WithNowait true if parent directive has also nowait clause, false - /// otherwise. + /// \param Options List of options for reduction codegen: + /// WithNowait true if parent directive has also nowait clause, false + /// otherwise. + /// SimpleReduction Emit reduction operation only. Used for omp simd + /// directive on the host. + /// ReductionKind The kind of reduction to perform. virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps, - bool WithNowait, bool SimpleReduction); + ReductionOptionsTy Options); /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); @@ -991,7 +1034,7 @@ public: virtual bool emitTargetGlobalVariable(GlobalDecl GD); /// \brief Emit the global \a GD if it is meaningful for the target. Returns - /// if it was emitted succesfully. + /// if it was emitted successfully. /// \param GD Global to scan. 
virtual bool emitTargetGlobal(GlobalDecl GD); diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 6a6d832e33cd5..c3391d087b759 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -26,6 +26,11 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_kernel_init, /// \brief Call to void __kmpc_kernel_deinit(); OMPRTL_NVPTX__kmpc_kernel_deinit, + /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + /// short RequiresOMPRuntime, short RequiresDataSharing); + OMPRTL_NVPTX__kmpc_spmd_kernel_init, + /// \brief Call to void __kmpc_spmd_kernel_deinit(); + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void /// *outlined_function); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, @@ -39,6 +44,30 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 /// global_tid); OMPRTL_NVPTX__kmpc_end_serialized_parallel, + /// \brief Call to int32_t __kmpc_shuffle_int32(int32_t element, + /// int16_t lane_offset, int16_t warp_size); + OMPRTL_NVPTX__kmpc_shuffle_int32, + /// \brief Call to int64_t __kmpc_shuffle_int64(int64_t element, + /// int16_t lane_offset, int16_t warp_size); + OMPRTL_NVPTX__kmpc_shuffle_int64, + /// \brief Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32 + /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); + OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, + /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, + /// int32_t num_vars, size_t reduce_size, void *reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), + /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, + /// int32_t index, int32_t width), + /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t + /// index, int32_t width, int32_t reduce)) + OMPRTL_NVPTX__kmpc_teams_reduce_nowait, + /// \brief Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); + OMPRTL_NVPTX__kmpc_end_reduce_nowait }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -76,6 +105,47 @@ public: CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; + +// A class to track the execution mode when codegening directives within +// a target region. The appropriate mode (generic/spmd) is set on entry +// to the target region and used by containing directives such as 'parallel' +// to emit optimized code. +class ExecutionModeRAII { +private: + CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode; + CGOpenMPRuntimeNVPTX::ExecutionMode &Mode; + +public: + ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, + CGOpenMPRuntimeNVPTX::ExecutionMode NewMode) + : Mode(Mode) { + SavedMode = Mode; + Mode = NewMode; + } + ~ExecutionModeRAII() { Mode = SavedMode; } +}; + +/// GPU Configuration: This information can be derived from cuda registers, +/// however, providing compile time constants helps generate more efficient +/// code. For all practical purposes this is fine because the configuration +/// is the same for all known NVPTX architectures. 
+enum MachineConfiguration : unsigned { + WarpSize = 32, + /// Number of bits required to represent a lane identifier, which is + /// computed as log_2(WarpSize). + LaneIDBits = 5, + LaneIDMask = WarpSize - 1, + + /// Global memory alignment for performance. + GlobalMemoryAlignment = 256, +}; + +enum NamedBarrier : unsigned { + /// Synchronize on this barrier #ID using a named barrier primitive. + /// Only the subset of active threads in a parallel region arrive at the + /// barrier. + NB_Parallel = 1, +}; } // anonymous namespace /// Get the GPU warp size. @@ -96,6 +166,23 @@ static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { llvm::None, "nvptx_tid"); } +/// Get the id of the warp in the block. +/// We assume that the warp size is 32, which is always the case +/// on the NVPTX device, to generate more efficient code. +static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id"); +} + +/// Get the id of the current lane in the Warp. +/// We assume that the warp size is 32, which is always the case +/// on the NVPTX device, to generate more efficient code. +static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask), + "nvptx_lane_id"); +} + /// Get the maximum number of threads in a block of the GPU. static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; @@ -112,16 +199,37 @@ static void getNVPTXCTABarrier(CodeGenFunction &CGF) { &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } +/// Get barrier #ID to synchronize selected (multiple of warp size) threads in +/// a CTA. +static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, + llvm::Value *NumThreads) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; + Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(), + llvm::Intrinsic::nvvm_barrier), + Args); +} + /// Synchronize all GPU threads in a block. static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } +/// Synchronize worker threads in a parallel region. +static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) { + return getNVPTXBarrier(CGF, NB_Parallel, NumThreads); +} + /// Get the value of the thread_limit clause in the teams directive. -/// The runtime encodes thread_limit in the launch parameter, always starting -/// thread_limit+warpSize threads per team. -static llvm::Value *getThreadLimit(CodeGenFunction &CGF) { +/// For the 'generic' execution mode, the runtime encodes thread_limit in +/// the launch parameters, always starting thread_limit+warpSize threads per +/// CTA. The threads in the last warp are reserved for master execution. +/// For the 'spmd' execution mode, all threads in a CTA are part of the team. +static llvm::Value *getThreadLimit(CodeGenFunction &CGF, + bool IsInSpmdExecutionMode = false) { CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), - "thread_limit"); + return IsInSpmdExecutionMode + ? getNVPTXNumThreads(CGF) + : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), + "thread_limit"); } /// Get the thread id of the OMP master thread. 
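A minimal sketch, assuming the fixed WarpSize of 32 and a CTA of NT threads (tid and NT are placeholder names, not identifiers from this change), of the per-thread arithmetic that the helpers above emit:

    // per-thread values for CUDA thread id 'tid' (illustration only)
    unsigned warp_id = tid >> 5;              // getNVPTXWarpID: tid / WarpSize (LaneIDBits == 5)
    unsigned lane_id = tid & (32 - 1);        // getNVPTXLaneID: tid % WarpSize (LaneIDMask)
    unsigned thread_limit_generic = NT - 32;  // generic mode: last warp reserved for the master
    unsigned thread_limit_spmd    = NT;       // spmd mode: every thread in the CTA joins the team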
@@ -159,12 +267,34 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); } +bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const { + return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; +} + +static CGOpenMPRuntimeNVPTX::ExecutionMode +getExecutionModeForDirective(CodeGenModule &CGM, + const OMPExecutableDirective &D) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + switch (DirectiveKind) { + case OMPD_target: + case OMPD_target_teams: + return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; + case OMPD_target_parallel: + return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; + default: + llvm_unreachable("Unsupported directive on NVPTX device."); + } + llvm_unreachable("Unsupported directive on NVPTX device."); +} + void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + ExecutionModeRAII ModeRAII(CurrentExecutionMode, + CGOpenMPRuntimeNVPTX::ExecutionMode::Generic); EntryFunctionState EST; WorkerFunctionState WST(CGM); Work.clear(); @@ -252,6 +382,94 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF, EST.ExitBB = nullptr; } +void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen) { + ExecutionModeRAII ModeRAII(CurrentExecutionMode, + CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd); + EntryFunctionState EST; + + // Emit target region as a standalone region. + class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + const OMPExecutableDirective &D; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + const OMPExecutableDirective &D) + : RT(RT), EST(EST), D(D) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitSpmdEntryHeader(CGF, EST, D); + } + void Exit(CodeGenFunction &CGF) override { + RT.emitSpmdEntryFooter(CGF, EST); + } + } Action(*this, EST, D); + CodeGen.setAction(Action); + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); + return; +} + +void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( + CodeGenFunction &CGF, EntryFunctionState &EST, + const OMPExecutableDirective &D) { + auto &Bld = CGF.Builder; + + // Setup BBs in entry function. + llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); + EST.ExitBB = CGF.createBasicBlock(".exit"); + + // Initialize the OMP state in the runtime; called by all active threads. + // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters + // based on code analysis of the target region. 
+ llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true), + /*RequiresOMPRuntime=*/Bld.getInt16(1), + /*RequiresDataSharing=*/Bld.getInt16(1)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + CGF.EmitBranch(ExecuteBB); + + CGF.EmitBlock(ExecuteBB); +} + +void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF, + EntryFunctionState &EST) { + if (!EST.ExitBB) + EST.ExitBB = CGF.createBasicBlock(".exit"); + + llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit"); + CGF.EmitBranch(OMPDeInitBB); + + CGF.EmitBlock(OMPDeInitBB); + // DeInitialize the OMP state in the runtime; called by all active threads. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None); + CGF.EmitBranch(EST.ExitBB); + + CGF.EmitBlock(EST.ExitBB); + EST.ExitBB = nullptr; +} + +// Create a unique global variable to indicate the execution mode of this target +// region. The execution mode is either 'generic', or 'spmd' depending on the +// target directive. This variable is picked up by the offload library to setup +// the device appropriately before kernel launch. If the execution mode is +// 'generic', the runtime reserves one warp for the master, otherwise, all +// warps participate in parallel work. +static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, + CGOpenMPRuntimeNVPTX::ExecutionMode Mode) { + (void)new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode")); +} + void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { auto &Ctx = CGM.getContext(); @@ -385,6 +603,22 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; } + case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { + // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + // short RequiresOMPRuntime, short RequiresDataSharing); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); + break; + } + case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: { + // Build void __kmpc_spmd_kernel_deinit(); + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit"); + break; + } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( /// void *outlined_function); @@ -428,6 +662,103 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; } + case OMPRTL_NVPTX__kmpc_shuffle_int32: { + // Build int32_t __kmpc_shuffle_int32(int32_t element, + // int16_t lane_offset, int16_t warp_size); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); + break; + } + case OMPRTL_NVPTX__kmpc_shuffle_int64: { + // Build int64_t __kmpc_shuffle_int64(int64_t element, + // int16_t lane_offset, int16_t warp_size); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::FunctionType 
*FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); + break; + } + case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: { + // Build int32_t kmpc_nvptx_parallel_reduce_nowait(kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t Algorithm Version), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); + break; + } + case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { + // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, + // int32_t num_vars, size_t reduce_size, void *reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t shortCircuit), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), + // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, + // int32_t index, int32_t width), + // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, + // int32_t index, int32_t width, int32_t reduce)) + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, + CGM.Int32Ty, CGM.Int32Ty}; + auto *CopyToScratchpadFnTy = + llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams, + /*isVarArg=*/false); + llvm::Type *LoadReduceTypeParams[] = { + CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; + auto *LoadReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo(), + CopyToScratchpadFnTy->getPointerTo(), + LoadReduceFnTy->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait"); + break; + } + case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { + // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); + llvm::Type *TypeParams[] = {CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); + break; + 
} } return RTLFn; } @@ -463,39 +794,74 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( assert(!ParentName.empty() && "Invalid target region parent name!"); - emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); + CGOpenMPRuntimeNVPTX::ExecutionMode Mode = + getExecutionModeForDirective(CGM, D); + switch (Mode) { + case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic: + emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + CodeGen); + break; + case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd: + emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + CodeGen); + break; + case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown: + llvm_unreachable( + "Unknown programming model for OpenMP directive on NVPTX target."); + } + + setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM) { + : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) { if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP NVPTX can only handle device code."); } +void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, + OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) { + // Do nothing in case of Spmd mode and L0 parallel. + // TODO: If in Spmd mode and L1 parallel emit the clause. + if (isInSpmdExecutionMode()) + return; + + CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc); +} + +void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) { + // Do nothing in case of Spmd mode and L0 parallel. + // TODO: If in Spmd mode and L1 parallel emit the clause. + if (isInSpmdExecutionMode()) + return; + + CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc); +} + void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) {} -llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction( +llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar, + InnermostKind, CodeGen); +} - llvm::Function *OutlinedFun = nullptr; - if (isa<OMPTeamsDirective>(D)) { - llvm::Value *OutlinedFunVal = - CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); - OutlinedFun = cast<llvm::Function>(OutlinedFunVal); - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } else { - llvm::Value *OutlinedFunVal = - CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); - OutlinedFun = cast<llvm::Function>(OutlinedFunVal); - } +llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + + llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen); + llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); + OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); return OutlinedFun; } @@ -525,7 +891,10 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall( if (!CGF.HaveInsertPoint()) return; - 
emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + if (isInSpmdExecutionMode()) + emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + else + emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); } void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( @@ -533,8 +902,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { llvm::Function *Fn = cast<llvm::Function>(OutlinedFn); - auto &&L0ParallelGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) { CGBuilderTy &Bld = CGF.Builder; // Prepare for parallel region. Indicate the outlined function. @@ -565,8 +933,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, + PrePostActionTy &Action) { Action.Enter(CGF); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; @@ -596,3 +964,1289 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( ThenRCG(CGF); } } + +void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { + // Just call the outlined function to execute the parallel region. + // OutlinedFn(>id, &zero, CapturedStruct); + // + // TODO: Do something with IfCond when support for the 'if' clause + // is added on Spmd target directives. + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back( + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + OutlinedFnArgs.push_back( + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); +} + +/// This function creates calls to one of two shuffle functions to copy +/// variables between lanes in a warp. +static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, + QualType ElemTy, + llvm::Value *Elem, + llvm::Value *Offset) { + auto &CGM = CGF.CGM; + auto &C = CGM.getContext(); + auto &Bld = CGF.Builder; + CGOpenMPRuntimeNVPTX &RT = + *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime())); + + unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity(); + assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction."); + + OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4 + ? OMPRTL_NVPTX__kmpc_shuffle_int32 + : OMPRTL_NVPTX__kmpc_shuffle_int64; + + // Cast all types to 32- or 64-bit values before calling shuffle routines. + auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty; + auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy); + auto *WarpSize = CGF.EmitScalarConversion( + getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true), + C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation()); + + auto *ShuffledVal = + CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn), + {ElemCast, Offset, WarpSize}); + + return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy)); +} + +namespace { +enum CopyAction : unsigned { + // RemoteLaneToThread: Copy over a Reduce list from a remote lane in + // the warp using shuffle instructions. 
+ RemoteLaneToThread, + // ThreadCopy: Make a copy of a Reduce list on the thread's stack. + ThreadCopy, + // ThreadToScratchpad: Copy a team-reduced array to the scratchpad. + ThreadToScratchpad, + // ScratchpadToThread: Copy from a scratchpad array in global memory + // containing team-reduced data to a thread's stack. + ScratchpadToThread, +}; +} // namespace + +struct CopyOptionsTy { + llvm::Value *RemoteLaneOffset; + llvm::Value *ScratchpadIndex; + llvm::Value *ScratchpadWidth; +}; + +/// Emit instructions to copy a Reduce list, which contains partially +/// aggregated values, in the specified direction. +static void emitReductionListCopy( + CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, + ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase, + CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { + + auto &CGM = CGF.CGM; + auto &C = CGM.getContext(); + auto &Bld = CGF.Builder; + + auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; + auto *ScratchpadIndex = CopyOptions.ScratchpadIndex; + auto *ScratchpadWidth = CopyOptions.ScratchpadWidth; + + // Iterates, element-by-element, through the source Reduce list and + // make a copy. + unsigned Idx = 0; + unsigned Size = Privates.size(); + for (auto &Private : Privates) { + Address SrcElementAddr = Address::invalid(); + Address DestElementAddr = Address::invalid(); + Address DestElementPtrAddr = Address::invalid(); + // Should we shuffle in an element from a remote lane? + bool ShuffleInElement = false; + // Set to true to update the pointer in the dest Reduce list to a + // newly created element. + bool UpdateDestListPtr = false; + // Increment the src or dest pointer to the scratchpad, for each + // new element. + bool IncrScratchpadSrc = false; + bool IncrScratchpadDest = false; + + switch (Action) { + case RemoteLaneToThread: { + // Step 1.1: Get the address for the src element in the Reduce list. + Address SrcElementPtrAddr = + Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( + SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + SrcElementAddr = + Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + + // Step 1.2: Create a temporary to store the element in the destination + // Reduce list. + DestElementPtrAddr = + Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementAddr = + CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); + ShuffleInElement = true; + UpdateDestListPtr = true; + break; + } + case ThreadCopy: { + // Step 1.1: Get the address for the src element in the Reduce list. + Address SrcElementPtrAddr = + Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( + SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + SrcElementAddr = + Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + + // Step 1.2: Get the address for dest element. The destination + // element has already been created on the thread's stack. 
+ DestElementPtrAddr = + Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + llvm::Value *DestElementPtr = + CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()); + Address DestElemAddr = + Address(DestElementPtr, C.getTypeAlignInChars(Private->getType())); + DestElementAddr = Bld.CreateElementBitCast( + DestElemAddr, CGF.ConvertTypeForMem(Private->getType())); + break; + } + case ThreadToScratchpad: { + // Step 1.1: Get the address for the src element in the Reduce list. + Address SrcElementPtrAddr = + Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( + SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + SrcElementAddr = + Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + + // Step 1.2: Get the address for dest element: + // address = base + index * ElementSizeInChars. + unsigned ElementSizeInChars = + C.getTypeSizeInChars(Private->getType()).getQuantity(); + auto *CurrentOffset = + Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), + ScratchpadIndex); + auto *ScratchPadElemAbsolutePtrVal = + Bld.CreateAdd(DestBase.getPointer(), CurrentOffset); + ScratchPadElemAbsolutePtrVal = + Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); + Address ScratchpadPtr = + Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); + DestElementAddr = Bld.CreateElementBitCast( + ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType())); + IncrScratchpadDest = true; + break; + } + case ScratchpadToThread: { + // Step 1.1: Get the address for the src element in the scratchpad. + // address = base + index * ElementSizeInChars. + unsigned ElementSizeInChars = + C.getTypeSizeInChars(Private->getType()).getQuantity(); + auto *CurrentOffset = + Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), + ScratchpadIndex); + auto *ScratchPadElemAbsolutePtrVal = + Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset); + ScratchPadElemAbsolutePtrVal = + Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); + SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); + IncrScratchpadSrc = true; + + // Step 1.2: Create a temporary to store the element in the destination + // Reduce list. + DestElementPtrAddr = + Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementAddr = + CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); + UpdateDestListPtr = true; + break; + } + } + + // Regardless of src and dest of copy, we emit the load of src + // element as this is required in all directions + SrcElementAddr = Bld.CreateElementBitCast( + SrcElementAddr, CGF.ConvertTypeForMem(Private->getType())); + llvm::Value *Elem = + CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, + Private->getType(), SourceLocation()); + + // Now that all active lanes have read the element in the + // Reduce list, shuffle over the value from the remote lane. + if (ShuffleInElement) { + Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem, + RemoteLaneOffset); + } + + // Store the source element value to the dest element address. + CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, + Private->getType()); + + // Step 3.1: Modify reference in dest Reduce list as needed. + // Modifying the reference in Reduce list to point to the newly + // created element. 
The element is live in the current function + // scope and that of functions it invokes (i.e., reduce_function). + // RemoteReduceData[i] = (void*)&RemoteElem + if (UpdateDestListPtr) { + CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast( + DestElementAddr.getPointer(), CGF.VoidPtrTy), + DestElementPtrAddr, /*Volatile=*/false, + C.VoidPtrTy); + } + + // Step 4.1: Increment SrcBase/DestBase so that it points to the starting + // address of the next element in scratchpad memory, unless we're currently + // processing the last one. Memory alignment is also taken care of here. + if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) { + llvm::Value *ScratchpadBasePtr = + IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer(); + unsigned ElementSizeInChars = + C.getTypeSizeInChars(Private->getType()).getQuantity(); + ScratchpadBasePtr = Bld.CreateAdd( + ScratchpadBasePtr, + Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get( + CGM.SizeTy, ElementSizeInChars))); + + // Take care of global memory alignment for performance + ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateSDiv( + ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); + ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateMul( + ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); + + if (IncrScratchpadDest) + DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); + else /* IncrScratchpadSrc = true */ + SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); + } + + Idx++; + } +} + +/// This function emits a helper that loads data from the scratchpad array +/// and (optionally) reduces it with the input operand. +/// +/// load_and_reduce(local, scratchpad, index, width, should_reduce) +/// reduce_data remote; +/// for elem in remote: +/// remote.elem = Scratchpad[elem_id][index] +/// if (should_reduce) +/// local = local @ remote +/// else +/// local = remote +llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, + llvm::Value *ReduceFn) { + auto &C = CGM.getContext(); + auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + + // Destination of the copy. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + // Base address of the scratchpad array, with each element storing a + // Reduce list per team. + ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + // A source index into the scratchpad array. + ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, Int32Ty); + // Row width of an element in the scratchpad array, typically + // the number of teams. + ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, Int32Ty); + // If should_reduce == 1, then it's load AND reduce, + // If should_reduce == 0 (or otherwise), then it only loads (+ copy). + // The latter case is used for initialization. 
+ ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, Int32Ty); + + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&ScratchPadArg); + Args.push_back(&IndexArg); + Args.push_back(&WidthArg); + Args.push_back(&ShouldReduceArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_load_and_reduce", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + // Get local Reduce list pointer. + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address ReduceListAddr( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); + llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( + AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + + Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); + llvm::Value *IndexVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGM.SizeTy, /*isSigned=*/true); + + Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); + llvm::Value *WidthVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGM.SizeTy, /*isSigned=*/true); + + Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg); + llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar( + AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation()); + + // The absolute ptr address to the base addr of the next element to copy. + llvm::Value *CumulativeElemBasePtr = + Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); + Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); + + // Create a Remote Reduce list to store the elements read from the + // scratchpad array. + Address RemoteReduceList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list"); + + // Assemble remote Reduce list from scratchpad array. + emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates, + SrcDataAddr, RemoteReduceList, + {/*RemoteLaneOffset=*/nullptr, + /*ScratchpadIndex=*/IndexVal, + /*ScratchpadWidth=*/WidthVal}); + + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + + auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1)); + Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); + + CGF.EmitBlock(ThenBB); + // We should reduce with the local Reduce list. 
+ // reduce_function(LocalReduceList, RemoteReduceList) + llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + ReduceListAddr.getPointer(), CGF.VoidPtrTy); + llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + RemoteReduceList.getPointer(), CGF.VoidPtrTy); + CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr}); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + // No reduction; just copy: + // Local Reduce list = Remote Reduce list. + emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, + RemoteReduceList, ReduceListAddr); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that stores reduced data from the team +/// master to a scratchpad array in global memory. +/// +/// for elem in Reduce List: +/// scratchpad[elem_id][index] = elem +/// +llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy) { + + auto &C = CGM.getContext(); + auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + + // Source of the copy. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + // Base address of the scratchpad array, with each element storing a + // Reduce list per team. + ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + // A destination index into the scratchpad array, typically the team + // identifier. + ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, Int32Ty); + // Row width of an element in the scratchpad array, typically + // the number of teams. + ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, Int32Ty); + + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&ScratchPadArg); + Args.push_back(&IndexArg); + Args.push_back(&WidthArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_copy_to_scratchpad", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. 
+ CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address SrcDataAddr( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); + llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( + AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + + Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); + llvm::Value *IndexVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGF.SizeTy, /*isSigned=*/true); + + Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); + llvm::Value *WidthVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGF.SizeTy, /*isSigned=*/true); + + // The absolute ptr address to the base addr of the next element to copy. + llvm::Value *CumulativeElemBasePtr = + Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); + Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); + + emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates, + SrcDataAddr, DestDataAddr, + {/*RemoteLaneOffset=*/nullptr, + /*ScratchpadIndex=*/IndexVal, + /*ScratchpadWidth=*/WidthVal}); + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that gathers Reduce lists from the first +/// lane of every active warp to lanes in the first warp. +/// +/// void inter_warp_copy_func(void* reduce_data, num_warps) +/// shared smem[warp_size]; +/// For all data entries D in reduce_data: +/// If (I am the first lane in each warp) +/// Copy my local D to smem[warp_id] +/// sync +/// if (I am the first warp) +/// Copy smem[thread_id] to my local D +/// sync +static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy) { + auto &C = CGM.getContext(); + auto &M = CGM.getModule(); + + // ReduceList: thread local Reduce list. + // At the stage of the computation when this function is called, partially + // aggregated values reside in the first lane of every active warp. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + // NumWarps: number of warps active in the parallel region. This could + // be smaller than 32 (max warps in a CTA) for partial block reduction. + ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, + C.getIntTypeForBitwidth(32, /* Signed */ true)); + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&NumWarpsArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_inter_warp_copy_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. 
+ CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + // This array is used as a medium to transfer, one reduce element at a time, + // the data from the first lane of every warp to lanes in the first warp + // in order to perform the final step of a reduction in a parallel region + // (reduction across warps). The array is placed in NVPTX __shared__ memory + // for reduced latency, as well as to have a distinct copy for concurrently + // executing target regions. The array is declared with common linkage so + // as to be shared across compilation units. + const char *TransferMediumName = + "__openmp_nvptx_data_transfer_temporary_storage"; + llvm::GlobalVariable *TransferMedium = + M.getGlobalVariable(TransferMediumName); + if (!TransferMedium) { + auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize); + unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); + TransferMedium = new llvm::GlobalVariable( + M, Ty, + /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, + llvm::Constant::getNullValue(Ty), TransferMediumName, + /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, + SharedAddressSpace); + } + + // Get the CUDA thread id of the current OpenMP thread on the GPU. + auto *ThreadID = getNVPTXThreadID(CGF); + // nvptx_lane_id = nvptx_id % warpsize + auto *LaneID = getNVPTXLaneID(CGF); + // nvptx_warp_id = nvptx_id / warpsize + auto *WarpID = getNVPTXWarpID(CGF); + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + unsigned Idx = 0; + for (auto &Private : Privates) { + // + // Warp master copies reduce element to transfer medium in __shared__ + // memory. + // + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + + // if (lane_id == 0) + auto IsWarpMaster = + Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master"); + Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); + CGF.EmitBlock(ThenBB); + + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = (type[i]*)(elemptrptr) + Address ElemPtr = + Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); + ElemPtr = Bld.CreateElementBitCast( + ElemPtr, CGF.ConvertTypeForMem(Private->getType())); + // elem = *elemptr + llvm::Value *Elem = CGF.EmitLoadOfScalar( + ElemPtr, /*Volatile=*/false, Private->getType(), SourceLocation()); + + // Get pointer to location in transfer medium. + // MediumPtr = &medium[warp_id] + llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); + Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType())); + // Casting to actual data type. 
+ // MediumPtr = (type[i]*)MediumPtrAddr; + MediumPtr = Bld.CreateElementBitCast( + MediumPtr, CGF.ConvertTypeForMem(Private->getType())); + + //*MediumPtr = elem + Bld.CreateStore(Elem, MediumPtr); + + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); + llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( + AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation()); + + auto *NumActiveThreads = Bld.CreateNSWMul( + NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads"); + // named_barrier_sync(ParallelBarrierID, num_active_threads) + syncParallelThreads(CGF, NumActiveThreads); + + // + // Warp 0 copies reduce element from transfer medium. + // + llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); + + // Up to 32 threads in warp 0 are active. + auto IsActiveThread = + Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); + Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); + + CGF.EmitBlock(W0ThenBB); + + // SrcMediumPtr = &medium[tid] + llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); + Address SrcMediumPtr(SrcMediumPtrVal, + C.getTypeAlignInChars(Private->getType())); + // SrcMediumVal = *SrcMediumPtr; + SrcMediumPtr = Bld.CreateElementBitCast( + SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType())); + llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar( + SrcMediumPtr, /*Volatile=*/false, Private->getType(), SourceLocation()); + + // TargetElemPtr = (type[i]*)(SrcDataAddr[i]) + Address TargetElemPtrPtr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( + TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + Address TargetElemPtr = + Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType())); + TargetElemPtr = Bld.CreateElementBitCast( + TargetElemPtr, CGF.ConvertTypeForMem(Private->getType())); + + // *TargetElemPtr = SrcMediumVal; + CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false, + Private->getType()); + Bld.CreateBr(W0MergeBB); + + CGF.EmitBlock(W0ElseBB); + Bld.CreateBr(W0MergeBB); + + CGF.EmitBlock(W0MergeBB); + + // While warp 0 copies values from transfer medium, all other warps must + // wait. + syncParallelThreads(CGF, NumActiveThreads); + Idx++; + } + + CGF.FinishFunction(); + return Fn; +} + +/// Emit a helper that reduces data across two OpenMP threads (lanes) +/// in the same warp. It uses shuffle instructions to copy over data from +/// a remote lane's stack. The reduction algorithm performed is specified +/// by the fourth parameter. +/// +/// Algorithm Versions. +/// Full Warp Reduce (argument value 0): +/// This algorithm assumes that all 32 lanes are active and gathers +/// data from these 32 lanes, producing a single resultant value. +/// Contiguous Partial Warp Reduce (argument value 1): +/// This algorithm assumes that only a *contiguous* subset of lanes +/// are active. This happens for the last warp in a parallel region +/// when the user specified num_threads is not an integer multiple of +/// 32. This contiguous subset always starts with the zeroth lane. +/// Partial Warp Reduce (argument value 2): +/// This algorithm gathers data from any number of lanes at any position. 
+/// All reduced values are stored in the lowest possible lane. The set +/// of problems every algorithm addresses is a super set of those +/// addressable by algorithms with a lower version number. Overhead +/// increases as algorithm version increases. +/// +/// Terminology +/// Reduce element: +/// Reduce element refers to the individual data field with primitive +/// data types to be combined and reduced across threads. +/// Reduce list: +/// Reduce list refers to a collection of local, thread-private +/// reduce elements. +/// Remote Reduce list: +/// Remote Reduce list refers to a collection of remote (relative to +/// the current thread) reduce elements. +/// +/// We distinguish between three states of threads that are important to +/// the implementation of this function. +/// Alive threads: +/// Threads in a warp executing the SIMT instruction, as distinguished from +/// threads that are inactive due to divergent control flow. +/// Active threads: +/// The minimal set of threads that has to be alive upon entry to this +/// function. The computation is correct iff active threads are alive. +/// Some threads are alive but they are not active because they do not +/// contribute to the computation in any useful manner. Turning them off +/// may introduce control flow overheads without any tangible benefits. +/// Effective threads: +/// In order to comply with the argument requirements of the shuffle +/// function, we must keep all lanes holding data alive. But at most +/// half of them perform value aggregation; we refer to this half of +/// threads as effective. The other half is simply handing off their +/// data. +/// +/// Procedure +/// Value shuffle: +/// In this step active threads transfer data from higher lane positions +/// in the warp to lower lane positions, creating Remote Reduce list. +/// Value aggregation: +/// In this step, effective threads combine their thread local Reduce list +/// with Remote Reduce list and store the result in the thread local +/// Reduce list. +/// Value copy: +/// In this step, we deal with the assumption made by algorithm 2 +/// (i.e. contiguity assumption). When we have an odd number of lanes +/// active, say 2k+1, only k threads will be effective and therefore k +/// new values will be produced. However, the Reduce list owned by the +/// (2k+1)th thread is ignored in the value aggregation. Therefore +/// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so +/// that the contiguity assumption still holds. +static llvm::Value * +emitShuffleAndReduceFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn) { + auto &C = CGM.getContext(); + + // Thread local Reduce list used to host the values of data to be reduced. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + // Current lane id; could be logical. + ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.ShortTy); + // Offset of the remote source lane relative to the current lane. + ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.ShortTy); + // Algorithm version. This is expected to be known at compile time. 
+ ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.ShortTy); + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&LaneIDArg); + Args.push_back(&RemoteLaneOffsetArg); + Args.push_back(&AlgoVerArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg); + llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar( + AddrLaneIDArg, /*Volatile=*/false, C.ShortTy, SourceLocation()); + + Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg); + llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar( + AddrRemoteLaneOffsetArg, /*Volatile=*/false, C.ShortTy, SourceLocation()); + + Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg); + llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar( + AddrAlgoVerArg, /*Volatile=*/false, C.ShortTy, SourceLocation()); + + // Create a local thread-private variable to host the Reduce list + // from a remote lane. + Address RemoteReduceList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list"); + + // This loop iterates through the list of reduce elements and copies, + // element by element, from a remote lane in the warp to RemoteReduceList, + // hosted on the thread's stack. + emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates, + LocalReduceList, RemoteReduceList, + {/*RemoteLaneOffset=*/RemoteLaneOffsetArgVal, + /*ScratchpadIndex=*/nullptr, + /*ScratchpadWidth=*/nullptr}); + + // The actions to be performed on the Remote Reduce list is dependent + // on the algorithm version. + // + // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 && + // LaneId % 2 == 0 && Offset > 0): + // do the reduction value aggregation + // + // The thread local variable Reduce list is mutated in place to host the + // reduced data, which is the aggregated value produced from local and + // remote lanes. + // + // Note that AlgoVer is expected to be a constant integer known at compile + // time. + // When AlgoVer==0, the first conjunction evaluates to true, making + // the entire predicate true during compile time. + // When AlgoVer==1, the second conjunction has only the second part to be + // evaluated during runtime. Other conjunctions evaluates to false + // during compile time. + // When AlgoVer==2, the third conjunction has only the second part to be + // evaluated during runtime. Other conjunctions evaluates to false + // during compile time. 
+ auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0)); + + auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); + auto CondAlgo1 = Bld.CreateAnd( + Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal)); + + auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); + auto CondAlgo2 = Bld.CreateAnd( + Algo2, + Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)), + Bld.getInt16(0))); + CondAlgo2 = Bld.CreateAnd( + CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0))); + + auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); + CondReduce = Bld.CreateOr(CondReduce, CondAlgo2); + + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); + + CGF.EmitBlock(ThenBB); + // reduce_function(LocalReduceList, RemoteReduceList) + llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + LocalReduceList.getPointer(), CGF.VoidPtrTy); + llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + RemoteReduceList.getPointer(), CGF.VoidPtrTy); + CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local + // Reduce list. + Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); + auto CondCopy = Bld.CreateAnd( + Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal)); + + llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont"); + Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB); + + CGF.EmitBlock(CpyThenBB); + emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, + RemoteReduceList, LocalReduceList); + Bld.CreateBr(CpyMergeBB); + + CGF.EmitBlock(CpyElseBB); + Bld.CreateBr(CpyMergeBB); + + CGF.EmitBlock(CpyMergeBB); + + CGF.FinishFunction(); + return Fn; +} + +/// +/// Design of OpenMP reductions on the GPU +/// +/// Consider a typical OpenMP program with one or more reduction +/// clauses: +/// +/// float foo; +/// double bar; +/// #pragma omp target teams distribute parallel for \ +/// reduction(+:foo) reduction(*:bar) +/// for (int i = 0; i < N; i++) { +/// foo += A[i]; bar *= B[i]; +/// } +/// +/// where 'foo' and 'bar' are reduced across all OpenMP threads in +/// all teams. In our OpenMP implementation on the NVPTX device an +/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads +/// within a team are mapped to CUDA threads within a threadblock. +/// Our goal is to efficiently aggregate values across all OpenMP +/// threads such that: +/// +/// - the compiler and runtime are logically concise, and +/// - the reduction is performed efficiently in a hierarchical +/// manner as follows: within OpenMP threads in the same warp, +/// across warps in a threadblock, and finally across teams on +/// the NVPTX device. +/// +/// Introduction to Decoupling +/// +/// We would like to decouple the compiler and the runtime so that the +/// latter is ignorant of the reduction variables (number, data types) +/// and the reduction operators. This allows a simpler interface +/// and implementation while still attaining good performance. 
+///
+/// Pseudocode for the aforementioned OpenMP program generated by the
+/// compiler is as follows:
+///
+/// 1. Create private copies of reduction variables on each OpenMP
+///    thread: 'foo_private', 'bar_private'
+/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
+///    to it and writes the result in 'foo_private' and 'bar_private'
+///    respectively.
+/// 3. Call the OpenMP runtime on the GPU to reduce within a team
+///    and store the result on the team master:
+///
+///     __kmpc_nvptx_parallel_reduce_nowait(...,
+///        reduceData, shuffleReduceFn, interWarpCpyFn)
+///
+///     where:
+///       struct ReduceData {
+///         float *foo;
+///         double *bar;
+///       } reduceData
+///       reduceData.foo = &foo_private
+///       reduceData.bar = &bar_private
+///
+///     'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
+///     auxiliary functions generated by the compiler that operate on
+///     variables of type 'ReduceData'. They help the runtime perform
+///     the algorithmic steps in a data-agnostic manner.
+///
+///     'shuffleReduceFn' is a pointer to a function that reduces data
+///     of type 'ReduceData' across two OpenMP threads (lanes) in the
+///     same warp. It takes the following arguments as input:
+///
+///     a. variable of type 'ReduceData' on the calling lane,
+///     b. its lane_id,
+///     c. an offset relative to the current lane_id to generate a
+///        remote_lane_id. The remote lane contains the second
+///        variable of type 'ReduceData' that is to be reduced.
+///     d. an algorithm version parameter determining which reduction
+///        algorithm to use.
+///
+///     'shuffleReduceFn' retrieves data from the remote lane using
+///     efficient GPU shuffle intrinsics and reduces, using the
+///     algorithm specified by the 4th parameter, the two operands
+///     element-wise. The result is written to the first operand.
+///
+///     Different reduction algorithms are implemented in different
+///     runtime functions, all calling 'shuffleReduceFn' to perform
+///     the essential reduction step. Therefore, based on the 4th
+///     parameter, this function behaves slightly differently to
+///     cooperate with the runtime to ensure correctness under
+///     different circumstances.
+///
+///     'InterWarpCpyFn' is a pointer to a function that transfers
+///     reduced variables across warps. It tunnels, through CUDA
+///     shared memory, the thread-private data of type 'ReduceData'
+///     from lane 0 of each warp to a lane in the first warp.
+/// 4. Call the OpenMP runtime on the GPU to reduce across teams.
+///    The last team writes the global reduced value to memory.
+///
+///     ret = __kmpc_nvptx_teams_reduce_nowait(...,
+///             reduceData, shuffleReduceFn, interWarpCpyFn,
+///             scratchpadCopyFn, loadAndReduceFn)
+///
+///     'scratchpadCopyFn' is a helper that stores reduced
+///     data from the team master to a scratchpad array in
+///     global memory.
+///
+///     'loadAndReduceFn' is a helper that loads data from
+///     the scratchpad array and reduces it with the input
+///     operand.
+///
+///     These compiler-generated functions hide address
+///     calculation and alignment information from the runtime.
+/// 5. if ret == 1:
+///     The team master of the last team stores the reduced
+///     result to the globals in memory.
+///     foo += reduceData.foo; bar *= reduceData.bar
+///
+///
+/// Warp Reduction Algorithms
+///
+/// On the warp level, we have three algorithms implemented in the
+/// OpenMP runtime depending on the number of active lanes:
+///
+/// Full Warp Reduction
+///
+/// The reduce algorithm within a warp where all lanes are active
+/// is implemented in the runtime as follows:
+///
+/// full_warp_reduce(void *reduce_data,
+///                  kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+///   for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
+///     ShuffleReduceFn(reduce_data, 0, offset, 0);
+/// }
+///
+/// The algorithm completes in log(2, WARPSIZE) steps.
+///
+/// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
+/// not used; we therefore save instructions by not retrieving lane_id
+/// from the corresponding special registers. The 4th parameter, which
+/// represents the version of the algorithm being used, is set to 0 to
+/// signify full warp reduction.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// #reduce_elem refers to an element in the local lane's data structure
+/// #remote_elem is retrieved from a remote lane
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem;
+///
+/// Contiguous Partial Warp Reduction
+///
+/// This reduce algorithm is used within a warp where only the first
+/// 'n' (n <= WARPSIZE) lanes are active. It is typically used when the
+/// number of OpenMP threads in a parallel region is not a multiple of
+/// WARPSIZE. The algorithm is implemented in the runtime as follows:
+///
+/// void
+/// contiguous_partial_reduce(void *reduce_data,
+///                           kmp_ShuffleReductFctPtr ShuffleReduceFn,
+///                           int size, int lane_id) {
+///   int curr_size;
+///   int offset;
+///   curr_size = size;
+///   offset = curr_size/2;
+///   while (offset > 0) {
+///     ShuffleReduceFn(reduce_data, lane_id, offset, 1);
+///     curr_size = (curr_size+1)/2;
+///     offset = curr_size/2;
+///   }
+/// }
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id < offset)
+///     reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+///     reduce_elem = remote_elem
+///
+/// This algorithm assumes that the data to be reduced are located in a
+/// contiguous subset of lanes starting from the first. When there is
+/// an odd number of active lanes, the data in the last lane is not
+/// aggregated with any other lane's data but is instead copied over.
+///
+/// Dispersed Partial Warp Reduction
+///
+/// This algorithm is used within a warp when any discontiguous subset of
+/// lanes is active. It is used to implement the reduction operation
+/// across lanes in an OpenMP simd region or in a nested parallel region.
+///
+/// void
+/// dispersed_partial_reduce(void *reduce_data,
+///                          kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+///   int size, remote_id;
+///   int logical_lane_id = number_of_active_lanes_before_me() * 2;
+///   do {
+///       remote_id = next_active_lane_id_right_after_me();
+///       # the above function returns 0 if no active lane
+///       # is present right after the current lane.
+///       size = number_of_active_lanes_in_this_warp();
+///       logical_lane_id /= 2;
+///       ShuffleReduceFn(reduce_data, logical_lane_id,
+///                       remote_id-1-threadIdx.x, 2);
+///   } while (logical_lane_id % 2 == 0 && size > 1);
+/// }
+///
+/// There is no assumption made about the initial state of the reduction.
+/// Any number of lanes (>=1) could be active at any position. The reduction +/// result is returned in the first active lane. +/// +/// In this version, 'ShuffleReduceFn' behaves, per element, as follows: +/// +/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); +/// if (lane_id % 2 == 0 && offset > 0) +/// reduce_elem = reduce_elem REDUCE_OP remote_elem +/// else +/// reduce_elem = remote_elem +/// +/// +/// Intra-Team Reduction +/// +/// This function, as implemented in the runtime call +/// '__kmpc_nvptx_parallel_reduce_nowait', aggregates data across OpenMP +/// threads in a team. It first reduces within a warp using the +/// aforementioned algorithms. We then proceed to gather all such +/// reduced values at the first warp. +/// +/// The runtime makes use of the function 'InterWarpCpyFn', which copies +/// data from each of the "warp master" (zeroth lane of each warp, where +/// warp-reduced data is held) to the zeroth warp. This step reduces (in +/// a mathematical sense) the problem of reduction across warp masters in +/// a block to the problem of warp reduction. +/// +/// +/// Inter-Team Reduction +/// +/// Once a team has reduced its data to a single value, it is stored in +/// a global scratchpad array. Since each team has a distinct slot, this +/// can be done without locking. +/// +/// The last team to write to the scratchpad array proceeds to reduce the +/// scratchpad array. One or more workers in the last team use the helper +/// 'loadAndReduceDataFn' to load and reduce values from the array, i.e., +/// the k'th worker reduces every k'th element. +/// +/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait' to +/// reduce across workers and compute a globally reduced value. +/// +void CGOpenMPRuntimeNVPTX::emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { + if (!CGF.HaveInsertPoint()) + return; + + bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); + bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); + // FIXME: Add support for simd reduction. + assert((TeamsReduction || ParallelReduction) && + "Invalid reduction selection in emitReduction."); + + auto &C = CGM.getContext(); + + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + auto Size = RHSExprs.size(); + for (auto *E : Privates) { + if (E->getType()->isVariablyModifiedType()) + // Reserve place for array size. + ++Size; + } + llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); + QualType ReductionArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + Elem); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. 
+ ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .first, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + // 2. Emit reduce_func(). + auto *ReductionFn = emitReductionFunction( + CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); + + // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), + // RedList, shuffle_reduce_func, interwarp_copy_func); + auto *ThreadId = getThreadID(CGF, Loc); + auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); + + auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction( + CGM, Privates, ReductionArrayTy, ReductionFn); + auto *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy); + + llvm::Value *Res = nullptr; + if (ParallelReduction) { + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn}; + + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), + Args); + } + + if (TeamsReduction) { + auto *ScratchPadCopyFn = + emitCopyToScratchpad(CGM, Privates, ReductionArrayTy); + auto *LoadAndReduceFn = emitReduceScratchpadFunction( + CGM, Privates, ReductionArrayTy, ReductionFn); + + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn, + ScratchPadCopyFn, + LoadAndReduceFn}; + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait), + Args); + } + + // 5. Build switch(res) + auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); + + // 6. Build case 1: where we have reduced values in the master + // thread in each team. 
+ // __kmpc_end_reduce{_nowait}(<gtid>); + // break; + auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); + CGF.EmitBlock(Case1BB); + + // Add emission of __kmpc_end_reduce{_nowait}(<gtid>); + llvm::Value *EndArgs[] = {ThreadId}; + auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps, + this](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto IPriv = Privates.begin(); + auto ILHS = LHSExprs.begin(); + auto IRHS = RHSExprs.begin(); + for (auto *E : ReductionOps) { + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); + ++IPriv; + ++ILHS; + ++IRHS; + } + }; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + EndArgs); + RCG.setAction(Action); + RCG(CGF); + CGF.EmitBranch(DefaultBB); + CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); +} diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 4010b46a4cbd6..ae25e94759e6a 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -43,6 +43,8 @@ private: void createWorkerFunction(CodeGenModule &CGM); }; + bool isInSpmdExecutionMode() const; + /// \brief Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); @@ -58,11 +60,12 @@ private: /// function. void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); - /// \brief Returns specified OpenMP runtime function for the current OpenMP - /// implementation. Specialized for the NVPTX device. - /// \param Function OpenMP runtime function. - /// \return Specified function. - llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + /// \brief Helper for Spmd mode target directive's entry function. + void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + const OMPExecutableDirective &D); + + /// \brief Signal termination of Spmd mode execution. + void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); // // Base class overrides. @@ -87,6 +90,22 @@ private: llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); + /// \brief Emit outlined function specialized for the Single Program + /// Multiple Data programming model for applicable target directives on the + /// NVPTX device. + /// \param D Directive to emit. + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Object containing the target statements. + /// An outlined function may not be an entry if, e.g. the if clause always + /// evaluates to false. + void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); + /// \brief Emit outlined function for 'target' directive on the NVPTX /// device. /// \param D Directive to emit. @@ -118,6 +137,22 @@ private: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); + /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. 
+ /// This call is for a parallel directive within an SPMD target directive. + /// \param OutlinedFn Outlined function to be run in parallel threads. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// \param IfCond Condition in the associated 'if' clause, if it was + /// specified, nullptr otherwise. + /// + void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond); + protected: /// \brief Get the function name of an outlined region. // The name can be customized depending on the target. @@ -129,6 +164,20 @@ protected: public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 + /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. + virtual void emitProcBindClause(CodeGenFunction &CGF, + OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) override; + + /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' + /// clause. + /// \param NumThreads An integer value of threads. + virtual void emitNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) override; + /// \brief This function ought to emit, in the general case, a call to // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed // as these numbers are obtained through the PTX grid and block configuration. @@ -138,7 +187,22 @@ public: const Expr *ThreadLimit, SourceLocation Loc) override; /// \brief Emits inlined function for the specified OpenMP parallel - // directive but an inlined function for teams. + // directive. + /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, + /// kmp_int32 BoundID, struct context_vars*). + /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. + llvm::Value * + emitParallelOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; + + /// \brief Emits inlined function for the specified OpenMP teams + // directive. /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). /// \param D OpenMP directive. @@ -147,10 +211,10 @@ public: /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
llvm::Value * - emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + emitTeamsOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; /// \brief Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -177,6 +241,50 @@ public: llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; + + /// Emit a code for reduction clause. + /// + /// \param Privates List of private copies for original reduction arguments. + /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. + /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' + /// or 'operator binop(LHS, RHS)'. + /// \param Options List of options for reduction codegen: + /// WithNowait true if parent directive has also nowait clause, false + /// otherwise. + /// SimpleReduction Emit reduction operation only. Used for omp simd + /// directive on the host. + /// ReductionKind The kind of reduction to perform. + virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, + ReductionOptionsTy Options) override; + + /// Returns specified OpenMP runtime function for the current OpenMP + /// implementation. Specialized for the NVPTX device. + /// \param Function OpenMP runtime function. + /// \return Specified function. + llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + + /// Target codegen is specialized based on two programming models: the + /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd' + /// model for constructs like 'target parallel' that support it. + enum ExecutionMode { + /// Single Program Multiple Data. + Spmd, + /// Generic codegen to support fork-join model. + Generic, + Unknown, + }; + +private: + // Track the execution mode when codegening directives within a target + // region. The appropriate mode (generic/spmd) is set on entry to the + // target region and used by containing directives such as 'parallel' + // to emit optimized code. + ExecutionMode CurrentExecutionMode; }; } // CodeGen namespace. diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 8370607db50f9..0ebfd99363c1d 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -145,7 +145,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { EmitCoroutineBody(cast<CoroutineBodyStmt>(*S)); break; case Stmt::CoreturnStmtClass: - CGM.ErrorUnsupported(S, "coroutine"); + EmitCoreturnStmt(cast<CoreturnStmt>(*S)); break; case Stmt::CapturedStmtClass: { const CapturedStmt *CS = cast<CapturedStmt>(S); @@ -2127,16 +2127,16 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect); llvm::CallInst *Result = Builder.CreateCall(IA, Args); - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); // Attach readnone and readonly attributes. 
if (!HasSideEffect) { if (ReadNone) - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadNone); else if (ReadOnly) - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadOnly); } @@ -2157,7 +2157,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Conservatively, mark all inline asm blocks in CUDA as convergent // (meaning, they may call an intrinsically convergent op, such as bar.sync, // and so can't have certain optimizations applied around them). - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Convergent); } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 39e1cdfdbe2a1..22269e42c7a00 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -26,7 +26,7 @@ using namespace CodeGen; namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. -class OMPLexicalScope final : public CodeGenFunction::LexicalScope { +class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { if (auto *CPI = OMPClauseWithPreInit::get(C)) { @@ -54,10 +54,11 @@ class OMPLexicalScope final : public CodeGenFunction::LexicalScope { public: OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, - bool AsInlined = false) + bool AsInlined = false, bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { - emitPreInitStmt(CGF, S); + if (EmitPreInitStmt) + emitPreInitStmt(CGF, S); if (AsInlined) { if (S.hasAssociatedStmt()) { auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); @@ -81,6 +82,38 @@ public: } }; +/// Lexical scope for OpenMP parallel construct, that handles correct codegen +/// for captured expressions. +class OMPParallelScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !isOpenMPTargetExecutionDirective(Kind) && + isOpenMPParallelDirective(Kind); + } + +public: + OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, + /*AsInlined=*/false, + /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} +}; + +/// Lexical scope for OpenMP teams construct, that handles correct codegen +/// for captured expressions. +class OMPTeamsScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !isOpenMPTargetExecutionDirective(Kind) && + isOpenMPTeamsDirective(Kind); + } + +public: + OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, + /*AsInlined=*/false, + /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} +}; + /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. 
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { @@ -194,6 +227,17 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, return TmpAddr; } +static QualType getCanonicalParamType(ASTContext &C, QualType T) { + if (T->isLValueReferenceType()) { + return C.getLValueReferenceType( + getCanonicalParamType(C, T.getNonReferenceType()), + /*SpelledAsLValue=*/false); + } + if (T->isPointerType()) + return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); + return C.getCanonicalParamType(T); +} + llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { assert( @@ -233,13 +277,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { II = &getContext().Idents.get("vla"); } if (ArgType->isVariablyModifiedType()) { - bool IsReference = ArgType->isLValueReferenceType(); ArgType = - getContext().getCanonicalParamType(ArgType.getNonReferenceType()); - if (IsReference && !ArgType->isPointerType()) { - ArgType = getContext().getLValueReferenceType( - ArgType, /*SpelledAsLValue=*/false); - } + getCanonicalParamType(getContext(), ArgType.getNonReferenceType()); } Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, FD->getLocation(), II, ArgType)); @@ -986,7 +1025,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( OriginalBaseLValue); // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { + PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address { return OASELValueLB.getAddress(); }); // Emit reduction copy. @@ -1040,9 +1079,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { - return ASELValue.getAddress(); - }); + PrivateScope.addPrivate( + LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, @@ -1158,7 +1196,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( } void CodeGenFunction::EmitOMPReductionClauseFinal( - const OMPExecutableDirective &D) { + const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { if (!HaveInsertPoint()) return; llvm::SmallVector<const Expr *, 8> Privates; @@ -1174,14 +1212,15 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); } if (HasAtLeastOneReduction) { + bool WithNowait = D.getSingleClause<OMPNowaitClause>() || + isOpenMPParallelDirective(D.getDirectiveKind()) || + D.getDirectiveKind() == OMPD_simd; + bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). 
CGM.getOpenMPRuntime().emitReduction( *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, - D.getSingleClause<OMPNowaitClause>() || - isOpenMPParallelDirective(D.getDirectiveKind()) || - D.getDirectiveKind() == OMPD_simd, - D.getDirectiveKind() == OMPD_simd); + {WithNowait, SimpleReduction, ReductionKind}); } } @@ -1214,10 +1253,9 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). - emitParallelOrTeamsOutlinedFunction(S, - *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), @@ -1239,7 +1277,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, } } - OMPLexicalScope Scope(CGF, S); + OMPParallelScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, @@ -1264,7 +1302,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); emitPostUpdateForReductionClause( @@ -1677,7 +1715,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { // Emit final copy of the lastprivate variables at the end of loops. if (HasLastprivateClause) CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -2003,15 +2041,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( }); } -void CodeGenFunction::EmitOMPTargetTeamsDirective( - const OMPTargetTeamsDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( const OMPTargetTeamsDistributeDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective( @@ -2222,7 +2251,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); } - EmitOMPReductionClauseFinal(S); + EmitOMPReductionClauseFinal( + S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) + ? /*Parallel and Simd*/ OMPD_parallel_for_simd + : /*Parallel only*/ OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. 
emitPostUpdateForReductionClause( *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { @@ -2320,8 +2352,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getLocStart(), - /*fpContractable=*/false); + OK_Ordinary, S.getLocStart(), FPOptions()); // Increment for loop counter. UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, S.getLocStart()); @@ -2397,7 +2428,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { @@ -2633,7 +2664,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); @@ -3250,7 +3281,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = XRValExpr->getType(); auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, - IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { + IsPostfixUpdate](RValue XRValue) -> RValue { CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); RValue Res = CGF.EmitAnyExpr(UE); @@ -3277,7 +3308,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = X->getType().getNonReferenceType(); ExprRValue = convertToType(CGF, ExprRValue, E->getType(), X->getType().getNonReferenceType(), Loc); - auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { + auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { NewVVal = XRValue; return ExprRValue; }; @@ -3404,41 +3435,24 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } -std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> -CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( - CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, - bool IsOffloadEntry) { - llvm::Function *OutlinedFn = nullptr; - llvm::Constant *OutlinedFnID = nullptr; - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { - OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); - CGF.EmitOMPPrivateClause(S, PrivateScope); - (void)PrivateScope.Privatize(); - - Action.Enter(CGF); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }; - // Emit target region as a standalone region. 
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - return std::make_pair(OutlinedFn, OutlinedFnID); -} - -void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen) { + assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); + CodeGenModule &CGM = CGF.CGM; const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - GenerateOpenMPCapturedVars(CS, CapturedVars); - llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; - // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - - if (auto *C = S.getSingleClause<OMPIfClause>()) { - IfCond = C->getCondition(); + // Check for the at most one if clause associated with the target region. + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_target) { + IfCond = C->getCondition(); + break; + } } // Check if we have any device clause associated with the directive. @@ -3453,43 +3467,76 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { bool IsOffloadEntry = true; if (IfCond) { bool Val; - if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) + if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) IsOffloadEntry = false; } if (CGM.getLangOpts().OMPTargetTriples.empty()) IsOffloadEntry = false; - assert(CurFuncDecl && "No parent declaration for target region!"); + assert(CGF.CurFuncDecl && "No parent declaration for target region!"); StringRef ParentName; // In case we have Ctors/Dtors we use the complete type variant to produce // the mangling of the device outlined kernel. - if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) + if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); - else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) + else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); else ParentName = - CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); + CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); - std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( - CGM, S, ParentName, IsOffloadEntry); - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, + // Emit target region as a standalone region. 
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, + IsOffloadEntry, CodeGen); + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, CapturedVars); } +static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, + PrePostActionTy &Action) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); +} + +void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). - emitParallelOrTeamsOutlinedFunction(S, - *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); - const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); - const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); - const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); + const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); + const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); if (NT || TL) { Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; Expr *ThreadLimit = (TL) ? 
TL->getThreadLimit() : nullptr; @@ -3498,7 +3545,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, S.getLocStart()); } - OMPLexicalScope Scope(CGF, S); + OMPTeamsScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, @@ -3511,10 +3558,47 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause( + *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); +} + +static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDirective &S) { + auto *CS = S.getCapturedStmt(OMPD_teams); + Action.Enter(CGF); + auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDirective( + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPCancellationPointDirective( @@ -3740,9 +3824,47 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } +static void emitTargetParallelRegion(CodeGenFunction &CGF, + const OMPTargetParallelDirective &S, + PrePostActionTy &Action) { + // Get the captured statement associated with the 'parallel' region. + auto *CS = S.getCapturedStmt(OMPD_parallel); + Action.Enter(CGF); + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + // TODO: Add support for clauses. 
+ CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + void CodeGenFunction::EmitOMPTargetParallelDirective( const OMPTargetParallelDirective &S) { - // TODO: codegen for target parallel. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPTargetParallelForDirective( diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 1a09830b52fd3..7b0c8bf7d6e9b 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -14,7 +14,7 @@ #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -284,6 +284,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, if (isa<CXXDestructorDecl>(MD)) CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs); +#ifndef NDEBUG + unsigned PrefixArgs = CallArgs.size() - 1; +#endif // Add the rest of the arguments. 
for (const ParmVarDecl *PD : MD->parameters()) EmitDelegateCallArg(CallArgs, PD, SourceLocation()); @@ -292,7 +295,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, #ifndef NDEBUG const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall( - CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD)); + CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD), PrefixArgs); assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() && CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() && CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention()); @@ -380,8 +383,8 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, AttributeList, CallingConv, /*AttrOnCallSite=*/true); - llvm::AttributeSet Attrs = - llvm::AttributeSet::get(getLLVMContext(), AttributeList); + llvm::AttributeList Attrs = + llvm::AttributeList::get(getLLVMContext(), AttributeList); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -744,9 +747,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { switch (keyFunction->getTemplateSpecializationKind()) { case TSK_Undeclared: case TSK_ExplicitSpecialization: - assert((def || CodeGenOpts.OptimizationLevel > 0) && - "Shouldn't query vtable linkage without key function or " - "optimizations"); + assert((def || CodeGenOpts.OptimizationLevel > 0 || + CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo) && + "Shouldn't query vtable linkage without key function, " + "optimizations, or debug info"); if (!def && CodeGenOpts.OptimizationLevel > 0) return llvm::GlobalVariable::AvailableExternallyLinkage; @@ -942,7 +946,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - if (!getCodeGenOpts().PrepareForLTO) + if (!getCodeGenOpts().LTOUnit) return; CharUnits PointerWidth = diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 4fbf9f22a98db..84248cc647199 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -36,7 +36,6 @@ add_clang_library(clangCodeGen CGAtomic.cpp CGBlocks.cpp CGBuiltin.cpp - CGCUDABuiltin.cpp CGCUDANV.cpp CGCUDARuntime.cpp CGCXX.cpp @@ -55,6 +54,7 @@ add_clang_library(clangCodeGen CGExprComplex.cpp CGExprConstant.cpp CGExprScalar.cpp + CGGPUBuiltin.cpp CGLoopInfo.cpp CGObjC.cpp CGObjCGNU.cpp @@ -75,8 +75,10 @@ add_clang_library(clangCodeGen CodeGenPGO.cpp CodeGenTBAA.cpp CodeGenTypes.cpp + ConstantInitBuilder.cpp CoverageMappingGen.cpp ItaniumCXXABI.cpp + MacroPPCallbacks.cpp MicrosoftCXXABI.cpp ModuleBuilder.cpp ObjectFilePCHContainerOperations.cpp diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 5f74141d75b37..b864069dc6456 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -7,7 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "clang/CodeGen/CodeGenAction.h" +#include "CodeGenModule.h" #include "CoverageMappingGen.h" +#include "MacroPPCallbacks.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" @@ -16,15 +19,16 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" -#include "clang/CodeGen/CodeGenAction.h" #include "clang/CodeGen/ModuleBuilder.h" #include 
"clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" @@ -35,12 +39,16 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Transforms/IPO/Internalize.h" + #include <memory> using namespace clang; using namespace llvm; namespace clang { class BackendConsumer : public ASTConsumer { + using LinkModule = CodeGenAction::LinkModule; + virtual void anchor(); DiagnosticsEngine &Diags; BackendAction Action; @@ -61,43 +69,39 @@ namespace clang { std::unique_ptr<CodeGenerator> Gen; - SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4> - LinkModules; + SmallVector<LinkModule, 4> LinkModules; // This is here so that the diagnostic printer knows the module a diagnostic // refers to. llvm::Module *CurLinkModule = nullptr; public: - BackendConsumer( - BackendAction Action, DiagnosticsEngine &Diags, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - bool TimePasses, const std::string &InFile, - const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules, - std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr) + BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, bool TimePasses, + const std::string &InFile, + SmallVector<LinkModule, 4> LinkModules, + std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(std::move(OS)), Context(nullptr), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, - CodeGenOpts, C, CoverageInfo)) { + CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)) { llvm::TimePassesIsEnabled = TimePasses; - for (auto &I : LinkModules) - this->LinkModules.push_back( - std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second))); } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr<llvm::Module> takeModule() { return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); } - void releaseLinkModules() { - for (auto &I : LinkModules) - I.second.release(); - } + + CodeGenerator *getCodeGenerator() { return Gen.get(); } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { Gen->HandleCXXStaticMemberVarInstantiation(VD); @@ -159,6 +163,35 @@ namespace clang { HandleTopLevelDecl(D); } + // Links each entry in LinkModules into our module. Returns true on error. 
+ bool LinkInModules() { + for (auto &LM : LinkModules) { + if (LM.PropagateAttrs) + for (Function &F : *LM.Module) + Gen->CGM().AddDefaultFnAttrs(F); + + CurLinkModule = LM.Module.get(); + + bool Err; + if (LM.Internalize) { + Err = Linker::linkModules( + *getModule(), std::move(LM.Module), LM.LinkFlags, + [](llvm::Module &M, const llvm::StringSet<> &GVS) { + internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { + return !GV.hasName() || (GVS.count(GV.getName()) == 0); + }); + }); + } else { + Err = Linker::linkModules(*getModule(), std::move(LM.Module), + LM.LinkFlags); + } + + if (Err) + return true; + } + return false; // success + } + void HandleTranslationUnit(ASTContext &C) override { { PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); @@ -216,13 +249,9 @@ namespace clang { Ctx.setDiagnosticHotnessRequested(true); } - // Link LinkModule into this module if present, preserving its validity. - for (auto &I : LinkModules) { - unsigned LinkFlags = I.first; - CurLinkModule = I.second.get(); - if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags)) - return; - } + // Link each LinkModule into our module. + if (LinkInModules()) + return; EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); @@ -275,7 +304,7 @@ namespace clang { /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc - getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D, + getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const; @@ -298,9 +327,8 @@ namespace clang { /// them. void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID); - void OptimizationRemarkHandler(const llvm::OptimizationRemark &D); - void OptimizationRemarkHandler(const llvm::OptimizationRemarkMissed &D); - void OptimizationRemarkHandler(const llvm::OptimizationRemarkAnalysis &D); + void + OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D); void OptimizationRemarkHandler( const llvm::OptimizationRemarkAnalysisFPCommute &D); void OptimizationRemarkHandler( @@ -308,7 +336,7 @@ namespace clang { void OptimizationFailureHandler( const llvm::DiagnosticInfoOptimizationFailure &D); }; - + void BackendConsumer::anchor() {} } @@ -377,7 +405,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, // code. if (LocCookie.isValid()) { Diags.Report(LocCookie, DiagID).AddString(Message); - + if (D.getLoc().isValid()) { DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here); // Convert the SMDiagnostic ranges into SourceRange and attach them @@ -390,7 +418,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, } return; } - + // Otherwise, report the backend issue as occurring in the generated .s file. // If Loc is invalid, we still need to report the issue, it just gets no // location info. 
@@ -477,8 +505,8 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { } const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( - const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename, - unsigned &Line, unsigned &Column) const { + const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo, + StringRef &Filename, unsigned &Line, unsigned &Column) const { SourceManager &SourceMgr = Context->getSourceManager(); FileManager &FileMgr = SourceMgr.getFileManager(); SourceLocation DILoc; @@ -568,36 +596,34 @@ void BackendConsumer::EmitOptimizationMessage( } void BackendConsumer::OptimizationRemarkHandler( - const llvm::OptimizationRemark &D) { - // Optimization remarks are active only if the -Rpass flag has a regular - // expression that matches the name of the pass name in \p D. - if (CodeGenOpts.OptimizationRemarkPattern && - CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) - EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark); -} - -void BackendConsumer::OptimizationRemarkHandler( - const llvm::OptimizationRemarkMissed &D) { - // Missed optimization remarks are active only if the -Rpass-missed - // flag has a regular expression that matches the name of the pass - // name in \p D. - if (CodeGenOpts.OptimizationRemarkMissedPattern && - CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName())) - EmitOptimizationMessage(D, - diag::remark_fe_backend_optimization_remark_missed); -} - -void BackendConsumer::OptimizationRemarkHandler( - const llvm::OptimizationRemarkAnalysis &D) { - // Optimization analysis remarks are active if the pass name is set to - // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a - // regular expression that matches the name of the pass name in \p D. - - if (D.shouldAlwaysPrint() || - (CodeGenOpts.OptimizationRemarkAnalysisPattern && - CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) - EmitOptimizationMessage( - D, diag::remark_fe_backend_optimization_remark_analysis); + const llvm::DiagnosticInfoOptimizationBase &D) { + if (D.isPassed()) { + // Optimization remarks are active only if the -Rpass flag has a regular + // expression that matches the name of the pass name in \p D. + if (CodeGenOpts.OptimizationRemarkPattern && + CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) + EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark); + } else if (D.isMissed()) { + // Missed optimization remarks are active only if the -Rpass-missed + // flag has a regular expression that matches the name of the pass + // name in \p D. + if (CodeGenOpts.OptimizationRemarkMissedPattern && + CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName())) + EmitOptimizationMessage( + D, diag::remark_fe_backend_optimization_remark_missed); + } else { + assert(D.isAnalysis() && "Unknown remark type"); + + bool ShouldAlwaysPrint = false; + if (auto *ORA = dyn_cast<llvm::OptimizationRemarkAnalysis>(&D)) + ShouldAlwaysPrint = ORA->shouldAlwaysPrint(); + + if (ShouldAlwaysPrint || + (CodeGenOpts.OptimizationRemarkAnalysisPattern && + CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) + EmitOptimizationMessage( + D, diag::remark_fe_backend_optimization_remark_analysis); + } } void BackendConsumer::OptimizationRemarkHandler( @@ -680,6 +706,21 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { // handler. There is no generic way of emitting them. 
OptimizationRemarkHandler(cast<OptimizationRemarkAnalysisAliasing>(DI)); return; + case llvm::DK_MachineOptimizationRemark: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<MachineOptimizationRemark>(DI)); + return; + case llvm::DK_MachineOptimizationRemarkMissed: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<MachineOptimizationRemarkMissed>(DI)); + return; + case llvm::DK_MachineOptimizationRemarkAnalysis: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<MachineOptimizationRemarkAnalysis>(DI)); + return; case llvm::DK_OptimizationFailure: // Optimization failures are always handled completely by this // handler. @@ -729,10 +770,6 @@ void CodeGenAction::EndSourceFileAction() { if (!getCompilerInstance().hasASTConsumer()) return; - // Take back ownership of link modules we passed to consumer. - if (!LinkModules.empty()) - BEConsumer->releaseLinkModules(); - // Steal the module from the consumer. TheModule = BEConsumer->takeModule(); } @@ -775,13 +812,12 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { // Load bitcode modules to link with, if we need to. if (LinkModules.empty()) - for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) { - const std::string &LinkBCFile = I.second; - - auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile); + for (const CodeGenOptions::BitcodeFileToLink &F : + CI.getCodeGenOpts().LinkBitcodeFiles) { + auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); if (!BCBuf) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << BCBuf.getError().message(); + << F.Filename << BCBuf.getError().message(); LinkModules.clear(); return nullptr; } @@ -791,12 +827,13 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (!ModuleOrErr) { handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << EIB.message(); + << F.Filename << EIB.message(); }); LinkModules.clear(); return nullptr; } - addLinkModule(ModuleOrErr.get().release(), I.first); + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); } CoverageSourceInfo *CoverageInfo = nullptr; @@ -810,9 +847,20 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { std::unique_ptr<BackendConsumer> Result(new BackendConsumer( BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules, - std::move(OS), *VMContext, CoverageInfo)); + CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, + std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo)); BEConsumer = Result.get(); + + // Enable generating macro debug info only when debug info is not disabled and + // also macro debug info is enabled. 
+ if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && + CI.getCodeGenOpts().MacroDebugInfo) { + std::unique_ptr<PPCallbacks> Callbacks = + llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(), + CI.getPreprocessor()); + CI.getPreprocessor().addPPCallbacks(std::move(Callbacks)); + } + return std::move(Result); } @@ -838,6 +886,62 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, Diags->Report(DiagID).AddString("cannot compile inline asm"); } +std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) { + CompilerInstance &CI = getCompilerInstance(); + SourceManager &SM = CI.getSourceManager(); + + // For ThinLTO backend invocations, ensure that the context + // merges types based on ODR identifiers. We also need to read + // the correct module out of a multi-module bitcode file. + if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) { + VMContext->enableDebugTypeODRUniquing(); + + auto DiagErrors = [&](Error E) -> std::unique_ptr<llvm::Module> { + unsigned DiagID = + CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(DiagID) << EIB.message(); + }); + return {}; + }; + + Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef); + if (!BMOrErr) + return DiagErrors(BMOrErr.takeError()); + + Expected<std::unique_ptr<llvm::Module>> MOrErr = + BMOrErr->parseModule(*VMContext); + if (!MOrErr) + return DiagErrors(MOrErr.takeError()); + return std::move(*MOrErr); + } + + llvm::SMDiagnostic Err; + if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext)) + return M; + + // Translate from the diagnostic info to the SourceManager location if + // available. + // TODO: Unify this with ConvertBackendLocation() + SourceLocation Loc; + if (Err.getLineNo() > 0) { + assert(Err.getColumnNo() >= 0); + Loc = SM.translateFileLineCol(SM.getFileEntryForID(SM.getMainFileID()), + Err.getLineNo(), Err.getColumnNo() + 1); + } + + // Strip off a leading diagnostic code if there is one. + StringRef Msg = Err.getMessage(); + if (Msg.startswith("error: ")) + Msg = Msg.substr(7); + + unsigned DiagID = + CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + + CI.getDiagnostics().Report(Loc, DiagID) << Msg; + return {}; +} + void CodeGenAction::ExecuteAction() { // If this is an IR file, we have to treat it specially. if (getCurrentFileKind() == IK_LLVM_IR) { @@ -855,35 +959,10 @@ void CodeGenAction::ExecuteAction() { if (Invalid) return; - // For ThinLTO backend invocations, ensure that the context - // merges types based on ODR identifiers. - if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) - VMContext->enableDebugTypeODRUniquing(); - - llvm::SMDiagnostic Err; - TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext); - if (!TheModule) { - // Translate from the diagnostic info to the SourceManager location if - // available. - // TODO: Unify this with ConvertBackendLocation() - SourceLocation Loc; - if (Err.getLineNo() > 0) { - assert(Err.getColumnNo() >= 0); - Loc = SM.translateFileLineCol(SM.getFileEntryForID(FID), - Err.getLineNo(), Err.getColumnNo() + 1); - } - - // Strip off a leading diagnostic code if there is one. 
- StringRef Msg = Err.getMessage(); - if (Msg.startswith("error: ")) - Msg = Msg.substr(7); - - unsigned DiagID = - CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); - - CI.getDiagnostics().Report(Loc, DiagID) << Msg; + TheModule = loadModule(*MainFile); + if (!TheModule) return; - } + const TargetOptions &TargetOpts = CI.getTargetOpts(); if (TheModule->getTargetTriple() != TargetOpts.Triple) { CI.getDiagnostics().Report(SourceLocation(), diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index e142a21b1e744..6e6eb7d7f13c6 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -45,15 +45,15 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, if (CGOpts.DisableLifetimeMarkers) return false; - // Asan uses markers for use-after-scope checks. - if (CGOpts.SanitizeAddressUseAfterScope) - return true; - // Disable lifetime markers in msan builds. // FIXME: Remove this when msan works with lifetime markers. if (LangOpts.Sanitize.has(SanitizerKind::Memory)) return false; + // Asan uses markers for use-after-scope checks. + if (CGOpts.SanitizeAddressUseAfterScope) + return true; + // For now, only in optimized builds. return CGOpts.OptimizationLevel != 0; } @@ -149,6 +149,8 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, Alignment = CGM.getClassPointerAlignment(RD); } else { Alignment = getContext().getTypeAlignInChars(T); + if (T.getQualifiers().hasUnaligned()) + Alignment = CharUnits::One(); } // Cap to the global maximum type alignment unless the alignment @@ -200,7 +202,8 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { llvm_unreachable("non-canonical or dependent type in IR-generation"); case Type::Auto: - llvm_unreachable("undeduced auto type in IR-generation"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("undeduced type in IR-generation"); // Various scalar types. case Type::Builtin: @@ -607,11 +610,6 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName)); - // Get argument type qualifiers: - if (ty.isConstQualified()) - typeQuals = "const"; - if (ty.isVolatileQualified()) - typeQuals += typeQuals.empty() ? "volatile" : " volatile"; if (isPipe) typeQuals = "pipe"; } @@ -707,6 +705,11 @@ static bool endsWithReturn(const Decl* F) { return false; } +static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) { + Fn->addFnAttr("sanitize_thread_no_checking_at_run_time"); + Fn->removeFnAttr(llvm::Attribute::SanitizeThread); +} + void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, @@ -750,16 +753,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr(llvm::Attribute::SafeStack); // Ignore TSan memory acesses from within ObjC/ObjC++ dealloc, initialize, - // .cxx_destruct and all of their calees at run time. + // .cxx_destruct, __destroy_helper_block_ and all of their calees at run time. 
if (SanOpts.has(SanitizerKind::Thread)) { if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) { IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0); if (OMD->getMethodFamily() == OMF_dealloc || OMD->getMethodFamily() == OMF_initialize || (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) { - Fn->addFnAttr("sanitize_thread_no_checking_at_run_time"); - Fn->removeFnAttr(llvm::Attribute::SanitizeThread); + markAsIgnoreThreadCheckingAtRuntime(Fn); } + } else if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + IdentifierInfo *II = FD->getIdentifier(); + if (II && II->isStr("__destroy_helper_block_")) + markAsIgnoreThreadCheckingAtRuntime(Fn); } } @@ -770,10 +776,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr("function-instrument", "xray-always"); if (XRayAttr->neverXRayInstrument()) Fn->addFnAttr("function-instrument", "xray-never"); + if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) { + Fn->addFnAttr("xray-log-args", + llvm::utostr(LogArgs->getArgumentCount())); + } } else { - Fn->addFnAttr( - "xray-instruction-threshold", - llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + if (!CGM.imbueXRayAttrs(Fn, Loc)) + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); } } @@ -807,6 +818,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, } } + // If we're checking nullability, we need to know whether we can check the + // return value. Initialize the flag to 'true' and refine it in EmitParmDecl. + if (SanOpts.has(SanitizerKind::NullabilityReturn)) { + auto Nullability = FnRetTy->getNullability(getContext()); + if (Nullability && *Nullability == NullabilityKind::NonNull) { + if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && + CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>())) + RetValNullabilityPrecondition = + llvm::ConstantInt::getTrue(getLLVMContext()); + } + } + // If we're in C++ mode and the function name is "main", it is guaranteed // to be norecurse by the standard (3.6.1.3 "The function main shall not be // used within a program"). @@ -851,8 +874,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // inlining, we just add an attribute to insert a mcount call in backend. // The attribute "counting-function" is set to mcount function name which is // architecture dependent. - if (CGM.getCodeGenOpts().InstrumentForProfiling) - Fn->addFnAttr("counting-function", getTarget().getMCountName()); + if (CGM.getCodeGenOpts().InstrumentForProfiling) { + if (CGM.getCodeGenOpts().CallFEntry) + Fn->addFnAttr("fentry-call", "true"); + else + Fn->addFnAttr("counting-function", getTarget().getMCountName()); + } if (RetTy->isVoidType()) { // Void type; nothing to return. @@ -935,6 +962,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // fast register allocator would be happier... CXXThisValue = CXXABIThisValue; } + + // Check the 'this' pointer once per function, if it's available. 
+ if (CXXThisValue) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::ObjectSize, true); + QualType ThisTy = MD->getThisType(getContext()); + EmitTypeCheck(TCK_Load, Loc, CXXThisValue, ThisTy, + getContext().getTypeAlignInChars(ThisTy->getPointeeType()), + SkippedChecks); + } } // If any of the arguments have a variably modified type, make sure to @@ -1076,8 +1113,13 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, if (FD->hasAttr<NoDebugAttr>()) DebugInfo = nullptr; // disable debug info indefinitely for this function + // The function might not have a body if we're generating thunks for a + // function declaration. SourceRange BodyRange; - if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange(); + if (Stmt *Body = FD->getBody()) + BodyRange = Body->getSourceRange(); + else + BodyRange = FD->getLocation(); CurEHLocation = BodyRange.getEnd(); // Use the location of the start of the function to determine where @@ -1891,6 +1933,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::Typedef: case Type::Decltype: case Type::Auto: + case Type::DeducedTemplateSpecialization: // Stop walking: nothing to do. return; diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 586134023240b..fa72019eb08b2 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -115,6 +115,8 @@ enum TypeEvaluationKind { SANITIZER_CHECK(MissingReturn, missing_return, 0) \ SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \ SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \ + SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \ + SANITIZER_CHECK(NullabilityReturn, nullability_return, 0) \ SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \ SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \ SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \ @@ -212,6 +214,13 @@ public: /// value. This is invalid iff the function has no return value. Address ReturnValue; + /// Return true if a label was seen in the current scope. + bool hasLabelBeenSeenInCurrentScope() const { + if (CurLexicalScope) + return CurLexicalScope->hasLabels(); + return !LabelMap.empty(); + } + /// AllocaInsertPoint - This is an instruction in the entry block before which /// we prefer to insert allocas. llvm::AssertingVH<llvm::Instruction> AllocaInsertPt; @@ -298,6 +307,31 @@ public: ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; + /// An abstract representation of regular/ObjC call/message targets. + class AbstractCallee { + /// The function declaration of the callee. + const Decl *CalleeDecl; + + public: + AbstractCallee() : CalleeDecl(nullptr) {} + AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {} + AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {} + bool hasFunctionDecl() const { + return dyn_cast_or_null<FunctionDecl>(CalleeDecl); + } + const Decl *getDecl() const { return CalleeDecl; } + unsigned getNumParams() const { + if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl)) + return FD->getNumParams(); + return cast<ObjCMethodDecl>(CalleeDecl)->param_size(); + } + const ParmVarDecl *getParamDecl(unsigned I) const { + if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl)) + return FD->getParamDecl(I); + return *(cast<ObjCMethodDecl>(CalleeDecl)->param_begin() + I); + } + }; + /// \brief Sanitizers enabled for this function. SanitizerSet SanOpts; @@ -548,14 +582,10 @@ public: CGF.DidCallStackSave = false; } - /// \brief Exit this cleanup scope, emitting any accumulated - /// cleanups. 
+ /// \brief Exit this cleanup scope, emitting any accumulated cleanups. ~RunCleanupsScope() { - if (PerformCleanup) { - CGF.DidCallStackSave = OldDidCallStackSave; - CGF.PopCleanupBlocks(CleanupStackDepth, - LifetimeExtendedCleanupStackSize); - } + if (PerformCleanup) + ForceCleanup(); } /// \brief Determine whether this scope requires any cleanups. @@ -565,11 +595,15 @@ public: /// \brief Force the emission of cleanups now, instead of waiting /// until this object is destroyed. - void ForceCleanup() { + /// \param ValuesToReload - A list of values that need to be available at + /// the insertion point after cleanup emission. If cleanup emission created + /// a shared cleanup block, these value pointers will be rewritten. + /// Otherwise, they not will be modified. + void ForceCleanup(std::initializer_list<llvm::Value**> ValuesToReload = {}) { assert(PerformCleanup && "Already forced cleanup"); CGF.DidCallStackSave = OldDidCallStackSave; - CGF.PopCleanupBlocks(CleanupStackDepth, - LifetimeExtendedCleanupStackSize); + CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize, + ValuesToReload); PerformCleanup = false; } }; @@ -620,6 +654,10 @@ public: rescopeLabels(); } + bool hasLabels() const { + return !Labels.empty(); + } + void rescopeLabels(); }; @@ -727,13 +765,17 @@ public: /// \brief Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. - void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize); + void + PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, + std::initializer_list<llvm::Value **> ValuesToReload = {}); /// \brief Takes the old cleanup stack size and emits the cleanup blocks /// that have been added, then adds all lifetime-extended cleanups from /// the given position to the stack. - void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, - size_t OldLifetimeExtendedStackSize); + void + PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, + size_t OldLifetimeExtendedStackSize, + std::initializer_list<llvm::Value **> ValuesToReload = {}); void ResolveBranchFixups(llvm::BasicBlock *Target); @@ -1116,10 +1158,11 @@ private: uint64_t LoopCount); public: - /// Increment the profiler's counter for the given statement. - void incrementProfileCounter(const Stmt *S) { + /// Increment the profiler's counter for the given statement by \p StepV. + /// If \p StepV is null, the default increment is 1. + void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) { if (CGM.getCodeGenOpts().hasProfileClangInstr()) - PGO.emitCounterIncrement(Builder, S); + PGO.emitCounterIncrement(Builder, S, StepV); PGO.setCurrentStmt(S); } @@ -1334,6 +1377,16 @@ private: /// information about the layout of the variable. llvm::DenseMap<const ValueDecl *, BlockByrefInfo> BlockByrefInfos; + /// Used by -fsanitize=nullability-return to determine whether the return + /// value can be checked. + llvm::Value *RetValNullabilityPrecondition = nullptr; + + /// Check if -fsanitize=nullability-return instrumentation is required for + /// this function. 
+ bool requiresReturnValueNullabilityCheck() const { + return RetValNullabilityPrecondition; + } + llvm::BasicBlock *TerminateLandingPad; llvm::BasicBlock *TerminateHandler; llvm::BasicBlock *TrapBB; @@ -1553,6 +1606,8 @@ public: SourceLocation Loc = SourceLocation(), SourceLocation StartLoc = SourceLocation()); + static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor); + void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); @@ -1710,6 +1765,9 @@ public: void EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc, SourceLocation EndLoc); + /// Emit a test that checks if the return value \p RV is nonnull. + void EmitReturnValueCheck(llvm::Value *RV, SourceLocation EndLoc); + /// EmitStartEHSpec - Emit the start of the exception spec. void EmitStartEHSpec(const Decl *D); @@ -1928,7 +1986,7 @@ public: /// pointer to a char. Address EmitMSVAListRef(const Expr *E); - /// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will + /// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will /// always be accessible even if no aggregate location is provided. RValue EmitAnyExprToTemp(const Expr *E); @@ -2019,6 +2077,9 @@ public: llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L); llvm::BasicBlock *GetIndirectGotoBlock(); + /// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts. + static bool IsWrappedCXXThis(const Expr *E); + /// EmitNullInitialization - Generate code to set a value of the given type to /// null, If the type contains data member pointers, they will be initialized /// to -1 in accordance with the Itanium C++ ABI. @@ -2230,7 +2291,9 @@ public: TCK_Upcast, /// Checking the operand of a cast to a virtual base object. Must be an /// object within its lifetime. - TCK_UpcastToVirtualBase + TCK_UpcastToVirtualBase, + /// Checking the value assigned to a _Nonnull pointer. Must not be null. + TCK_NonnullAssign }; /// \brief Whether any type-checking sanitizers are enabled. If \c false, @@ -2241,7 +2304,7 @@ public: /// appropriate size and alignment for an object of type \p Type. void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment = CharUnits::Zero(), - bool SkipNullCheck = false); + SanitizerSet SkippedChecks = SanitizerSet()); /// \brief Emit a check that \p Base points into an array object, which /// we can access at index \p Index. 
\p Accessed should be \c false if we @@ -2401,6 +2464,12 @@ public: PeepholeProtection protectFromPeepholes(RValue rvalue); void unprotectFromPeepholes(PeepholeProtection protection); + void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment, + llvm::Value *OffsetValue = nullptr) { + Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment, + OffsetValue); + } + //===--------------------------------------------------------------------===// // Statement Emission //===--------------------------------------------------------------------===// @@ -2463,6 +2532,13 @@ public: void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S); void EmitCoroutineBody(const CoroutineBodyStmt &S); + void EmitCoreturnStmt(const CoreturnStmt &S); + RValue EmitCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + RValue EmitCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID); void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); @@ -2627,7 +2703,9 @@ public: /// the end of the directive. /// /// \param D Directive that has at least one 'reduction' directives. - void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D); + /// \param ReductionKind The kind of reduction to perform. + void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D, + const OpenMPDirectiveKind ReductionKind); /// \brief Emit initial code for linear variables. Creates private copies /// and initializes them with the values according to OpenMP standard. /// @@ -2704,13 +2782,16 @@ public: void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); - /// Emit outlined function for the target directive. - static std::pair<llvm::Function * /*OutlinedFn*/, - llvm::Constant * /*OutlinedFnID*/> - EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM, - const OMPTargetDirective &S, - StringRef ParentName, - bool IsOffloadEntry); + /// Emit device code for the target directive. + static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetDirective &S); + static void + EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelDirective &S); + static void + EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S); /// \brief Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. @@ -2843,6 +2924,13 @@ public: /// representation to its value representation. llvm::Value *EmitFromMemory(llvm::Value *Value, QualType Ty); + /// Check if the scalar \p Value is within the valid range for the given + /// type \p Ty. + /// + /// Returns true if a check is needed (even if the range is unknown). + bool EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, + SourceLocation Loc); + /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to /// the LLVM value representation. @@ -2883,7 +2971,7 @@ public: /// rvalue, returning the rvalue. 
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc); RValue EmitLoadOfExtVectorElementLValue(LValue V); - RValue EmitLoadOfBitfieldLValue(LValue LV); + RValue EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc); RValue EmitLoadOfGlobalRegLValue(LValue LV); /// EmitStoreThroughLValue - Store the specified rvalue into the specified @@ -3092,8 +3180,8 @@ public: RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, ReturnValueSlot ReturnValue); - RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue); + RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, @@ -3149,6 +3237,8 @@ private: public: llvm::Value *EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E); + llvm::Value *EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args); + llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E); llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E); llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E); @@ -3396,6 +3486,10 @@ public: void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount); + /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is + /// nonnull, if \p LHS is marked _Nonnull. + void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); @@ -3429,13 +3523,16 @@ public: /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); + /// \brief Emit a stub for the cross-DSO CFI check function. + void EmitCfiCheckStub(); + /// \brief Emit a cross-DSO CFI failure handling function. void EmitCfiCheckFail(); /// \brief Create a check for a function parameter that may potentially be /// declared as non-null. void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, - const FunctionDecl *FD, unsigned ParmNum); + AbstractCallee AC, unsigned ParmNum); /// EmitCallArg - Emit a single call argument. void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); @@ -3490,14 +3587,18 @@ private: /// \brief Attempts to statically evaluate the object size of E. If that /// fails, emits code to figure the size of E out for us. This is /// pass_object_size aware. + /// + /// If EmittedExpr is non-null, this will use that instead of re-emitting E. llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType); + llvm::IntegerType *ResType, + llvm::Value *EmittedE); /// \brief Emits the size of E, as required by __builtin_object_size. This /// function is aware of pass_object_size parameters, and will act accordingly /// if E is a parameter with the pass_object_size attribute. 
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType); + llvm::IntegerType *ResType, + llvm::Value *EmittedE); public: #ifndef NDEBUG @@ -3533,7 +3634,7 @@ public: template <typename T> void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo, llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange, - const FunctionDecl *CalleeDecl = nullptr, + AbstractCallee AC = AbstractCallee(), unsigned ParamsToSkip = 0, EvaluationOrder Order = EvaluationOrder::Default) { SmallVector<QualType, 16> ArgTypes; @@ -3575,12 +3676,12 @@ public: for (auto *A : llvm::make_range(Arg, ArgRange.end())) ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType()); - EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order); + EmitCallArgs(Args, ArgTypes, ArgRange, AC, ParamsToSkip, Order); } void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes, llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange, - const FunctionDecl *CalleeDecl = nullptr, + AbstractCallee AC = AbstractCallee(), unsigned ParamsToSkip = 0, EvaluationOrder Order = EvaluationOrder::Default); diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 36005430ae4c3..d48bff9c30a3d 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -24,7 +24,6 @@ #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "CodeGenTBAA.h" -#include "ConstantBuilder.h" #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -42,6 +41,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/CodeGenOptions.h" #include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/Triple.h" @@ -406,8 +406,11 @@ void CodeGenModule::Release() { EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) CoverageMapping->emit(); - if (CodeGenOpts.SanitizeCfiCrossDso) + if (CodeGenOpts.SanitizeCfiCrossDso) { CodeGenFunction(*this).EmitCfiCheckFail(); + CodeGenFunction(*this).EmitCfiCheckStub(); + } + emitAtAvailableLinkGuard(); emitLLVMUsed(); if (SanStats) SanStats->finish(); @@ -416,6 +419,12 @@ void CodeGenModule::Release() { (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { EmitModuleLinkOptions(); } + + // Record mregparm value now so it is visible through rest of codegen. + if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) + getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", + CodeGenOpts.NumRegisterParameters); + if (CodeGenOpts.DwarfVersion) { // We actually want the latest version when there are conflicts. // We can change from Warning to Latest if such mode is supported. 
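The NumRegisterParameters change above relies on LLVM module flags to carry the -mregparm value from the frontend to the x86 backend. A small sketch of the mechanism, with an illustrative helper name that is not part of the patch:

#include "llvm/IR/Module.h"

// Record the -mregparm value as a module flag. Module::Error makes a
// mismatch a hard error if two modules with different values are linked.
void recordNumRegisterParameters(llvm::Module &M, unsigned NumRegParams) {
  M.addModuleFlag(llvm::Module::Error, "NumRegisterParameters", NumRegParams);
}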
@@ -833,7 +842,7 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, AttributeListType AttributeList; ConstructAttributeList(F->getName(), Info, D, AttributeList, CallingConv, false); - F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList)); + F->setAttributes(llvm::AttributeList::get(getLLVMContext(), AttributeList)); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -882,10 +891,10 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get( - F->getContext(), - llvm::AttributeSet::FunctionIndex, B)); + F->addAttributes( + llvm::AttributeList::FunctionIndex, + llvm::AttributeList::get(F->getContext(), + llvm::AttributeList::FunctionIndex, B)); return; } @@ -951,9 +960,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get( - F->getContext(), llvm::AttributeSet::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, + llvm::AttributeList::get( + F->getContext(), llvm::AttributeList::FunctionIndex, B)); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) @@ -1029,7 +1038,6 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } else if (ND->hasAttr<DLLExportAttr>()) { GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) { // "extern_weak" is overloaded in LLVM; we probably should have // separate linkage types for this. @@ -1107,7 +1115,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. - F->addAttribute(llvm::AttributeSet::FunctionIndex, + F->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoBuiltin); // A sane operator new returns a non-aliasing pointer. 
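Most of the churn in this file tracks LLVM's rename of AttributeSet to AttributeList; the idiom for attaching function-level attributes is unchanged, only the type and index constant move. A condensed sketch mirroring the calls in the surrounding hunks (the helper name is illustrative):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Collect attributes in an AttrBuilder, then attach them at FunctionIndex.
void markNoInline(llvm::Function *F) {
  llvm::AttrBuilder B;
  B.addAttribute(llvm::Attribute::NoInline);
  F->addAttributes(
      llvm::AttributeList::FunctionIndex,
      llvm::AttributeList::get(F->getContext(),
                               llvm::AttributeList::FunctionIndex, B));
}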
@@ -1116,7 +1124,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, auto Kind = FD->getDeclName().getCXXOverloadedOperator(); if (getCodeGenOpts().AssumeSaneOperatorNew && (Kind == OO_New || Kind == OO_Array_New)) - F->addAttribute(llvm::AttributeSet::ReturnIndex, + F->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias); } @@ -1482,6 +1490,30 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, return false; } +bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, + StringRef Category) const { + if (!LangOpts.XRayInstrument) + return false; + const auto &XRayFilter = getContext().getXRayFilter(); + using ImbueAttr = XRayFunctionFilter::ImbueAttribute; + auto Attr = XRayFunctionFilter::ImbueAttribute::NONE; + if (Loc.isValid()) + Attr = XRayFilter.shouldImbueLocation(Loc, Category); + if (Attr == ImbueAttr::NONE) + Attr = XRayFilter.shouldImbueFunction(Fn->getName()); + switch (Attr) { + case ImbueAttr::NONE: + return false; + case ImbueAttr::ALWAYS: + Fn->addFnAttr("function-instrument", "xray-always"); + break; + case ImbueAttr::NEVER: + Fn->addFnAttr("function-instrument", "xray-never"); + break; + } + return true; +} + bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) @@ -1693,6 +1725,16 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } +// Check if T is a class type with a destructor that's not dllimport. +static bool HasNonDllImportDtor(QualType T) { + if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs<RecordType>()) + if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) + if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>()) + return true; + + return false; +} + namespace { struct FunctionIsDirectlyRecursive : public RecursiveASTVisitor<FunctionIsDirectlyRecursive> { @@ -1726,6 +1768,7 @@ namespace { } }; + // Make sure we're not referencing non-imported vars or functions. struct DLLImportFunctionVisitor : public RecursiveASTVisitor<DLLImportFunctionVisitor> { bool SafeToInline = true; @@ -1733,12 +1776,25 @@ namespace { bool shouldVisitImplicitCode() const { return true; } bool VisitVarDecl(VarDecl *VD) { - // A thread-local variable cannot be imported. - SafeToInline = !VD->getTLSKind(); + if (VD->getTLSKind()) { + // A thread-local variable cannot be imported. + SafeToInline = false; + return SafeToInline; + } + + // A variable definition might imply a destructor call. + if (VD->isThisDeclarationADefinition()) + SafeToInline = !HasNonDllImportDtor(VD->getType()); + + return SafeToInline; + } + + bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { + if (const auto *D = E->getTemporary()->getDestructor()) + SafeToInline = D->hasAttr<DLLImportAttr>(); return SafeToInline; } - // Make sure we're not referencing non-imported vars or functions. bool VisitDeclRefExpr(DeclRefExpr *E) { ValueDecl *VD = E->getDecl(); if (isa<FunctionDecl>(VD)) @@ -1747,14 +1803,28 @@ namespace { SafeToInline = !V->hasGlobalStorage() || V->hasAttr<DLLImportAttr>(); return SafeToInline; } + bool VisitCXXConstructExpr(CXXConstructExpr *E) { SafeToInline = E->getConstructor()->hasAttr<DLLImportAttr>(); return SafeToInline; } + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { + CXXMethodDecl *M = E->getMethodDecl(); + if (!M) { + // Call through a pointer to member function. This is safe to inline. 
+ SafeToInline = true; + } else { + SafeToInline = M->hasAttr<DLLImportAttr>(); + } + return SafeToInline; + } + bool VisitCXXDeleteExpr(CXXDeleteExpr *E) { SafeToInline = E->getOperatorDelete()->hasAttr<DLLImportAttr>(); return SafeToInline; } + bool VisitCXXNewExpr(CXXNewExpr *E) { SafeToInline = E->getOperatorNew()->hasAttr<DLLImportAttr>(); return SafeToInline; @@ -1783,16 +1853,6 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { return Walker.Result; } -// Check if T is a class type with a destructor that's not dllimport. -static bool HasNonDllImportDtor(QualType T) { - if (const RecordType *RT = dyn_cast<RecordType>(T)) - if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) - if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>()) - return true; - - return false; -} - bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; @@ -1828,22 +1888,6 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { return !isTriviallyRecursive(F); } -/// If the type for the method's class was generated by -/// CGDebugInfo::createContextChain(), the cache contains only a -/// limited DIType without any declarations. Since EmitFunctionStart() -/// needs to find the canonical declaration for each method, we need -/// to construct the complete type prior to emitting the method. -void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) { - if (!D->isInstance()) - return; - - if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { - const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext())); - DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation()); - } -} - void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast<ValueDecl>(GD.getDecl()); @@ -1858,7 +1902,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { return; if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) { - CompleteDIClassType(Method); // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method)) @@ -1893,13 +1936,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. -llvm::Constant * -CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, - llvm::Type *Ty, - GlobalDecl GD, bool ForVTable, - bool DontDefer, bool IsThunk, - llvm::AttributeSet ExtraAttrs, - ForDefinition_t IsForDefinition) { +llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( + StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, + bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs, + ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); // Lookup the entry, lazily creating it if necessary. 
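The imbueXRayAttrs() helper added above maps the always/never instrumentation lists onto per-function string attributes that the XRay instrumentation pass reads later; callers fall back to the xray-instruction-threshold attribute when no list matched. A reduced sketch of that mapping; the ImbueAttr enum stands in for XRayFunctionFilter::ImbueAttribute and the helper is hypothetical, but the attribute spellings come from the patch:

#include "llvm/IR/Function.h"

enum class ImbueAttr { NONE, ALWAYS, NEVER };

// Returns true if an attribute was applied; on false the caller applies the
// default xray-instruction-threshold attribute instead.
bool applyXRayImbueAttr(llvm::Function *Fn, ImbueAttr Attr) {
  switch (Attr) {
  case ImbueAttr::NONE:
    return false;
  case ImbueAttr::ALWAYS:
    Fn->addFnAttr("function-instrument", "xray-always");
    return true;
  case ImbueAttr::NEVER:
    Fn->addFnAttr("function-instrument", "xray-never");
    return true;
  }
  return false; // not reached; keeps -Wreturn-type quiet
}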
@@ -1989,12 +2029,11 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); - if (ExtraAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) { - llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeSet::FunctionIndex); - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(VMContext, - llvm::AttributeSet::FunctionIndex, - B)); + if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { + llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); + F->addAttributes(llvm::AttributeList::FunctionIndex, + llvm::AttributeList::get( + VMContext, llvm::AttributeList::FunctionIndex, B)); } if (!DontDefer) { @@ -2069,7 +2108,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, StringRef MangledName = getMangledName(GD); return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, - /*IsThunk=*/false, llvm::AttributeSet(), + /*IsThunk=*/false, llvm::AttributeList(), IsForDefinition); } @@ -2115,7 +2154,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { /// type and name. llvm::Constant * CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::AttributeSet ExtraAttrs, + llvm::AttributeList ExtraAttrs, bool Local) { llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, @@ -2143,9 +2182,8 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, /// CreateBuiltinFunction - Create a new builtin function with the specified /// type and name. llvm::Constant * -CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, - StringRef Name, - llvm::AttributeSet ExtraAttrs) { +CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name, + llvm::AttributeList ExtraAttrs) { llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs); @@ -2803,7 +2841,7 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) - return llvm::Function::AvailableExternallyLinkage; + return llvm::GlobalValue::AvailableExternallyLinkage; // Note that Apple's kernel linker doesn't support symbol // coalescing, so we need to avoid linkonce and weak linkages there. @@ -2897,14 +2935,8 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, continue; // Get the call site's attribute list. - SmallVector<llvm::AttributeSet, 8> newAttrs; - llvm::AttributeSet oldAttrs = callSite.getAttributes(); - - // Collect any return attributes from the call. - if (oldAttrs.hasAttributes(llvm::AttributeSet::ReturnIndex)) - newAttrs.push_back( - llvm::AttributeSet::get(newFn->getContext(), - oldAttrs.getRetAttributes())); + SmallVector<llvm::AttributeSet, 8> newArgAttrs; + llvm::AttributeList oldAttrs = callSite.getAttributes(); // If the function was passed too few arguments, don't transform. unsigned newNumArgs = newFn->arg_size(); @@ -2914,27 +2946,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // If any of the types mismatch, we don't transform. 
unsigned argNo = 0; bool dontTransform = false; - for (llvm::Function::arg_iterator ai = newFn->arg_begin(), - ae = newFn->arg_end(); ai != ae; ++ai, ++argNo) { - if (callSite.getArgument(argNo)->getType() != ai->getType()) { + for (llvm::Argument &A : newFn->args()) { + if (callSite.getArgument(argNo)->getType() != A.getType()) { dontTransform = true; break; } // Add any parameter attributes. - if (oldAttrs.hasAttributes(argNo + 1)) - newAttrs. - push_back(llvm:: - AttributeSet::get(newFn->getContext(), - oldAttrs.getParamAttributes(argNo + 1))); + newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo)); + argNo++; } if (dontTransform) continue; - if (oldAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) - newAttrs.push_back(llvm::AttributeSet::get(newFn->getContext(), - oldAttrs.getFnAttributes())); - // Okay, we can transform this. Create the new call instruction and copy // over the required information. newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo); @@ -2958,8 +2982,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite.getInstruction()); - newCall.setAttributes( - llvm::AttributeSet::get(newFn->getContext(), newAttrs)); + newCall.setAttributes(llvm::AttributeList::get( + newFn->getContext(), oldAttrs.getFnAttributes(), + oldAttrs.getRetAttributes(), newArgAttrs)); newCall.setCallingConv(callSite.getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. @@ -3341,6 +3366,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm_unreachable("unknown file format"); case llvm::Triple::COFF: case llvm::Triple::ELF: + case llvm::Triple::Wasm: GV->setSection("cfstring"); break; case llvm::Triple::MachO: @@ -3790,6 +3816,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitDeclContext(cast<NamespaceDecl>(D)); break; case Decl::CXXRecord: + if (DebugInfo) { + if (auto *ES = D->getASTContext().getExternalSource()) + if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) + DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D)); + } // Emit any static data members, they may be definitions. for (auto *I : cast<CXXRecordDecl>(D)->decls()) if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I)) diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 36f6785fd1b92..d0b2dd717c8c7 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -28,6 +28,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/SanitizerBlacklist.h" +#include "clang/Basic/XRayLists.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -546,6 +547,10 @@ public: return *ObjCData; } + // Version checking function, used to implement ObjC's @available: + // i32 @__isOSVersionAtLeast(i32, i32, i32) + llvm::Constant *IsOSVersionAtLeastFn = nullptr; + InstrProfStats &getPGOStats() { return PGOStats; } llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } @@ -906,14 +911,13 @@ public: /// Create a new runtime function with the specified type and name. llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false); /// Create a new compiler builtin function with the specified type and name. 
- llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty, - StringRef Name, - llvm::AttributeSet ExtraAttrs = - llvm::AttributeSet()); + llvm::Constant * + CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name, + llvm::AttributeList ExtraAttrs = llvm::AttributeList()); /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); @@ -1022,6 +1026,25 @@ public: CGCalleeInfo CalleeInfo, AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite); + /// Adds attributes to F according to our CodeGenOptions and LangOptions, as + /// though we had emitted it ourselves. We remove any attributes on F that + /// conflict with the attributes we add here. + /// + /// This is useful for adding attrs to bitcode modules that you want to link + /// with but don't control, such as CUDA's libdevice. When linking with such + /// a bitcode library, you might want to set e.g. its functions' + /// "unsafe-fp-math" attribute to match the attr of the functions you're + /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of + /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM + /// will propagate unsafe-fp-math=false up to every transitive caller of a + /// function in the bitcode library! + /// + /// With the exception of fast-math attrs, this will only make the attributes + /// on the function more conservative. But it's unsafe to call this on a + /// function which relies on particular fast-math attributes for correctness. + /// It's up to you to ensure that this is safe. + void AddDefaultFnAttrs(llvm::Function &F); + // Fills in the supplied string map with the set of target features for the // passed in function. void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, @@ -1103,6 +1126,12 @@ public: QualType Ty, StringRef Category = StringRef()) const; + /// Imbue XRay attributes to a function, applying the always/never attribute + /// lists in the process. Returns true if we did imbue attributes this way, + /// false otherwise. + bool imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, + StringRef Category = StringRef()) const; + SanitizerMetadata *getSanitizerMetadata() { return SanitizerMD.get(); } @@ -1176,7 +1205,7 @@ public: void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD); - /// \breif Get the declaration of std::terminate for the platform. + /// \brief Get the declaration of std::terminate for the platform. 
llvm::Constant *getTerminateFn(); llvm::SanitizerStatReport &getSanStats(); @@ -1190,12 +1219,11 @@ public: llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT); private: - llvm::Constant * - GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, - bool ForVTable, bool DontDefer = false, - bool IsThunk = false, - llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), - ForDefinition_t IsForDefinition = NotForDefinition); + llvm::Constant *GetOrCreateLLVMFunction( + StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, + bool DontDefer = false, bool IsThunk = false, + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), + ForDefinition_t IsForDefinition = NotForDefinition); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, @@ -1222,7 +1250,6 @@ private: void EmitDeclContext(const DeclContext *DC); void EmitLinkageSpec(const LinkageSpecDecl *D); - void CompleteDIClassType(const CXXMethodDecl* D); /// \brief Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); @@ -1266,6 +1293,10 @@ private: /// Emit any vtables which we deferred and still have a use for. void EmitDeferredVTables(); + /// Emit a dummy function that reference a CoreFoundation symbol when + /// @available is used on Darwin. + void emitAtAvailableLinkGuard(); + /// Emit the llvm.used and llvm.compiler.used metadata. void emitLLVMUsed(); @@ -1304,6 +1335,12 @@ private: /// Check whether we can use a "simpler", more core exceptions personality /// function. void SimplifyPersonality(); + + /// Helper function for ConstructAttributeList and AddDefaultFnAttrs. + /// Constructs an AttrList for a function with the given properties. + void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs); }; } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index c6c3fa41e6281..6acedc033a6ea 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -612,7 +612,7 @@ uint64_t PGOHash::finalize() { llvm::MD5::MD5Result Result; MD5.final(Result); using namespace llvm::support; - return endian::read<uint64_t, little, unaligned>(Result); + return Result.low(); } void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { @@ -626,12 +626,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { // Constructors and destructors may be represented by several functions in IR. // If so, instrument only base variant, others are implemented by delegation // to the base one, it would be counted twice otherwise. 
- if (CGM.getTarget().getCXXABI().hasConstructorVariants() && - ((isa<CXXConstructorDecl>(GD.getDecl()) && - GD.getCtorType() != Ctor_Base) || - (isa<CXXDestructorDecl>(GD.getDecl()) && - GD.getDtorType() != Dtor_Base))) { + if (CGM.getTarget().getCXXABI().hasConstructorVariants()) { + if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base) return; + + if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D)) + if (GD.getCtorType() != Ctor_Base && + CodeGenFunction::IsConstructorDelegationValid(CCD)) + return; } CGM.ClearUnusedCoverageMapping(D); setFuncName(Fn); @@ -737,7 +739,8 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, Fn->setEntryCount(FunctionCount); } -void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { +void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, + llvm::Value *StepV) { if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap) return; if (!Builder.GetInsertBlock()) @@ -745,11 +748,18 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { unsigned Counter = (*RegionCounterMap)[S]; auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext()); - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), - {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter)}); + + llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), + Builder.getInt32(Counter), StepV}; + if (!StepV) + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), + makeArrayRef(Args, 4)); + else + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step), + makeArrayRef(Args)); } // This method either inserts a call to the profile run-time during diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 4f229cde63b03..0026df570bce0 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -105,7 +105,8 @@ private: void emitCounterRegionMapping(const Decl *D); public: - void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S); + void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, + llvm::Value *StepV); /// Return the region count for the counter at the given index. 
uint64_t getRegionCount(const Stmt *S) { diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index adb40c8c0d478..dc24b2040f047 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -472,7 +472,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; @@ -487,7 +486,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } case Type::Auto: - llvm_unreachable("Unexpected undeduced auto type!"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Unexpected undeduced type!"); case Type::Complex: { llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType()); ResultType = llvm::StructType::get(EltTy, EltTy, nullptr); diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 2ce6591e4eb7d..f0b97ebde1c21 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -303,11 +303,14 @@ public: const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args, const CXXConstructorDecl *D, CXXCtorType CtorKind, - unsigned ExtraArgs); + unsigned ExtraPrefixArgs, + unsigned ExtraSuffixArgs, + bool PassProtoArgs = true); const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *type, - RequiredArgs required); + RequiredArgs required, + unsigned numPrefixArgs); const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD, CXXCtorType CT); diff --git a/lib/CodeGen/ConstantBuilder.h b/lib/CodeGen/ConstantBuilder.h deleted file mode 100644 index 40b34a9d61c8f..0000000000000 --- a/lib/CodeGen/ConstantBuilder.h +++ /dev/null @@ -1,444 +0,0 @@ -//===----- ConstantBuilder.h - Builder for LLVM IR constants ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class provides a convenient interface for building complex -// global initializers. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H -#define LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" - -#include "CodeGenModule.h" - -#include <vector> - -namespace clang { -namespace CodeGen { - -class ConstantStructBuilder; -class ConstantArrayBuilder; - -/// A convenience builder class for complex constant initializers, -/// especially for anonymous global structures used by various language -/// runtimes. 
-/// -/// The basic usage pattern is expected to be something like: -/// ConstantInitBuilder builder(CGM); -/// auto toplevel = builder.beginStruct(); -/// toplevel.addInt(CGM.SizeTy, widgets.size()); -/// auto widgetArray = builder.beginArray(); -/// for (auto &widget : widgets) { -/// auto widgetDesc = widgetArray.beginStruct(); -/// widgetDesc.addInt(CGM.SizeTy, widget.getPower()); -/// widgetDesc.add(CGM.GetAddrOfConstantString(widget.getName())); -/// widgetDesc.add(CGM.GetAddrOfGlobal(widget.getInitializerDecl())); -/// widgetArray.add(widgetDesc.finish()); -/// } -/// toplevel.add(widgetArray.finish()); -/// auto global = toplevel.finishAndCreateGlobal("WIDGET_LIST", Align, -/// /*constant*/ true); -class ConstantInitBuilder { - struct SelfReference { - llvm::GlobalVariable *Dummy; - llvm::SmallVector<llvm::Constant*, 4> Indices; - - SelfReference(llvm::GlobalVariable *dummy) : Dummy(dummy) {} - }; - CodeGenModule &CGM; - llvm::SmallVector<llvm::Constant*, 16> Buffer; - std::vector<SelfReference> SelfReferences; - bool Frozen = false; - -public: - explicit ConstantInitBuilder(CodeGenModule &CGM) : CGM(CGM) {} - - ~ConstantInitBuilder() { - assert(Buffer.empty() && "didn't claim all values out of buffer"); - } - - class AggregateBuilderBase { - protected: - ConstantInitBuilder &Builder; - AggregateBuilderBase *Parent; - size_t Begin; - bool Finished = false; - bool Frozen = false; - - llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() { - return Builder.Buffer; - } - - const llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() const { - return Builder.Buffer; - } - - AggregateBuilderBase(ConstantInitBuilder &builder, - AggregateBuilderBase *parent) - : Builder(builder), Parent(parent), Begin(builder.Buffer.size()) { - if (parent) { - assert(!parent->Frozen && "parent already has child builder active"); - parent->Frozen = true; - } else { - assert(!builder.Frozen && "builder already has child builder active"); - builder.Frozen = true; - } - } - - ~AggregateBuilderBase() { - assert(Finished && "didn't finish aggregate builder"); - } - - void markFinished() { - assert(!Frozen && "child builder still active"); - assert(!Finished && "builder already finished"); - Finished = true; - if (Parent) { - assert(Parent->Frozen && - "parent not frozen while child builder active"); - Parent->Frozen = false; - } else { - assert(Builder.Frozen && - "builder not frozen while child builder active"); - Builder.Frozen = false; - } - } - - public: - // Not copyable. - AggregateBuilderBase(const AggregateBuilderBase &) = delete; - AggregateBuilderBase &operator=(const AggregateBuilderBase &) = delete; - - // Movable, mostly to allow returning. But we have to write this out - // properly to satisfy the assert in the destructor. - AggregateBuilderBase(AggregateBuilderBase &&other) - : Builder(other.Builder), Parent(other.Parent), Begin(other.Begin), - Finished(other.Finished), Frozen(other.Frozen) { - other.Finished = false; - } - AggregateBuilderBase &operator=(AggregateBuilderBase &&other) = delete; - - /// Abandon this builder completely. - void abandon() { - markFinished(); - auto &buffer = Builder.Buffer; - buffer.erase(buffer.begin() + Begin, buffer.end()); - } - - /// Add a new value to this initializer. 
- void add(llvm::Constant *value) { - assert(value && "adding null value to constant initializer"); - assert(!Finished && "cannot add more values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - Builder.Buffer.push_back(value); - } - - /// Add an integer value of type size_t. - void addSize(CharUnits size) { - add(Builder.CGM.getSize(size)); - } - - /// Add an integer value of a specific type. - void addInt(llvm::IntegerType *intTy, uint64_t value, - bool isSigned = false) { - add(llvm::ConstantInt::get(intTy, value, isSigned)); - } - - /// Add a null pointer of a specific type. - void addNullPointer(llvm::PointerType *ptrTy) { - add(llvm::ConstantPointerNull::get(ptrTy)); - } - - /// Add a bitcast of a value to a specific type. - void addBitCast(llvm::Constant *value, llvm::Type *type) { - add(llvm::ConstantExpr::getBitCast(value, type)); - } - - /// Add a bunch of new values to this initializer. - void addAll(ArrayRef<llvm::Constant *> values) { - assert(!Finished && "cannot add more values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - Builder.Buffer.append(values.begin(), values.end()); - } - - /// An opaque class to hold the abstract position of a placeholder. - class PlaceholderPosition { - size_t Index; - friend class AggregateBuilderBase; - PlaceholderPosition(size_t index) : Index(index) {} - }; - - /// Add a placeholder value to the structure. The returned position - /// can be used to set the value later; it will not be invalidated by - /// any intermediate operations except (1) filling the same position or - /// (2) finishing the entire builder. - /// - /// This is useful for emitting certain kinds of structure which - /// contain some sort of summary field, generaly a count, before any - /// of the data. By emitting a placeholder first, the structure can - /// be emitted eagerly. - PlaceholderPosition addPlaceholder() { - assert(!Finished && "cannot add more values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - Builder.Buffer.push_back(nullptr); - return Builder.Buffer.size() - 1; - } - - /// Fill a previously-added placeholder. - void fillPlaceholderWithInt(PlaceholderPosition position, - llvm::IntegerType *type, uint64_t value, - bool isSigned = false) { - fillPlaceholder(position, llvm::ConstantInt::get(type, value, isSigned)); - } - - /// Fill a previously-added placeholder. - void fillPlaceholder(PlaceholderPosition position, llvm::Constant *value) { - assert(!Finished && "cannot change values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - llvm::Constant *&slot = Builder.Buffer[position.Index]; - assert(slot == nullptr && "placeholder already filled"); - slot = value; - } - - /// Produce an address which will eventually point to the the next - /// position to be filled. This is computed with an indexed - /// getelementptr rather than by computing offsets. - /// - /// The returned pointer will have type T*, where T is the given - /// position. - llvm::Constant *getAddrOfCurrentPosition(llvm::Type *type) { - // Make a global variable. We will replace this with a GEP to this - // position after installing the initializer. 
- auto dummy = - new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, - llvm::GlobalVariable::PrivateLinkage, - nullptr, ""); - Builder.SelfReferences.emplace_back(dummy); - auto &entry = Builder.SelfReferences.back(); - (void) getGEPIndicesToCurrentPosition(entry.Indices); - return dummy; - } - - ArrayRef<llvm::Constant*> getGEPIndicesToCurrentPosition( - llvm::SmallVectorImpl<llvm::Constant*> &indices) { - getGEPIndicesTo(indices, Builder.Buffer.size()); - return indices; - } - - ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr); - ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr); - - private: - void getGEPIndicesTo(llvm::SmallVectorImpl<llvm::Constant*> &indices, - size_t position) const { - // Recurse on the parent builder if present. - if (Parent) { - Parent->getGEPIndicesTo(indices, Begin); - - // Otherwise, add an index to drill into the first level of pointer. - } else { - assert(indices.empty()); - indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0)); - } - - assert(position >= Begin); - // We have to use i32 here because struct GEPs demand i32 indices. - // It's rather unlikely to matter in practice. - indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, - position - Begin)); - } - }; - - template <class Impl> - class AggregateBuilder : public AggregateBuilderBase { - protected: - AggregateBuilder(ConstantInitBuilder &builder, - AggregateBuilderBase *parent) - : AggregateBuilderBase(builder, parent) {} - - Impl &asImpl() { return *static_cast<Impl*>(this); } - - public: - /// Given that this builder was created by beginning an array or struct - /// component on the given parent builder, finish the array/struct - /// component and add it to the parent. - /// - /// It is an intentional choice that the parent is passed in explicitly - /// despite it being redundant with information already kept in the - /// builder. This aids in readability by making it easier to find the - /// places that add components to a builder, as well as "bookending" - /// the sub-builder more explicitly. - void finishAndAddTo(AggregateBuilderBase &parent) { - assert(Parent == &parent && "adding to non-parent builder"); - parent.add(asImpl().finishImpl()); - } - - /// Given that this builder was created by beginning an array or struct - /// directly on a ConstantInitBuilder, finish the array/struct and - /// create a global variable with it as the initializer. - template <class... As> - llvm::GlobalVariable *finishAndCreateGlobal(As &&...args) { - assert(!Parent && "finishing non-root builder"); - return Builder.createGlobal(asImpl().finishImpl(), - std::forward<As>(args)...); - } - - /// Given that this builder was created by beginning an array or struct - /// directly on a ConstantInitBuilder, finish the array/struct and - /// set it as the initializer of the given global variable. 
- void finishAndSetAsInitializer(llvm::GlobalVariable *global) { - assert(!Parent && "finishing non-root builder"); - return Builder.setGlobalInitializer(global, asImpl().finishImpl()); - } - }; - - ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr); - - ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr); - -private: - llvm::GlobalVariable *createGlobal(llvm::Constant *initializer, - const llvm::Twine &name, - CharUnits alignment, - bool constant = false, - llvm::GlobalValue::LinkageTypes linkage - = llvm::GlobalValue::InternalLinkage, - unsigned addressSpace = 0) { - auto GV = new llvm::GlobalVariable(CGM.getModule(), - initializer->getType(), - constant, - linkage, - initializer, - name, - /*insert before*/ nullptr, - llvm::GlobalValue::NotThreadLocal, - addressSpace); - GV->setAlignment(alignment.getQuantity()); - resolveSelfReferences(GV); - return GV; - } - - void setGlobalInitializer(llvm::GlobalVariable *GV, - llvm::Constant *initializer) { - GV->setInitializer(initializer); - resolveSelfReferences(GV); - } - - void resolveSelfReferences(llvm::GlobalVariable *GV) { - for (auto &entry : SelfReferences) { - llvm::Constant *resolvedReference = - llvm::ConstantExpr::getInBoundsGetElementPtr( - GV->getValueType(), GV, entry.Indices); - entry.Dummy->replaceAllUsesWith(resolvedReference); - entry.Dummy->eraseFromParent(); - } - } -}; - -/// A helper class of ConstantInitBuilder, used for building constant -/// array initializers. -class ConstantArrayBuilder - : public ConstantInitBuilder::AggregateBuilder<ConstantArrayBuilder> { - llvm::Type *EltTy; - friend class ConstantInitBuilder; - template <class Impl> friend class ConstantInitBuilder::AggregateBuilder; - ConstantArrayBuilder(ConstantInitBuilder &builder, - AggregateBuilderBase *parent, llvm::Type *eltTy) - : AggregateBuilder(builder, parent), EltTy(eltTy) {} -public: - size_t size() const { - assert(!Finished); - assert(!Frozen); - assert(Begin <= getBuffer().size()); - return getBuffer().size() - Begin; - } - - bool empty() const { - return size() == 0; - } - -private: - /// Form an array constant from the values that have been added to this - /// builder. - llvm::Constant *finishImpl() { - markFinished(); - - auto &buffer = getBuffer(); - assert((Begin < buffer.size() || - (Begin == buffer.size() && EltTy)) - && "didn't add any array elements without element type"); - auto elts = llvm::makeArrayRef(buffer).slice(Begin); - auto eltTy = EltTy ? EltTy : elts[0]->getType(); - auto type = llvm::ArrayType::get(eltTy, elts.size()); - auto constant = llvm::ConstantArray::get(type, elts); - buffer.erase(buffer.begin() + Begin, buffer.end()); - return constant; - } -}; - -inline ConstantArrayBuilder -ConstantInitBuilder::beginArray(llvm::Type *eltTy) { - return ConstantArrayBuilder(*this, nullptr, eltTy); -} - -inline ConstantArrayBuilder -ConstantInitBuilder::AggregateBuilderBase::beginArray(llvm::Type *eltTy) { - return ConstantArrayBuilder(Builder, this, eltTy); -} - -/// A helper class of ConstantInitBuilder, used for building constant -/// struct initializers. -class ConstantStructBuilder - : public ConstantInitBuilder::AggregateBuilder<ConstantStructBuilder> { - llvm::StructType *Ty; - friend class ConstantInitBuilder; - template <class Impl> friend class ConstantInitBuilder::AggregateBuilder; - ConstantStructBuilder(ConstantInitBuilder &builder, - AggregateBuilderBase *parent, llvm::StructType *ty) - : AggregateBuilder(builder, parent), Ty(ty) {} - - /// Finish the struct. 
- llvm::Constant *finishImpl() { - markFinished(); - - auto &buffer = getBuffer(); - assert(Begin < buffer.size() && "didn't add any struct elements?"); - auto elts = llvm::makeArrayRef(buffer).slice(Begin); - - llvm::Constant *constant; - if (Ty) { - constant = llvm::ConstantStruct::get(Ty, elts); - } else { - constant = llvm::ConstantStruct::getAnon(elts, /*packed*/ false); - } - - buffer.erase(buffer.begin() + Begin, buffer.end()); - return constant; - } -}; - -inline ConstantStructBuilder -ConstantInitBuilder::beginStruct(llvm::StructType *structTy) { - return ConstantStructBuilder(*this, nullptr, structTy); -} - -inline ConstantStructBuilder -ConstantInitBuilder::AggregateBuilderBase::beginStruct( - llvm::StructType *structTy) { - return ConstantStructBuilder(Builder, this, structTy); -} - -} // end namespace CodeGen -} // end namespace clang - -#endif diff --git a/lib/CodeGen/ConstantInitBuilder.cpp b/lib/CodeGen/ConstantInitBuilder.cpp new file mode 100644 index 0000000000000..7f8d809850329 --- /dev/null +++ b/lib/CodeGen/ConstantInitBuilder.cpp @@ -0,0 +1,280 @@ +//===--- ConstantInitBuilder.cpp - Global initializer builder -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines out-of-line routines for building initializers for +// global variables, in particular the kind of globals that are implicitly +// introduced by various language ABIs. +// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/ConstantInitBuilder.h" +#include "CodeGenModule.h" + +using namespace clang; +using namespace CodeGen; + +llvm::Type *ConstantInitFuture::getType() const { + assert(Data && "dereferencing null future"); + if (Data.is<llvm::Constant*>()) { + return Data.get<llvm::Constant*>()->getType(); + } else { + return Data.get<ConstantInitBuilderBase*>()->Buffer[0]->getType(); + } +} + +void ConstantInitFuture::abandon() { + assert(Data && "abandoning null future"); + if (auto builder = Data.dyn_cast<ConstantInitBuilderBase*>()) { + builder->abandon(0); + } + Data = nullptr; +} + +void ConstantInitFuture::installInGlobal(llvm::GlobalVariable *GV) { + assert(Data && "installing null future"); + if (Data.is<llvm::Constant*>()) { + GV->setInitializer(Data.get<llvm::Constant*>()); + } else { + auto &builder = *Data.get<ConstantInitBuilderBase*>(); + assert(builder.Buffer.size() == 1); + builder.setGlobalInitializer(GV, builder.Buffer[0]); + builder.Buffer.clear(); + Data = nullptr; + } +} + +ConstantInitFuture +ConstantInitBuilderBase::createFuture(llvm::Constant *initializer) { + assert(Buffer.empty() && "buffer not current empty"); + Buffer.push_back(initializer); + return ConstantInitFuture(this); +} + +// Only used in this file. 
+inline ConstantInitFuture::ConstantInitFuture(ConstantInitBuilderBase *builder) + : Data(builder) { + assert(!builder->Frozen); + assert(builder->Buffer.size() == 1); + assert(builder->Buffer[0] != nullptr); +} + +llvm::GlobalVariable * +ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer, + const llvm::Twine &name, + CharUnits alignment, + bool constant, + llvm::GlobalValue::LinkageTypes linkage, + unsigned addressSpace) { + auto GV = new llvm::GlobalVariable(CGM.getModule(), + initializer->getType(), + constant, + linkage, + initializer, + name, + /*insert before*/ nullptr, + llvm::GlobalValue::NotThreadLocal, + addressSpace); + GV->setAlignment(alignment.getQuantity()); + resolveSelfReferences(GV); + return GV; +} + +void ConstantInitBuilderBase::setGlobalInitializer(llvm::GlobalVariable *GV, + llvm::Constant *initializer){ + GV->setInitializer(initializer); + + if (!SelfReferences.empty()) + resolveSelfReferences(GV); +} + +void ConstantInitBuilderBase::resolveSelfReferences(llvm::GlobalVariable *GV) { + for (auto &entry : SelfReferences) { + llvm::Constant *resolvedReference = + llvm::ConstantExpr::getInBoundsGetElementPtr( + GV->getValueType(), GV, entry.Indices); + auto dummy = entry.Dummy; + dummy->replaceAllUsesWith(resolvedReference); + dummy->eraseFromParent(); + } + SelfReferences.clear(); +} + +void ConstantInitBuilderBase::abandon(size_t newEnd) { + // Remove all the entries we've added. + Buffer.erase(Buffer.begin() + newEnd, Buffer.end()); + + // If we're abandoning all the way to the beginning, destroy + // all the self-references, because we might not get another + // opportunity. + if (newEnd == 0) { + for (auto &entry : SelfReferences) { + auto dummy = entry.Dummy; + dummy->replaceAllUsesWith(llvm::UndefValue::get(dummy->getType())); + dummy->eraseFromParent(); + } + SelfReferences.clear(); + } +} + +void ConstantAggregateBuilderBase::addSize(CharUnits size) { + add(Builder.CGM.getSize(size)); +} + +llvm::Constant * +ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType, + llvm::Constant *target) { + // Compute the address of the relative-address slot. + auto base = getAddrOfCurrentPosition(offsetType); + + // Subtract. + base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy); + target = llvm::ConstantExpr::getPtrToInt(target, Builder.CGM.IntPtrTy); + llvm::Constant *offset = llvm::ConstantExpr::getSub(target, base); + + // Truncate to the relative-address type if necessary. + if (Builder.CGM.IntPtrTy != offsetType) { + offset = llvm::ConstantExpr::getTrunc(offset, offsetType); + } + + return offset; +} + +llvm::Constant * +ConstantAggregateBuilderBase::getAddrOfCurrentPosition(llvm::Type *type) { + // Make a global variable. We will replace this with a GEP to this + // position after installing the initializer. + auto dummy = + new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, + llvm::GlobalVariable::PrivateLinkage, + nullptr, ""); + Builder.SelfReferences.emplace_back(dummy); + auto &entry = Builder.SelfReferences.back(); + (void) getGEPIndicesToCurrentPosition(entry.Indices); + return dummy; +} + +void ConstantAggregateBuilderBase::getGEPIndicesTo( + llvm::SmallVectorImpl<llvm::Constant*> &indices, + size_t position) const { + // Recurse on the parent builder if present. + if (Parent) { + Parent->getGEPIndicesTo(indices, Begin); + + // Otherwise, add an index to drill into the first level of pointer. 
+ } else { + assert(indices.empty()); + indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0)); + } + + assert(position >= Begin); + // We have to use i32 here because struct GEPs demand i32 indices. + // It's rather unlikely to matter in practice. + indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, + position - Begin)); +} + +ConstantAggregateBuilderBase::PlaceholderPosition +ConstantAggregateBuilderBase::addPlaceholderWithSize(llvm::Type *type) { + // Bring the offset up to the last field. + CharUnits offset = getNextOffsetFromGlobal(); + + // Create the placeholder. + auto position = addPlaceholder(); + + // Advance the offset past that field. + auto &layout = Builder.CGM.getDataLayout(); + if (!Packed) + offset = offset.alignTo(CharUnits::fromQuantity( + layout.getABITypeAlignment(type))); + offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type)); + + CachedOffsetEnd = Builder.Buffer.size(); + CachedOffsetFromGlobal = offset; + + return position; +} + +CharUnits ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const{ + size_t cacheEnd = CachedOffsetEnd; + assert(cacheEnd <= end); + + // Fast path: if the cache is valid, just use it. + if (cacheEnd == end) { + return CachedOffsetFromGlobal; + } + + // If the cached range ends before the index at which the current + // aggregate starts, recurse for the parent. + CharUnits offset; + if (cacheEnd < Begin) { + assert(cacheEnd == 0); + assert(Parent && "Begin != 0 for root builder"); + cacheEnd = Begin; + offset = Parent->getOffsetFromGlobalTo(Begin); + } else { + offset = CachedOffsetFromGlobal; + } + + // Perform simple layout on the elements in cacheEnd..<end. + if (cacheEnd != end) { + auto &layout = Builder.CGM.getDataLayout(); + do { + llvm::Constant *element = Builder.Buffer[cacheEnd]; + assert(element != nullptr && + "cannot compute offset when a placeholder is present"); + llvm::Type *elementType = element->getType(); + if (!Packed) + offset = offset.alignTo(CharUnits::fromQuantity( + layout.getABITypeAlignment(elementType))); + offset += CharUnits::fromQuantity(layout.getTypeStoreSize(elementType)); + } while (++cacheEnd != end); + } + + // Cache and return. 
+ CachedOffsetEnd = cacheEnd; + CachedOffsetFromGlobal = offset; + return offset; +} + +llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy) { + markFinished(); + + auto &buffer = getBuffer(); + assert((Begin < buffer.size() || + (Begin == buffer.size() && eltTy)) + && "didn't add any array elements without element type"); + auto elts = llvm::makeArrayRef(buffer).slice(Begin); + if (!eltTy) eltTy = elts[0]->getType(); + auto type = llvm::ArrayType::get(eltTy, elts.size()); + auto constant = llvm::ConstantArray::get(type, elts); + buffer.erase(buffer.begin() + Begin, buffer.end()); + return constant; +} + +llvm::Constant * +ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) { + markFinished(); + + auto &buffer = getBuffer(); + auto elts = llvm::makeArrayRef(buffer).slice(Begin); + + if (ty == nullptr && elts.empty()) + ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed); + + llvm::Constant *constant; + if (ty) { + assert(ty->isPacked() == Packed); + constant = llvm::ConstantStruct::get(ty, elts); + } else { + constant = llvm::ConstantStruct::getAnon(elts, Packed); + } + + buffer.erase(buffer.begin() + Begin, buffer.end()); + return constant; +} diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 5bc9e5011aa89..a1023473bdd33 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -961,12 +961,10 @@ struct CounterCoverageMappingBuilder } }; -bool isMachO(const CodeGenModule &CGM) { - return CGM.getTarget().getTriple().isOSBinFormatMachO(); -} - -StringRef getCoverageSection(const CodeGenModule &CGM) { - return llvm::getInstrProfCoverageSectionName(isMachO(CGM)); +std::string getCoverageSection(const CodeGenModule &CGM) { + return llvm::getInstrProfSectionName( + llvm::IPSK_covmap, + CGM.getContext().getTargetInfo().getTriple().getObjectFormat()); } std::string normalizeFilename(StringRef Filename) { diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h index 2435830385583..c7bdeac58a1a4 100644 --- a/lib/CodeGen/EHScopeStack.h +++ b/lib/CodeGen/EHScopeStack.h @@ -202,7 +202,7 @@ public: template <std::size_t... Is> T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) { // It's important that the restores are emitted in order. The braced init - // list guarentees that. + // list guarantees that. 
return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...}; } diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index f7a8dd66c527c..dac2d15fa4061 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -24,8 +24,8 @@ #include "CGVTables.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" #include "clang/AST/StmtCXX.h" @@ -207,8 +207,9 @@ public: void EmitCXXConstructors(const CXXConstructorDecl *D) override; - void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl<CanQualType> &ArgTys) override; + AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl<CanQualType> &ArgTys) override; bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, CXXDtorType DT) const override { @@ -225,11 +226,10 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, - CallArgList &Args) override; + AddedStructorArgs + addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -1134,8 +1134,8 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { // Mark the function as nounwind readonly. llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, llvm::Attribute::ReadOnly }; - llvm::AttributeSet Attrs = llvm::AttributeSet::get( - CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); + llvm::AttributeList Attrs = llvm::AttributeList::get( + CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); } @@ -1353,7 +1353,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { } } -void +CGCXXABI::AddedStructorArgs ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, SmallVectorImpl<CanQualType> &ArgTys) { ASTContext &Context = getContext(); @@ -1362,9 +1362,12 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, // These are Clang types, so we don't need to worry about sret yet. // Check if we need to add a VTT parameter (which has type void **). 
- if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) + if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) { ArgTys.insert(ArgTys.begin() + 1, Context.getPointerType(Context.VoidPtrTy)); + return AddedStructorArgs::prefix(1); + } + return AddedStructorArgs{}; } void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { @@ -1429,11 +1432,11 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } -unsigned ItaniumCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) { if (!NeedsVTTParameter(GlobalDecl(D, Type))) - return 0; + return AddedStructorArgs{}; // Insert the implicit 'vtt' argument as the second argument. llvm::Value *VTT = @@ -1441,7 +1444,7 @@ unsigned ItaniumCXXABI::addImplicitConstructorArgs( QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false)); - return 1; // Added one arg. + return AddedStructorArgs::prefix(1); // Added one arg. } void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1907,10 +1910,11 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy), GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_acquire", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_acquire", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, @@ -1918,10 +1922,11 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, // void __cxa_guard_release(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_release", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_release", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, @@ -1929,10 +1934,11 @@ static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, // void __cxa_guard_abort(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_abort", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_abort", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } namespace { @@ -2015,10 +2021,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // The ABI says: "It is suggested that it be emitted in the same COMDAT // group as the associated data object." In practice, this doesn't work for - // non-ELF object formats, so only do it for ELF. 
+ // non-ELF and non-Wasm object formats, so only do it for ELF and Wasm. llvm::Comdat *C = var->getComdat(); if (!D.isLocalVarDecl() && C && - CGM.getTarget().getTriple().isOSBinFormatELF()) { + (CGM.getTarget().getTriple().isOSBinFormatELF() || + CGM.getTarget().getTriple().isOSBinFormatWasm())) { guard->setComdat(C); // An inline variable's guard function is run from the per-TU // initialization function, not via a dedicated global ctor function, so @@ -2161,7 +2168,9 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // Create a variable that binds the atexit to this shared object. llvm::Constant *handle = - CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts()); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); llvm::Value *args[] = { llvm::ConstantExpr::getBitCast(dtor, dtorTy), @@ -2634,7 +2643,6 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: return false; @@ -2814,7 +2822,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("References shouldn't get here"); case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: llvm_unreachable("Pipe types shouldn't get here"); @@ -3044,7 +3053,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, llvm_unreachable("References shouldn't get here"); case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: llvm_unreachable("Pipe type shouldn't get here"); @@ -3534,8 +3544,9 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM, return StructorCodegen::RAUW; if (llvm::GlobalValue::isWeakForLinker(Linkage)) { - // Only ELF supports COMDATs with arbitrary names (C5/D5). - if (CGM.getTarget().getTriple().isOSBinFormatELF()) + // Only ELF and wasm support COMDATs with arbitrary names (C5/D5). + if (CGM.getTarget().getTriple().isOSBinFormatELF() || + CGM.getTarget().getTriple().isOSBinFormatWasm()) return StructorCodegen::COMDAT; return StructorCodegen::Emit; } @@ -3919,9 +3930,8 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - llvm::Constant *fnRef = - CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate", - llvm::AttributeSet(), /*Local=*/true); + llvm::Constant *fnRef = CGM.CreateRuntimeFunction( + fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true); llvm::Function *fn = dyn_cast<llvm::Function>(fnRef); if (fn && fn->empty()) { diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp new file mode 100644 index 0000000000000..acea5c1143cf8 --- /dev/null +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -0,0 +1,207 @@ +//===--- MacroPPCallbacks.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains implementation for the macro preprocessors callbacks. +// +//===----------------------------------------------------------------------===// + +#include "MacroPPCallbacks.h" +#include "CGDebugInfo.h" +#include "clang/CodeGen/ModuleBuilder.h" +#include "clang/Parse/Parser.h" + +using namespace clang; + +void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II, + const MacroInfo &MI, + Preprocessor &PP, raw_ostream &Name, + raw_ostream &Value) { + Name << II.getName(); + + if (MI.isFunctionLike()) { + Name << '('; + if (!MI.arg_empty()) { + MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end(); + for (; AI + 1 != E; ++AI) { + Name << (*AI)->getName(); + Name << ','; + } + + // Last argument. + if ((*AI)->getName() == "__VA_ARGS__") + Name << "..."; + else + Name << (*AI)->getName(); + } + + if (MI.isGNUVarargs()) + // #define foo(x...) + Name << "..."; + + Name << ')'; + } + + SmallString<128> SpellingBuffer; + bool First = true; + for (const auto &T : MI.tokens()) { + if (!First && T.hasLeadingSpace()) + Value << ' '; + + Value << PP.getSpelling(T, SpellingBuffer); + First = false; + } +} + +MacroPPCallbacks::MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP) + : Gen(Gen), PP(PP), Status(NoScope) {} + +// This is the expected flow of enter/exit compiler and user files: +// - Main File Enter +// - <built-in> file enter +// {Compiler macro definitions} - (Line=0, no scope) +// - (Optional) <command line> file enter +// {Command line macro definitions} - (Line=0, no scope) +// - (Optional) <command line> file exit +// {Command line file includes} - (Line=0, Main file scope) +// {macro definitions and file includes} - (Line!=0, Parent scope) +// - <built-in> file exit +// {User code macro definitions and file includes} - (Line!=0, Parent scope) + +llvm::DIMacroFile *MacroPPCallbacks::getCurrentScope() { + if (Status == MainFileScope || Status == CommandLineIncludeScope) + return Scopes.back(); + return nullptr; +} + +SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) { + if (Status == MainFileScope || EnteredCommandLineIncludeFiles) + return Loc; + + // While parsing skipped files, location of macros is invalid. + // Invalid location represents line zero. 
+ return SourceLocation(); +} + +static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) { + StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); + return Filename.equals("<built-in>"); +} + +static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) { + StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); + return Filename.equals("<command line>"); +} + +void MacroPPCallbacks::updateStatusToNextScope() { + switch (Status) { + case NoScope: + Status = InitializedScope; + break; + case InitializedScope: + Status = BuiltinScope; + break; + case BuiltinScope: + Status = CommandLineIncludeScope; + break; + case CommandLineIncludeScope: + Status = MainFileScope; + break; + case MainFileScope: + llvm_unreachable("There is no next scope, already in the final scope"); + } +} + +void MacroPPCallbacks::FileEntered(SourceLocation Loc) { + SourceLocation LineLoc = getCorrectLocation(LastHashLoc); + switch (Status) { + case NoScope: + updateStatusToNextScope(); + break; + case InitializedScope: + updateStatusToNextScope(); + return; + case BuiltinScope: + if (isCommandLineFile(PP.getSourceManager(), Loc)) + return; + updateStatusToNextScope(); + LLVM_FALLTHROUGH; + case CommandLineIncludeScope: + EnteredCommandLineIncludeFiles++; + break; + case MainFileScope: + break; + } + + Scopes.push_back(Gen->getCGDebugInfo()->CreateTempMacroFile(getCurrentScope(), + LineLoc, Loc)); +} + +void MacroPPCallbacks::FileExited(SourceLocation Loc) { + switch (Status) { + default: + llvm_unreachable("Do not expect to exit a file from current scope"); + case BuiltinScope: + if (!isBuiltinFile(PP.getSourceManager(), Loc)) + // Skip next scope and change status to MainFileScope. + Status = MainFileScope; + return; + case CommandLineIncludeScope: + if (!EnteredCommandLineIncludeFiles) { + updateStatusToNextScope(); + return; + } + EnteredCommandLineIncludeFiles--; + break; + case MainFileScope: + break; + } + + Scopes.pop_back(); +} + +void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) { + // Only care about enter file or exit file changes. + if (Reason == EnterFile) + FileEntered(Loc); + else if (Reason == ExitFile) + FileExited(Loc); +} + +void MacroPPCallbacks::InclusionDirective( + SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, const Module *Imported) { + + // Record the line location of the current included file. 
+ LastHashLoc = HashLoc; +} + +void MacroPPCallbacks::MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) { + IdentifierInfo *Id = MacroNameTok.getIdentifierInfo(); + SourceLocation location = getCorrectLocation(MacroNameTok.getLocation()); + std::string NameBuffer, ValueBuffer; + llvm::raw_string_ostream Name(NameBuffer); + llvm::raw_string_ostream Value(ValueBuffer); + writeMacroDefinition(*Id, *MD->getMacroInfo(), PP, Name, Value); + Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(), + llvm::dwarf::DW_MACINFO_define, location, + Name.str(), Value.str()); +} + +void MacroPPCallbacks::MacroUndefined(const Token &MacroNameTok, + const MacroDefinition &MD) { + IdentifierInfo *Id = MacroNameTok.getIdentifierInfo(); + SourceLocation location = getCorrectLocation(MacroNameTok.getLocation()); + Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(), + llvm::dwarf::DW_MACINFO_undef, location, + Id->getName(), ""); +} diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h new file mode 100644 index 0000000000000..06217f9c5883c --- /dev/null +++ b/lib/CodeGen/MacroPPCallbacks.h @@ -0,0 +1,117 @@ +//===--- MacroPPCallbacks.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines implementation for the macro preprocessors callbacks. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/PPCallbacks.h" + +namespace llvm { +class DIMacroFile; +class DIMacroNode; +} +namespace clang { +class Preprocessor; +class MacroInfo; +class CodeGenerator; + +class MacroPPCallbacks : public PPCallbacks { + /// A pointer to code generator, where debug info generator can be found. + CodeGenerator *Gen; + + /// Preprocessor. + Preprocessor &PP; + + /// Location of recent included file, used for line number. + SourceLocation LastHashLoc; + + /// Counts current number of command line included files, which were entered + /// and were not exited yet. + int EnteredCommandLineIncludeFiles = 0; + + enum FileScopeStatus { + NoScope = 0, // Scope is not initialized yet. + InitializedScope, // Main file scope is initialized but not set yet. + BuiltinScope, // <built-in> and <command line> file scopes. + CommandLineIncludeScope, // Included file, from <command line> file, scope. + MainFileScope // Main file scope. + }; + FileScopeStatus Status; + + /// Parent contains all entered files that were not exited yet according to + /// the inclusion order. + llvm::SmallVector<llvm::DIMacroFile *, 4> Scopes; + + /// Get current DIMacroFile scope. + /// \return current DIMacroFile scope or nullptr if there is no such scope. + llvm::DIMacroFile *getCurrentScope(); + + /// Get current line location or invalid location. + /// \param Loc current line location. + /// \return current line location \p `Loc`, or invalid location if it's in a + /// skipped file scope. + SourceLocation getCorrectLocation(SourceLocation Loc); + + /// Use the passed preprocessor to write the macro name and value from the + /// given macro info and identifier info into the given \p `Name` and \p + /// `Value` output streams. + /// + /// \param II Identifier info, used to get the Macro name. + /// \param MI Macro info, used to get the Macro argumets and values. + /// \param PP Preprocessor. 
+ /// \param [out] Name Place holder for returned macro name and arguments. + /// \param [out] Value Place holder for returned macro value. + static void writeMacroDefinition(const IdentifierInfo &II, + const MacroInfo &MI, Preprocessor &PP, + raw_ostream &Name, raw_ostream &Value); + + /// Update current file scope status to next file scope. + void updateStatusToNextScope(); + + /// Handle the case when entering a file. + /// + /// \param Loc Indicates the new location. + void FileEntered(SourceLocation Loc); + + /// Handle the case when exiting a file. + /// + /// \param Loc Indicates the new location. + void FileExited(SourceLocation Loc); + +public: + MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP); + + /// Callback invoked whenever a source file is entered or exited. + /// + /// \param Loc Indicates the new location. + /// \param PrevFID the file that was exited if \p Reason is ExitFile. + void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID = FileID()) override; + + /// Callback invoked whenever a directive (#xxx) is processed. + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, + const Module *Imported) override; + + /// Hook called whenever a macro definition is seen. + void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) override; + + /// Hook called whenever a macro \#undef is seen. + /// + /// MD is released immediately following this callback. + void MacroUndefined(const Token &MacroNameTok, + const MacroDefinition &MD) override; +}; + +} // end namespace clang diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 38df455011e3a..4cacf494e6941 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -19,8 +19,8 @@ #include "CGVTables.h" #include "CodeGenModule.h" #include "CodeGenTypes.h" -#include "ConstantBuilder.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" @@ -206,8 +206,9 @@ public: // lacks a definition for the destructor, non-base destructors must always // delegate to or alias the base destructor. - void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl<CanQualType> &ArgTys) override; + AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl<CanQualType> &ArgTys) override; /// Non-base dtors should be emitted as delegating thunks in this ABI. 
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, @@ -248,11 +249,10 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, - CallArgList &Args) override; + AddedStructorArgs + addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -1261,17 +1261,19 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, } } -void +CGCXXABI::AddedStructorArgs MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, SmallVectorImpl<CanQualType> &ArgTys) { + AddedStructorArgs Added; // TODO: 'for base' flag if (T == StructorType::Deleting) { // The scalar deleting destructor takes an implicit int parameter. ArgTys.push_back(getContext().IntTy); + ++Added.Suffix; } auto *CD = dyn_cast<CXXConstructorDecl>(MD); if (!CD) - return; + return Added; // All parameters are already in place except is_most_derived, which goes // after 'this' if it's variadic and last if it's not. @@ -1279,11 +1281,16 @@ MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, const CXXRecordDecl *Class = CD->getParent(); const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>(); if (Class->getNumVBases()) { - if (FPT->isVariadic()) + if (FPT->isVariadic()) { ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy); - else + ++Added.Prefix; + } else { ArgTys.push_back(getContext().IntTy); + ++Added.Suffix; + } } + + return Added; } void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { @@ -1493,14 +1500,14 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { } } -unsigned MicrosoftCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) { assert(Type == Ctor_Complete || Type == Ctor_Base); // Check if we need a 'most_derived' parameter. if (!D->getParent()->getNumVBases()) - return 0; + return AddedStructorArgs{}; // Add the 'most_derived' argument second if we are variadic or last if not. const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); @@ -1511,13 +1518,13 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs( MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); } RValue RV = RValue::get(MostDerivedArg); - if (FPT->isVariadic()) + if (FPT->isVariadic()) { Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy, /*needscopy=*/false)); - else - Args.add(RV, getContext().IntTy); - - return 1; // Added one arg. 
+ return AddedStructorArgs::prefix(1); + } + Args.add(RV, getContext().IntTy); + return AddedStructorArgs::suffix(1); } void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1554,7 +1561,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD, llvm::GlobalVariable *VTable) { - if (!CGM.getCodeGenOpts().PrepareForLTO) + if (!CGM.getCodeGenOpts().LTOUnit) return; // The location of the first virtual function pointer in the virtual table, @@ -2203,9 +2210,8 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get( CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false); - llvm::Constant *TLRegDtor = - CGF.CGM.CreateRuntimeFunction(TLRegDtorTy, "__tlregdtor", - llvm::AttributeSet(), /*Local=*/true); + llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction( + TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor)) TLRegDtorFn->setDoesNotThrow(); @@ -2301,9 +2307,9 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_header", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2313,9 +2319,9 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_footer", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2325,9 +2331,9 @@ static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_abort", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -3713,7 +3719,7 @@ CatchTypeInfo MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, QualType CatchHandlerType) { // TypeDescriptors for exceptions never have qualified pointer types, - // qualifiers are stored seperately in order to support qualification + // qualifiers are stored separately in order to support qualification // conversions. bool IsConst, IsVolatile, IsUnaligned; Type = @@ -3918,16 +3924,16 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0); // Insert any ABI-specific implicit constructor arguments. - unsigned ExtraArgs = addImplicitConstructorArgs(CGF, CD, Ctor_Complete, - /*ForVirtualBase=*/false, - /*Delegating=*/false, Args); - + AddedStructorArgs ExtraArgs = + addImplicitConstructorArgs(CGF, CD, Ctor_Complete, + /*ForVirtualBase=*/false, + /*Delegating=*/false, Args); // Call the destructor with our arguments. 
llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(CD, StructorType::Complete); CGCallee Callee = CGCallee::forDirect(CalleePtr, CD); const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall( - Args, CD, Ctor_Complete, ExtraArgs); + Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix); CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args); Cleanups.ForceCleanup(); diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index f925c2549175c..89090c8b6a1b5 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -92,6 +92,10 @@ namespace { return M.get(); } + CGDebugInfo *getCGDebugInfo() { + return Builder->getModuleDebugInfo(); + } + llvm::Module *ReleaseModule() { return M.release(); } @@ -299,6 +303,10 @@ llvm::Module *CodeGenerator::ReleaseModule() { return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule(); } +CGDebugInfo *CodeGenerator::getCGDebugInfo() { + return static_cast<CodeGeneratorImpl*>(this)->getCGDebugInfo(); +} + const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) { return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name); } diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 754f9968b67f2..37ecc05aa1eee 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -171,7 +171,8 @@ public: // Prepare CGDebugInfo to emit debug info for a clang module. auto *DI = Builder->getModuleDebugInfo(); StringRef ModuleName = llvm::sys::path::filename(MainFileName); - DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL}); + DI->setPCHDescriptor({ModuleName, "", OutputFileName, + ASTFileSignature{{{~0U, ~0U, ~0U, ~0U, ~1U}}}}); DI->setModuleMap(MMap); } @@ -241,7 +242,11 @@ public: // PCH files don't have a signature field in the control block, // but LLVM detects DWO CUs by looking for a non-zero DWO id. - uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL; + // We use the lower 64 bits for debug info. + uint64_t Signature = + Buffer->Signature + ? (uint64_t)Buffer->Signature[1] << 32 | Buffer->Signature[0] + : ~1ULL; Builder->getModuleDebugInfo()->setDwoId(Signature); // Finalize the Builder. diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index d2fc3888ef292..94c3880ea26ec 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -1197,6 +1197,39 @@ static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { return Size == 32 || Size == 64; } +static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, + uint64_t &Size) { + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), Context)) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += Context.getTypeSize(FD->getType()); + } + return true; +} + +static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, + uint64_t &Size) { + // Don't do this if there are any non-empty bases. 
+ for (const CXXBaseSpecifier &Base : RD->bases()) { + if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), + Size)) + return false; + } + if (!addFieldSizes(Context, RD, Size)) + return false; + return true; +} + /// Test whether an argument type which is to be passed indirectly (on the /// stack) would have the equivalent layout if it was expanded into separate /// arguments. If so, we prefer to do the latter to avoid inhibiting @@ -1207,8 +1240,9 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { if (!RT) return false; const RecordDecl *RD = RT->getDecl(); + uint64_t Size = 0; if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!IsWin32StructABI ) { + if (!IsWin32StructABI) { // On non-Windows, we have to conservatively match our old bitcode // prototypes in order to be ABI-compatible at the bitcode level. if (!CXXRD->isCLike()) @@ -1217,30 +1251,12 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { // Don't do this for dynamic classes. if (CXXRD->isDynamicClass()) return false; - // Don't do this if there are any non-empty bases. - for (const CXXBaseSpecifier &Base : CXXRD->bases()) { - if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) - return false; - } } - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - // Scalar arguments on the stack get 4 byte alignment on x86. If the - // argument is smaller than 32-bits, expanding the struct will create - // alignment padding. - if (!is32Or64BitBasicType(FD->getType(), getContext())) + if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) + } else { + if (!addFieldSizes(getContext(), RD, Size)) return false; - - Size += getContext().getTypeSize(FD->getType()); } // We can do this if there was no alignment padding. @@ -1885,10 +1901,10 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes( + llvm::AttributeList::FunctionIndex, + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, B)); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -4819,7 +4835,7 @@ public: : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"; + return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { @@ -4901,7 +4917,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { return coerceToIntArray(Ty, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(Ty); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. 
@@ -4951,7 +4967,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return coerceToIntArray(RetTy, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(RetTy); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -5433,10 +5449,10 @@ public: // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes( + llvm::AttributeList::FunctionIndex, + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, B)); } }; @@ -6884,6 +6900,31 @@ MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, } //===----------------------------------------------------------------------===// +// AVR ABI Implementation. +//===----------------------------------------------------------------------===// + +namespace { +class AVRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + AVRTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + auto *Fn = cast<llvm::Function>(GV); + + if (FD->getAttr<AVRInterruptAttr>()) + Fn->addFnAttr("interrupt"); + + if (FD->getAttr<AVRSignalAttr>()) + Fn->addFnAttr("signal"); + } +}; +} + +//===----------------------------------------------------------------------===// // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. // Currently subclassed only to implement custom OpenCL C function attribute // handling. @@ -7261,9 +7302,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast<llvm::Function>(GV); - if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + if (ReqdWGS || FlatWGS) { + unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; + unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); @@ -8386,6 +8432,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::avr: + return SetCGInfo(new AVRTargetCodeGenInfo(Types)); + case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; |
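The ObjectFilePCHContainerOperations hunk above starts treating the AST file signature as an array of 32-bit words and packs the first two words into the 64-bit DWO id. The following is a minimal standalone sketch of just that packing, using a made-up signature value rather than a real AST hash; it mirrors the expression in the diff but is not the clang implementation itself.

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical signature words; clang's ASTFileSignature holds five of them.
  std::array<uint32_t, 5> Signature = {0x11223344u, 0xAABBCCDDu, 0u, 0u, 0u};

  // Mirrors the diff: word 1 becomes the high half of the DWO id,
  // word 0 the low half.
  uint64_t DwoId = (uint64_t)Signature[1] << 32 | Signature[0];
  assert(DwoId == 0xAABBCCDD11223344ull);
  return 0;
}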
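The AArch64 hunks above replace the hand-rolled rounding 64 * ((Size + 63) / 64) with llvm::alignTo(Size, 64). A small self-contained check, using a local stand-in with the same semantics as llvm::alignTo (the real helper lives in llvm/Support/MathExtras.h), shows the two forms agree for any bit size, so the change is purely a readability cleanup.

#include <cassert>
#include <cstdint>

// Local stand-in with the same behavior as llvm::alignTo for a non-zero,
// power-of-two alignment: round Value up to the nearest multiple of Align.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  // Old and new forms from classifyArgumentType/classifyReturnType both round
  // a bit-size up to a multiple of 64 bits; they must match everywhere.
  for (uint64_t Size = 0; Size <= 1024; ++Size)
    assert(alignTo(Size, 64) == 64 * ((Size + 63) / 64));
  return 0;
}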
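The AMDGPU hunk above derives a flat work-group size from reqd_work_group_size when no explicit amdgpu_flat_work_group_size range is given. Below is an illustrative sketch of only that selection logic, with plain structs standing in for the clang attribute objects; the OpenCL-only gating of reqd_work_group_size and the attribute emission itself are omitted.

#include <cassert>

struct FlatWGS { unsigned Min, Max; }; // stand-in for AMDGPUFlatWorkGroupSizeAttr
struct ReqdWGS { unsigned X, Y, Z; };  // stand-in for ReqdWorkGroupSizeAttr

// Computes the {Min, Max} pair the diff feeds into the
// "amdgpu-flat-work-group-size" attribute; {0, 0} means "nothing to emit".
static FlatWGS pickFlatWorkGroupSize(const FlatWGS *Flat, const ReqdWGS *Reqd) {
  unsigned Min = Flat ? Flat->Min : 0;
  unsigned Max = Flat ? Flat->Max : 0;
  if (Reqd && Min == 0 && Max == 0)
    Min = Max = Reqd->X * Reqd->Y * Reqd->Z;
  return {Min, Max};
}

int main() {
  ReqdWGS Reqd{8, 8, 1};

  // No explicit flat range: the required size 8*8*1 = 64 bounds both ends.
  FlatWGS R = pickFlatWorkGroupSize(nullptr, &Reqd);
  assert(R.Min == 64 && R.Max == 64);

  // An explicit flat range takes precedence over reqd_work_group_size.
  FlatWGS Explicit{32, 256};
  R = pickFlatWorkGroupSize(&Explicit, &Reqd);
  assert(R.Min == 32 && R.Max == 256);
  return 0;
}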