author     Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:02:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:02:28 +0000
commit     7442d6faa2719e4e7d33a7021c406c5a4facd74d
tree       c72b9241553fc9966179aba84f90f17bfa9235c3 /lib/CodeGen
parent     b52119637f743680a99710ce5fdb6646da2772af
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/ABIInfo.h | 1
-rw-r--r--  lib/CodeGen/BackendUtil.cpp | 416
-rw-r--r--  lib/CodeGen/CGAtomic.cpp | 2
-rw-r--r--  lib/CodeGen/CGBlocks.cpp | 419
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp | 261
-rw-r--r--  lib/CodeGen/CGCUDANV.cpp | 2
-rw-r--r--  lib/CodeGen/CGCXX.cpp | 2
-rw-r--r--  lib/CodeGen/CGCXXABI.h | 27
-rw-r--r--  lib/CodeGen/CGCall.cpp | 588
-rw-r--r--  lib/CodeGen/CGCall.h | 52
-rw-r--r--  lib/CodeGen/CGClass.cpp | 66
-rw-r--r--  lib/CodeGen/CGCleanup.cpp | 43
-rw-r--r--  lib/CodeGen/CGCoroutine.cpp | 277
-rw-r--r--  lib/CodeGen/CGDebugInfo.cpp | 252
-rw-r--r--  lib/CodeGen/CGDebugInfo.h | 67
-rw-r--r--  lib/CodeGen/CGDecl.cpp | 81
-rw-r--r--  lib/CodeGen/CGDeclCXX.cpp | 2
-rw-r--r--  lib/CodeGen/CGException.cpp | 17
-rw-r--r--  lib/CodeGen/CGExpr.cpp | 273
-rw-r--r--  lib/CodeGen/CGExprAgg.cpp | 9
-rw-r--r--  lib/CodeGen/CGExprCXX.cpp | 50
-rw-r--r--  lib/CodeGen/CGExprComplex.cpp | 16
-rw-r--r--  lib/CodeGen/CGExprScalar.cpp | 217
-rw-r--r--  lib/CodeGen/CGGPUBuiltin.cpp (renamed from lib/CodeGen/CGCUDABuiltin.cpp) | 13
-rw-r--r--  lib/CodeGen/CGObjC.cpp | 119
-rw-r--r--  lib/CodeGen/CGObjCGNU.cpp | 4
-rw-r--r--  lib/CodeGen/CGObjCMac.cpp | 26
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.cpp | 3
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.cpp | 306
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.h | 53
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 1710
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 128
-rw-r--r--  lib/CodeGen/CGStmt.cpp | 10
-rw-r--r--  lib/CodeGen/CGStmtOpenMP.cpp | 298
-rw-r--r--  lib/CodeGen/CGVTables.cpp | 20
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 4
-rw-r--r--  lib/CodeGen/CodeGenAction.cpp | 293
-rw-r--r--  lib/CodeGen/CodeGenFunction.cpp | 81
-rw-r--r--  lib/CodeGen/CodeGenFunction.h | 173
-rw-r--r--  lib/CodeGen/CodeGenModule.cpp | 199
-rw-r--r--  lib/CodeGen/CodeGenModule.h | 63
-rw-r--r--  lib/CodeGen/CodeGenPGO.cpp | 34
-rw-r--r--  lib/CodeGen/CodeGenPGO.h | 3
-rw-r--r--  lib/CodeGen/CodeGenTypes.cpp | 4
-rw-r--r--  lib/CodeGen/CodeGenTypes.h | 7
-rw-r--r--  lib/CodeGen/ConstantBuilder.h | 444
-rw-r--r--  lib/CodeGen/ConstantInitBuilder.cpp | 280
-rw-r--r--  lib/CodeGen/CoverageMappingGen.cpp | 10
-rw-r--r--  lib/CodeGen/EHScopeStack.h | 2
-rw-r--r--  lib/CodeGen/ItaniumCXXABI.cpp | 86
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.cpp | 207
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.h | 117
-rw-r--r--  lib/CodeGen/MicrosoftCXXABI.cpp | 82
-rw-r--r--  lib/CodeGen/ModuleBuilder.cpp | 8
-rw-r--r--  lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 9
-rw-r--r--  lib/CodeGen/TargetInfo.cpp | 121
56 files changed, 6030 insertions, 2027 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index ac31dfdaf3e4b..c0be60ef53bc5 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
#define LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
+#include "clang/AST/CharUnits.h"
#include "clang/AST/Type.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Type.h"
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index d2ce6ea48e419..855d6795b9d6c 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -61,6 +61,9 @@ using namespace llvm;
namespace {
+// Default filename used for profile generation.
+static constexpr StringLiteral DefaultProfileGenName = "default_%m.profraw";
+
class EmitAssemblyHelper {
DiagnosticsEngine &Diags;
const HeaderSearchOptions &HSOpts;
@@ -73,7 +76,6 @@ class EmitAssemblyHelper {
std::unique_ptr<raw_pwrite_stream> OS;
-private:
TargetIRAnalysis getTargetIRAnalysis() const {
if (TM)
return TM->getTargetIRAnalysis();
@@ -262,7 +264,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
TLII->disableAllFunctions();
else {
// Disable individual libc/libm calls in TargetLibraryInfo.
- LibFunc::Func F;
+ LibFunc F;
for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs())
if (TLII->getLibFunc(FuncName, F))
TLII->setUnavailable(F);
@@ -292,6 +294,140 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts,
MPM->add(createRewriteSymbolsPass(DL));
}
+static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) {
+ switch (CodeGenOpts.OptimizationLevel) {
+ default:
+ llvm_unreachable("Invalid optimization level!");
+ case 0:
+ return CodeGenOpt::None;
+ case 1:
+ return CodeGenOpt::Less;
+ case 2:
+ return CodeGenOpt::Default; // O2/Os/Oz
+ case 3:
+ return CodeGenOpt::Aggressive;
+ }
+}
+
+static llvm::CodeModel::Model getCodeModel(const CodeGenOptions &CodeGenOpts) {
+ unsigned CodeModel =
+ llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+ .Case("small", llvm::CodeModel::Small)
+ .Case("kernel", llvm::CodeModel::Kernel)
+ .Case("medium", llvm::CodeModel::Medium)
+ .Case("large", llvm::CodeModel::Large)
+ .Case("default", llvm::CodeModel::Default)
+ .Default(~0u);
+ assert(CodeModel != ~0u && "invalid code model!");
+ return static_cast<llvm::CodeModel::Model>(CodeModel);
+}
+
+static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) {
+ // Keep this synced with the equivalent code in
+ // lib/Frontend/CompilerInvocation.cpp
+ llvm::Optional<llvm::Reloc::Model> RM;
+ RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
+ .Case("static", llvm::Reloc::Static)
+ .Case("pic", llvm::Reloc::PIC_)
+ .Case("ropi", llvm::Reloc::ROPI)
+ .Case("rwpi", llvm::Reloc::RWPI)
+ .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
+ .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
+ assert(RM.hasValue() && "invalid PIC model!");
+ return *RM;
+}
+
+static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) {
+ if (Action == Backend_EmitObj)
+ return TargetMachine::CGFT_ObjectFile;
+ else if (Action == Backend_EmitMCNull)
+ return TargetMachine::CGFT_Null;
+ else {
+ assert(Action == Backend_EmitAssembly && "Invalid action!");
+ return TargetMachine::CGFT_AssemblyFile;
+ }
+}
+
+static void initTargetOptions(llvm::TargetOptions &Options,
+ const CodeGenOptions &CodeGenOpts,
+ const clang::TargetOptions &TargetOpts,
+ const LangOptions &LangOpts,
+ const HeaderSearchOptions &HSOpts) {
+ Options.ThreadModel =
+ llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel)
+ .Case("posix", llvm::ThreadModel::POSIX)
+ .Case("single", llvm::ThreadModel::Single);
+
+ // Set float ABI type.
+ assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" ||
+ CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) &&
+ "Invalid Floating Point ABI!");
+ Options.FloatABIType =
+ llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI)
+ .Case("soft", llvm::FloatABI::Soft)
+ .Case("softfp", llvm::FloatABI::Soft)
+ .Case("hard", llvm::FloatABI::Hard)
+ .Default(llvm::FloatABI::Default);
+
+ // Set FP fusion mode.
+ switch (LangOpts.getDefaultFPContractMode()) {
+ case LangOptions::FPC_Off:
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Strict;
+ break;
+ case LangOptions::FPC_On:
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
+ break;
+ case LangOptions::FPC_Fast:
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
+ break;
+ }
+
+ Options.UseInitArray = CodeGenOpts.UseInitArray;
+ Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS;
+ Options.CompressDebugSections = CodeGenOpts.CompressDebugSections;
+ Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations;
+
+ // Set EABI version.
+ Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion)
+ .Case("4", llvm::EABI::EABI4)
+ .Case("5", llvm::EABI::EABI5)
+ .Case("gnu", llvm::EABI::GNU)
+ .Default(llvm::EABI::Default);
+
+ if (LangOpts.SjLjExceptions)
+ Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
+
+ Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
+ Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
+ Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
+ Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
+ Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
+ Options.FunctionSections = CodeGenOpts.FunctionSections;
+ Options.DataSections = CodeGenOpts.DataSections;
+ Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
+ Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
+ Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
+
+ Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
+ Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
+ Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm;
+ Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
+ Options.MCOptions.MCIncrementalLinkerCompatible =
+ CodeGenOpts.IncrementalLinkerCompatible;
+ Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
+ Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
+ Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
+ Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
+ Options.MCOptions.ABIName = TargetOpts.ABI;
+ for (const auto &Entry : HSOpts.UserEntries)
+ if (!Entry.IsFramework &&
+ (Entry.Group == frontend::IncludeDirGroup::Quoted ||
+ Entry.Group == frontend::IncludeDirGroup::Angled ||
+ Entry.Group == frontend::IncludeDirGroup::System))
+ Options.MCOptions.IASSearchPaths.push_back(
+ Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
+}
+
void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
legacy::FunctionPassManager &FPM) {
// Handle disabling of all LLVM passes, where we want to preserve the
@@ -316,8 +452,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
!CodeGenOpts.DisableLifetimeMarkers);
PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
} else {
+ // We do not want to inline hot callsites for SamplePGO module-summary build
+ // because profile annotation will happen again in ThinLTO backend, and we
+ // want the IR of the hot path to match the profile.
PMBuilder.Inliner = createFunctionInliningPass(
- CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize);
+ CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize,
+ (!CodeGenOpts.SampleProfileFile.empty() &&
+ CodeGenOpts.EmitSummaryIndex));
}
PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
@@ -334,16 +475,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
MPM.add(new TargetLibraryInfoWrapperPass(*TLII));
- // Add target-specific passes that need to run as early as possible.
if (TM)
- PMBuilder.addExtension(
- PassManagerBuilder::EP_EarlyAsPossible,
- [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- TM->addEarlyAsPossiblePasses(PM);
- });
+ TM->adjustPassManager(PMBuilder);
- PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
- addAddDiscriminatorsPass);
+ if (CodeGenOpts.DebugInfoForProfiling ||
+ !CodeGenOpts.SampleProfileFile.empty())
+ PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
+ addAddDiscriminatorsPass);
// In ObjC ARC mode, add the main ARC optimization passes.
if (LangOpts.ObjCAutoRefCount) {
@@ -454,7 +592,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (!CodeGenOpts.InstrProfileOutput.empty())
PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput;
else
- PMBuilder.PGOInstrGen = "default_%m.profraw";
+ PMBuilder.PGOInstrGen = DefaultProfileGenName;
}
if (CodeGenOpts.hasProfileIRUse())
PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath;
@@ -495,126 +633,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
return;
}
- unsigned CodeModel =
- llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
- .Case("small", llvm::CodeModel::Small)
- .Case("kernel", llvm::CodeModel::Kernel)
- .Case("medium", llvm::CodeModel::Medium)
- .Case("large", llvm::CodeModel::Large)
- .Case("default", llvm::CodeModel::Default)
- .Default(~0u);
- assert(CodeModel != ~0u && "invalid code model!");
- llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel);
-
+ llvm::CodeModel::Model CM = getCodeModel(CodeGenOpts);
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
-
- // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp.
- llvm::Optional<llvm::Reloc::Model> RM;
- RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
- .Case("static", llvm::Reloc::Static)
- .Case("pic", llvm::Reloc::PIC_)
- .Case("ropi", llvm::Reloc::ROPI)
- .Case("rwpi", llvm::Reloc::RWPI)
- .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
- .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
- assert(RM.hasValue() && "invalid PIC model!");
-
- CodeGenOpt::Level OptLevel;
- switch (CodeGenOpts.OptimizationLevel) {
- default:
- llvm_unreachable("Invalid optimization level!");
- case 0:
- OptLevel = CodeGenOpt::None;
- break;
- case 1:
- OptLevel = CodeGenOpt::Less;
- break;
- case 2:
- OptLevel = CodeGenOpt::Default;
- break; // O2/Os/Oz
- case 3:
- OptLevel = CodeGenOpt::Aggressive;
- break;
- }
+ llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);
+ CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
llvm::TargetOptions Options;
-
- Options.ThreadModel =
- llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel)
- .Case("posix", llvm::ThreadModel::POSIX)
- .Case("single", llvm::ThreadModel::Single);
-
- // Set float ABI type.
- assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" ||
- CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) &&
- "Invalid Floating Point ABI!");
- Options.FloatABIType =
- llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI)
- .Case("soft", llvm::FloatABI::Soft)
- .Case("softfp", llvm::FloatABI::Soft)
- .Case("hard", llvm::FloatABI::Hard)
- .Default(llvm::FloatABI::Default);
-
- // Set FP fusion mode.
- switch (CodeGenOpts.getFPContractMode()) {
- case CodeGenOptions::FPC_Off:
- Options.AllowFPOpFusion = llvm::FPOpFusion::Strict;
- break;
- case CodeGenOptions::FPC_On:
- Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
- break;
- case CodeGenOptions::FPC_Fast:
- Options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
- break;
- }
-
- Options.UseInitArray = CodeGenOpts.UseInitArray;
- Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS;
- Options.CompressDebugSections = CodeGenOpts.CompressDebugSections;
- Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations;
-
- // Set EABI version.
- Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion)
- .Case("4", llvm::EABI::EABI4)
- .Case("5", llvm::EABI::EABI5)
- .Case("gnu", llvm::EABI::GNU)
- .Default(llvm::EABI::Default);
-
- if (LangOpts.SjLjExceptions)
- Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
-
- Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD;
- Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
- Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
- Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
- Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
- Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
- Options.FunctionSections = CodeGenOpts.FunctionSections;
- Options.DataSections = CodeGenOpts.DataSections;
- Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
- Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
- Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
-
- Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
- Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
- Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm;
- Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
- Options.MCOptions.MCIncrementalLinkerCompatible =
- CodeGenOpts.IncrementalLinkerCompatible;
- Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
- Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
- Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
- Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
- Options.MCOptions.ABIName = TargetOpts.ABI;
- for (const auto &Entry : HSOpts.UserEntries)
- if (!Entry.IsFramework &&
- (Entry.Group == frontend::IncludeDirGroup::Quoted ||
- Entry.Group == frontend::IncludeDirGroup::Angled ||
- Entry.Group == frontend::IncludeDirGroup::System))
- Options.MCOptions.IASSearchPaths.push_back(
- Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
-
+ initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts);
TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
Options, RM, CM, OptLevel));
}
@@ -630,13 +656,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
// Normal mode, emit a .s or .o file by running the code generator. Note,
// this also adds codegenerator level optimization passes.
- TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile;
- if (Action == Backend_EmitObj)
- CGFT = TargetMachine::CGFT_ObjectFile;
- else if (Action == Backend_EmitMCNull)
- CGFT = TargetMachine::CGFT_Null;
- else
- assert(Action == Backend_EmitAssembly && "Invalid action!");
+ TargetMachine::CodeGenFileType CGFT = getCodeGenFileType(Action);
// Add ObjC ARC final-cleanup optimizations. This is done as part of the
// "codegen" passes so that it isn't run multiple times when there is
@@ -683,14 +703,31 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
+ std::unique_ptr<raw_fd_ostream> ThinLinkOS;
+
switch (Action) {
case Backend_EmitNothing:
break;
case Backend_EmitBC:
- PerModulePasses.add(createBitcodeWriterPass(
- *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex,
- CodeGenOpts.EmitSummaryIndex));
+ if (CodeGenOpts.EmitSummaryIndex) {
+ if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
+ std::error_code EC;
+ ThinLinkOS.reset(new llvm::raw_fd_ostream(
+ CodeGenOpts.ThinLinkBitcodeFile, EC,
+ llvm::sys::fs::F_None));
+ if (EC) {
+ Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile
+ << EC.message();
+ return;
+ }
+ }
+ PerModulePasses.add(
+ createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get()));
+ }
+ else
+ PerModulePasses.add(
+ createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists));
break;
case Backend_EmitLL:
@@ -779,7 +816,24 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
return;
TheModule->setDataLayout(TM->createDataLayout());
- PassBuilder PB(TM.get());
+ PGOOptions PGOOpt;
+
+ // -fprofile-generate.
+ PGOOpt.RunProfileGen = CodeGenOpts.hasProfileIRInstr();
+ if (PGOOpt.RunProfileGen)
+ PGOOpt.ProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() ?
+ DefaultProfileGenName : CodeGenOpts.InstrProfileOutput;
+
+ // -fprofile-use.
+ if (CodeGenOpts.hasProfileIRUse())
+ PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath;
+
+ // Only pass a PGO options struct if -fprofile-generate or
+ // -fprofile-use were passed on the cmdline.
+ PassBuilder PB(TM.get(),
+ (PGOOpt.RunProfileGen ||
+ !PGOOpt.ProfileUseFile.empty()) ?
+ Optional<PGOOptions>(PGOOpt) : None);
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -861,8 +915,31 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}
+Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
+ Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
+ if (!BMsOrErr)
+ return BMsOrErr.takeError();
+
+ // The bitcode file may contain multiple modules, we want the one with a
+ // summary.
+ for (BitcodeModule &BM : *BMsOrErr) {
+ Expected<bool> HasSummary = BM.hasSummary();
+ if (HasSummary && *HasSummary)
+ return BM;
+ }
+
+ return make_error<StringError>("Could not find module summary",
+ inconvertibleErrorCode());
+}
+
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
- std::unique_ptr<raw_pwrite_stream> OS) {
+ const HeaderSearchOptions &HeaderOpts,
+ const CodeGenOptions &CGOpts,
+ const clang::TargetOptions &TOpts,
+ const LangOptions &LOpts,
+ std::unique_ptr<raw_pwrite_stream> OS,
+ std::string SampleProfile,
+ BackendAction Action) {
StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -897,32 +974,15 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
return;
}
- Expected<std::vector<BitcodeModule>> BMsOrErr =
- getBitcodeModuleList(**MBOrErr);
- if (!BMsOrErr) {
- handleAllErrors(BMsOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+ Expected<BitcodeModule> BMOrErr = FindThinLTOModule(**MBOrErr);
+ if (!BMOrErr) {
+ handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) {
errs() << "Error loading imported file '" << I.first()
<< "': " << EIB.message() << '\n';
});
return;
}
-
- // The bitcode file may contain multiple modules, we want the one with a
- // summary.
- bool FoundModule = false;
- for (BitcodeModule &BM : *BMsOrErr) {
- Expected<bool> HasSummary = BM.hasSummary();
- if (HasSummary && *HasSummary) {
- ModuleMap.insert({I.first(), BM});
- FoundModule = true;
- break;
- }
- }
- if (!FoundModule) {
- errs() << "Error loading imported file '" << I.first()
- << "': Could not find module summary\n";
- return;
- }
+ ModuleMap.insert({I.first(), *BMOrErr});
OwnedImports.push_back(std::move(*MBOrErr));
}
@@ -930,6 +990,35 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
return llvm::make_unique<lto::NativeObjectStream>(std::move(OS));
};
lto::Config Conf;
+ Conf.CPU = TOpts.CPU;
+ Conf.CodeModel = getCodeModel(CGOpts);
+ Conf.MAttrs = TOpts.Features;
+ Conf.RelocModel = getRelocModel(CGOpts);
+ Conf.CGOptLevel = getCGOptLevel(CGOpts);
+ initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
+ Conf.SampleProfile = std::move(SampleProfile);
+ switch (Action) {
+ case Backend_EmitNothing:
+ Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
+ return false;
+ };
+ break;
+ case Backend_EmitLL:
+ Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
+ M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists);
+ return false;
+ };
+ break;
+ case Backend_EmitBC:
+ Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
+ WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists);
+ return false;
+ };
+ break;
+ default:
+ Conf.CGFileType = getCodeGenFileType(Action);
+ break;
+ }
if (Error E = thinBackend(
Conf, 0, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) {
@@ -965,7 +1054,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// of an error).
bool DoThinLTOBackend = CombinedIndex != nullptr;
if (DoThinLTOBackend) {
- runThinLTOBackend(CombinedIndex.get(), M, std::move(OS));
+ runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
+ LOpts, std::move(OS), CGOpts.SampleProfileFile, Action);
return;
}
}
@@ -996,6 +1086,7 @@ static const char* getSectionNameForBitcode(const Triple &T) {
return "__LLVM,__bitcode";
case Triple::COFF:
case Triple::ELF:
+ case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmbc";
}
@@ -1008,6 +1099,7 @@ static const char* getSectionNameForCommandline(const Triple &T) {
return "__LLVM,__cmdline";
case Triple::COFF:
case Triple::ELF:
+ case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmcmd";
}
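
The BackendUtil.cpp changes above hoist the code-model, relocation-model, optimization-level and TargetOptions setup into standalone helpers (getCodeModel, getRelocModel, getCGOptLevel, initTargetOptions) so that CreateTargetMachine and the extended runThinLTOBackend can share them. A minimal illustrative sketch (not part of the patch) of how the helpers compose, assuming the EmitAssemblyHelper members visible in the hunks above (CodeGenOpts, TargetOpts, LangOpts, HSOpts, TheTarget, Triple, TM):

    // Map the front-end options onto backend settings via the new helpers.
    llvm::CodeModel::Model CM = getCodeModel(CodeGenOpts);   // "small"/"kernel"/...
    llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);      // "static"/"pic"/...
    CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); // -O0 .. -O3
    llvm::TargetOptions Options;
    initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts);
    std::string FeaturesStr =
        llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
    // Create the TargetMachine exactly as the refactored CreateTargetMachine does.
    TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
                                            Options, RM, CM, OptLevel));

The ThinLTO path reuses the same helpers to populate lto::Config (Conf.CodeModel, Conf.RelocModel, Conf.CGOptLevel, Conf.Options), which keeps the two code paths from drifting apart.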
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 9287e46127bd5..28e20b53d6562 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -1181,7 +1181,7 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
if (LVal.isBitField())
return CGF.EmitLoadOfBitfieldLValue(
LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
- LVal.getAlignmentSource()));
+ LVal.getAlignmentSource()), loc);
if (LVal.isVectorElt())
return CGF.EmitLoadOfLValue(
LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index b250b9a32b181..1a57b3e6608dc 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -16,7 +16,7 @@
#include "CGObjCRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/DeclObjC.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
@@ -266,7 +266,7 @@ static bool isSafeForCXXConstantCapture(QualType type) {
static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM,
CodeGenFunction *CGF,
const VarDecl *var) {
- // Return if this is a function paramter. We shouldn't try to
+ // Return if this is a function parameter. We shouldn't try to
// rematerialize default arguments of function parameters.
if (isa<ParmVarDecl>(var))
return nullptr;
@@ -318,6 +318,19 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
elementTypes.push_back(CGM.getBlockDescriptorType());
}
+static QualType getCaptureFieldType(const CodeGenFunction &CGF,
+ const BlockDecl::Capture &CI) {
+ const VarDecl *VD = CI.getVariable();
+
+ // If the variable is captured by an enclosing block or lambda expression,
+ // use the type of the capture field.
+ if (CGF.BlockInfo && CI.isNested())
+ return CGF.BlockInfo->getCapture(VD).fieldType();
+ if (auto *FD = CGF.LambdaCaptureFields.lookup(VD))
+ return FD->getType();
+ return VD->getType();
+}
+
/// Compute the layout of the given block. Attempts to lay the block
/// out with minimal space requirements.
static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
@@ -432,15 +445,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
}
}
- QualType VT = variable->getType();
-
- // If the variable is captured by an enclosing block or lambda expression,
- // use the type of the capture field.
- if (CGF->BlockInfo && CI.isNested())
- VT = CGF->BlockInfo->getCapture(variable).fieldType();
- else if (auto *FD = CGF->LambdaCaptureFields.lookup(variable))
- VT = FD->getType();
-
+ QualType VT = getCaptureFieldType(*CGF, CI);
CharUnits size = C.getTypeSizeInChars(VT);
CharUnits align = C.getDeclAlign(variable);
@@ -606,8 +611,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
if (capture.isConstant()) continue;
// Ignore objects that aren't destructed.
- QualType::DestructionKind dtorKind =
- variable->getType().isDestructedType();
+ QualType VT = getCaptureFieldType(CGF, CI);
+ QualType::DestructionKind dtorKind = VT.isDestructedType();
if (dtorKind == QualType::DK_none) continue;
CodeGenFunction::Destroyer *destroyer;
@@ -634,7 +639,7 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
if (useArrayEHCleanup)
cleanupKind = InactiveNormalAndEHCleanup;
- CGF.pushDestroy(cleanupKind, addr, variable->getType(),
+ CGF.pushDestroy(cleanupKind, addr, VT,
destroyer, useArrayEHCleanup);
// Remember where that cleanup was.
@@ -718,7 +723,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// Otherwise, we have to emit this as a local block.
- llvm::Constant *isa = CGM.getNSConcreteStackBlock();
+ llvm::Constant *isa =
+ (!CGM.getContext().getLangOpts().OpenCL)
+ ? CGM.getNSConcreteStackBlock()
+ : CGM.getNullPointer(cast<llvm::PointerType>(
+ CGM.getNSConcreteStackBlock()->getType()),
+ QualType(getContext().VoidPtrTy));
isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);
// Build the block descriptor.
@@ -906,9 +916,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// Cast to the converted block-pointer type, which happens (somewhat
// unfortunately) to be a pointer to function type.
- llvm::Value *result =
- Builder.CreateBitCast(blockAddr.getPointer(),
- ConvertType(blockInfo.getBlockExpr()->getType()));
+ llvm::Value *result = Builder.CreatePointerCast(
+ blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
return result;
}
@@ -976,21 +985,41 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
// Get a pointer to the generic block literal.
+ // For OpenCL we generate a generic AS void pointer so that the same block
+ // definition can be reused both for blocks with captures (generated as
+ // private AS local variables) and for blocks without captures (generated as
+ // global AS program scope variables).
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
+
llvm::Type *BlockLiteralTy =
- llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType());
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
// Bitcast the callee to a block literal.
- BlockPtr = Builder.CreateBitCast(BlockPtr, BlockLiteralTy, "block.literal");
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
// Get the function pointer from the literal.
llvm::Value *FuncPtr =
Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
- BlockPtr = Builder.CreateBitCast(BlockPtr, VoidPtrTy);
// Add the block literal.
CallArgList Args;
- Args.add(RValue::get(BlockPtr), getContext().VoidPtrTy);
+
+ QualType VoidPtrQualTy = getContext().VoidPtrTy;
+ llvm::Type *GenericVoidPtrTy = VoidPtrTy;
+ if (getLangOpts().OpenCL) {
+ GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ VoidPtrQualTy =
+ getContext().getPointerType(getContext().getAddrSpaceQualType(
+ getContext().VoidTy, LangAS::opencl_generic));
+ }
+
+ BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
+ Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
QualType FnType = BPT->getPointeeType();
@@ -1097,7 +1126,12 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
auto fields = builder.beginStruct();
// isa
- fields.add(CGM.getNSConcreteGlobalBlock());
+ fields.add(
+ (!CGM.getContext().getLangOpts().OpenCL)
+ ? CGM.getNSConcreteGlobalBlock()
+ : CGM.getNullPointer(cast<llvm::PointerType>(
+ CGM.getNSConcreteGlobalBlock()->getType()),
+ QualType(CGM.getContext().VoidPtrTy)));
// __flags
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@@ -1114,16 +1148,19 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
- llvm::Constant *literal =
- fields.finishAndCreateGlobal("__block_literal_global",
- blockInfo.BlockAlign,
- /*constant*/ true);
+ unsigned AddrSpace = 0;
+ if (CGM.getContext().getLangOpts().OpenCL)
+ AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+
+ llvm::Constant *literal = fields.finishAndCreateGlobal(
+ "__block_literal_global", blockInfo.BlockAlign,
+ /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
// Return a constant of the appropriately-casted type.
llvm::Type *RequiredType =
CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType());
llvm::Constant *Result =
- llvm::ConstantExpr::getBitCast(literal, RequiredType);
+ llvm::ConstantExpr::getPointerCast(literal, RequiredType);
CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
return Result;
}
@@ -1155,9 +1192,13 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
// Instead of messing around with LocalDeclMap, just set the value
// directly as BlockPointer.
- BlockPointer = Builder.CreateBitCast(arg,
- BlockInfo->StructureType->getPointerTo(),
- "block");
+ BlockPointer = Builder.CreatePointerCast(
+ arg,
+ BlockInfo->StructureType->getPointerTo(
+ getContext().getLangOpts().OpenCL
+ ? getContext().getTargetAddressSpace(LangAS::opencl_generic)
+ : 0),
+ "block");
}
Address CodeGenFunction::LoadBlockStruct() {
@@ -1196,6 +1237,15 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
// The first argument is the block pointer. Just take it as a void*
// and cast it later.
QualType selfTy = getContext().VoidPtrTy;
+
+ // For OpenCL, the passed block pointer can be a private AS local variable or
+ // a global AS program scope variable (covering the cases with and without
+ // captures). Generic AS is therefore used so that one implementation can
+ // accommodate both.
+ if (getLangOpts().OpenCL)
+ selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType(
+ getContext().VoidTy, LangAS::opencl_generic));
+
IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor");
ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl),
@@ -1323,23 +1373,102 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
return fn;
}
-/*
- notes.push_back(HelperInfo());
- HelperInfo &note = notes.back();
- note.index = capture.getIndex();
- note.RequiresCopying = (ci->hasCopyExpr() || BlockRequiresCopying(type));
- note.cxxbar_import = ci->getCopyExpr();
-
- if (ci->isByRef()) {
- note.flag = BLOCK_FIELD_IS_BYREF;
- if (type.isObjCGCWeak())
- note.flag |= BLOCK_FIELD_IS_WEAK;
- } else if (type->isBlockPointerType()) {
- note.flag = BLOCK_FIELD_IS_BLOCK;
- } else {
- note.flag = BLOCK_FIELD_IS_OBJECT;
- }
- */
+namespace {
+
+/// Represents a type of copy/destroy operation that should be performed for an
+/// entity that's captured by a block.
+enum class BlockCaptureEntityKind {
+ CXXRecord, // Copy or destroy
+ ARCWeak,
+ ARCStrong,
+ BlockObject, // Assign or release
+ None
+};
+
+/// Represents a captured entity that requires extra operations in order for
+/// this entity to be copied or destroyed correctly.
+struct BlockCaptureManagedEntity {
+ BlockCaptureEntityKind Kind;
+ BlockFieldFlags Flags;
+ const BlockDecl::Capture &CI;
+ const CGBlockInfo::Capture &Capture;
+
+ BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags,
+ const BlockDecl::Capture &CI,
+ const CGBlockInfo::Capture &Capture)
+ : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {}
+};
+
+} // end anonymous namespace
+
+static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
+computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
+ const LangOptions &LangOpts) {
+ if (CI.getCopyExpr()) {
+ assert(!CI.isByRef());
+ // don't bother computing flags
+ return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
+ }
+ BlockFieldFlags Flags;
+ if (CI.isByRef()) {
+ Flags = BLOCK_FIELD_IS_BYREF;
+ if (T.isObjCGCWeak())
+ Flags |= BLOCK_FIELD_IS_WEAK;
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+ }
+ if (!T->isObjCRetainableType())
+ // For all other types, the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+
+ Flags = BLOCK_FIELD_IS_OBJECT;
+ bool isBlockPointer = T->isBlockPointerType();
+ if (isBlockPointer)
+ Flags = BLOCK_FIELD_IS_BLOCK;
+
+ // Special rules for ARC captures:
+ Qualifiers QS = T.getQualifiers();
+
+ // We need to register __weak direct captures with the runtime.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+ return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
+
+ // We need to retain the copied value for __strong direct captures.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) {
+ // If it's a block pointer, we have to copy the block and
+ // assign that to the destination pointer, so we might as
+ // well use _Block_object_assign. Otherwise we can avoid that.
+ return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong
+ : BlockCaptureEntityKind::BlockObject,
+ Flags);
+ }
+
+ // Non-ARC captures of retainable pointers are strong and
+ // therefore require a call to _Block_object_assign.
+ if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+ // Otherwise the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+}
+
+/// Find the set of block captures that need to be explicitly copied or destroyed.
+static void findBlockCapturedManagedEntities(
+ const CGBlockInfo &BlockInfo, const LangOptions &LangOpts,
+ SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures,
+ llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>(
+ const BlockDecl::Capture &, QualType, const LangOptions &)>
+ Predicate) {
+ for (const auto &CI : BlockInfo.getBlockDecl()->captures()) {
+ const VarDecl *Variable = CI.getVariable();
+ const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable);
+ if (Capture.isConstant())
+ continue;
+
+ auto Info = Predicate(CI, Variable->getType(), LangOpts);
+ if (Info.first != BlockCaptureEntityKind::None)
+ ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture);
+ }
+}
/// Generate the copy-helper function for a block closure object:
/// static void block_copy_helper(block_t *dst, block_t *src);
@@ -1399,78 +1528,28 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign);
dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest");
- const BlockDecl *blockDecl = blockInfo.getBlockDecl();
+ SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures,
+ computeCopyInfoForBlockCapture);
- for (const auto &CI : blockDecl->captures()) {
- const VarDecl *variable = CI.getVariable();
- QualType type = variable->getType();
-
- const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
- if (capture.isConstant()) continue;
-
- const Expr *copyExpr = CI.getCopyExpr();
- BlockFieldFlags flags;
-
- bool useARCWeakCopy = false;
- bool useARCStrongCopy = false;
-
- if (copyExpr) {
- assert(!CI.isByRef());
- // don't bother computing flags
-
- } else if (CI.isByRef()) {
- flags = BLOCK_FIELD_IS_BYREF;
- if (type.isObjCGCWeak())
- flags |= BLOCK_FIELD_IS_WEAK;
-
- } else if (type->isObjCRetainableType()) {
- flags = BLOCK_FIELD_IS_OBJECT;
- bool isBlockPointer = type->isBlockPointerType();
- if (isBlockPointer)
- flags = BLOCK_FIELD_IS_BLOCK;
-
- // Special rules for ARC captures:
- Qualifiers qs = type.getQualifiers();
-
- // We need to register __weak direct captures with the runtime.
- if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) {
- useARCWeakCopy = true;
-
- // We need to retain the copied value for __strong direct captures.
- } else if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) {
- // If it's a block pointer, we have to copy the block and
- // assign that to the destination pointer, so we might as
- // well use _Block_object_assign. Otherwise we can avoid that.
- if (!isBlockPointer)
- useARCStrongCopy = true;
-
- // Non-ARC captures of retainable pointers are strong and
- // therefore require a call to _Block_object_assign.
- } else if (!qs.getObjCLifetime() && !getLangOpts().ObjCAutoRefCount) {
- // fall through
-
- // Otherwise the memcpy is fine.
- } else {
- continue;
- }
-
- // For all other types, the memcpy is fine.
- } else {
- continue;
- }
+ for (const auto &CopiedCapture : CopiedCaptures) {
+ const BlockDecl::Capture &CI = CopiedCapture.CI;
+ const CGBlockInfo::Capture &capture = CopiedCapture.Capture;
+ BlockFieldFlags flags = CopiedCapture.Flags;
unsigned index = capture.getIndex();
Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset());
Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset());
// If there's an explicit copy expression, we do that.
- if (copyExpr) {
- EmitSynthesizedCXXCopyCtor(dstField, srcField, copyExpr);
- } else if (useARCWeakCopy) {
+ if (CI.getCopyExpr()) {
+ assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord);
+ EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
+ } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
EmitARCCopyWeak(dstField, srcField);
} else {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
- if (useARCStrongCopy) {
+ if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
// At -O0, store null into the destination field (so that the
// storeStrong doesn't over-release) and then call storeStrong.
// This is a workaround to not having an initStrong call.
@@ -1491,6 +1570,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
}
} else {
+ assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject);
srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
llvm::Value *dstAddr =
Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
@@ -1498,6 +1578,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
};
+ const VarDecl *variable = CI.getVariable();
bool copyCanThrow = false;
if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) {
const Expr *copyExpr =
@@ -1521,6 +1602,52 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
}
+static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
+computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
+ const LangOptions &LangOpts) {
+ BlockFieldFlags Flags;
+ if (CI.isByRef()) {
+ Flags = BLOCK_FIELD_IS_BYREF;
+ if (T.isObjCGCWeak())
+ Flags |= BLOCK_FIELD_IS_WEAK;
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+ }
+
+ if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) {
+ if (Record->hasTrivialDestructor())
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
+ }
+
+ // Other types don't need to be destroyed explicitly.
+ if (!T->isObjCRetainableType())
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+
+ Flags = BLOCK_FIELD_IS_OBJECT;
+ if (T->isBlockPointerType())
+ Flags = BLOCK_FIELD_IS_BLOCK;
+
+ // Special rules for ARC captures.
+ Qualifiers QS = T.getQualifiers();
+
+ // Use objc_storeStrong for __strong direct captures; the
+ // dynamic tools really like it when we do this.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Strong)
+ return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags);
+
+ // Support __weak direct captures.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+ return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
+
+ // Non-ARC captures are strong, and we need to use
+ // _Block_object_dispose.
+ if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+ // Otherwise, we have nothing to do.
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+}
+
/// Generate the destroy-helper function for a block closure object:
/// static void block_destroy_helper(block_t *theBlock);
///
@@ -1570,79 +1697,39 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign);
src = Builder.CreateBitCast(src, structPtrTy, "block");
- const BlockDecl *blockDecl = blockInfo.getBlockDecl();
-
CodeGenFunction::RunCleanupsScope cleanups(*this);
- for (const auto &CI : blockDecl->captures()) {
- const VarDecl *variable = CI.getVariable();
- QualType type = variable->getType();
-
- const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
- if (capture.isConstant()) continue;
-
- BlockFieldFlags flags;
- const CXXDestructorDecl *dtor = nullptr;
+ SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures,
+ computeDestroyInfoForBlockCapture);
- bool useARCWeakDestroy = false;
- bool useARCStrongDestroy = false;
-
- if (CI.isByRef()) {
- flags = BLOCK_FIELD_IS_BYREF;
- if (type.isObjCGCWeak())
- flags |= BLOCK_FIELD_IS_WEAK;
- } else if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
- if (record->hasTrivialDestructor())
- continue;
- dtor = record->getDestructor();
- } else if (type->isObjCRetainableType()) {
- flags = BLOCK_FIELD_IS_OBJECT;
- if (type->isBlockPointerType())
- flags = BLOCK_FIELD_IS_BLOCK;
-
- // Special rules for ARC captures.
- Qualifiers qs = type.getQualifiers();
-
- // Use objc_storeStrong for __strong direct captures; the
- // dynamic tools really like it when we do this.
- if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) {
- useARCStrongDestroy = true;
-
- // Support __weak direct captures.
- } else if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) {
- useARCWeakDestroy = true;
-
- // Non-ARC captures are strong, and we need to use _Block_object_dispose.
- } else if (!qs.hasObjCLifetime() && !getLangOpts().ObjCAutoRefCount) {
- // fall through
-
- // Otherwise, we have nothing to do.
- } else {
- continue;
- }
- } else {
- continue;
- }
+ for (const auto &DestroyedCapture : DestroyedCaptures) {
+ const BlockDecl::Capture &CI = DestroyedCapture.CI;
+ const CGBlockInfo::Capture &capture = DestroyedCapture.Capture;
+ BlockFieldFlags flags = DestroyedCapture.Flags;
Address srcField =
Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset());
- // If there's an explicit copy expression, we do that.
- if (dtor) {
- PushDestructorCleanup(dtor, srcField);
+ // If the captured record has a destructor then call it.
+ if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) {
+ const auto *Dtor =
+ CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor();
+ PushDestructorCleanup(Dtor, srcField);
- // If this is a __weak capture, emit the release directly.
- } else if (useARCWeakDestroy) {
+ // If this is a __weak capture, emit the release directly.
+ } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
EmitARCDestroyWeak(srcField);
// Destroy strong objects with a call if requested.
- } else if (useARCStrongDestroy) {
+ } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
EmitARCDestroyStrong(srcField, ARCImpreciseLifetime);
// Otherwise we call _Block_object_dispose. It wouldn't be too
// hard to just emit this as a cleanup if we wanted to make sure
// that things were done in reverse.
} else {
+ assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject);
llvm::Value *value = Builder.CreateLoad(srcField);
value = Builder.CreateBitCast(value, VoidPtrTy);
BuildBlockRelease(value, flags);
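
The CGBlocks.cpp rewrite above replaces the two long per-capture if/else chains in the block copy and destroy helpers with a shared classification step: computeCopyInfoForBlockCapture / computeDestroyInfoForBlockCapture map each capture to a BlockCaptureEntityKind plus BlockFieldFlags, and findBlockCapturedManagedEntities collects only the captures that need work. A minimal sketch (illustrative, not part of the patch) of the resulting dispatch, assuming the CodeGenFunction context of GenerateCopyHelperFunction above, with the per-kind bodies abbreviated to comments:

    // Classify the captures once, keeping only those that need explicit copying.
    SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
    findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures,
                                     computeCopyInfoForBlockCapture);
    for (const auto &CopiedCapture : CopiedCaptures) {
      switch (CopiedCapture.Kind) {
      case BlockCaptureEntityKind::CXXRecord:   // EmitSynthesizedCXXCopyCtor(...)
        break;
      case BlockCaptureEntityKind::ARCWeak:     // EmitARCCopyWeak(dstField, srcField)
        break;
      case BlockCaptureEntityKind::ARCStrong:   // load + storeStrong-style copy
        break;
      case BlockCaptureEntityKind::BlockObject: // call _Block_object_assign
        break;
      case BlockCaptureEntityKind::None:        // filtered out by the finder
        break;
      }
    }

The destroy helper follows the same shape with computeDestroyInfoForBlockCapture, so any new capture kind only has to be taught to the two classification functions.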
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index b3d02f1f51c61..6ea0a325a4296 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -420,10 +420,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType) {
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE) {
uint64_t ObjectSize;
if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
- return emitBuiltinObjectSize(E, Type, ResType);
+ return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}
@@ -432,9 +433,14 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
/// it)
/// - A call to the @llvm.objectsize intrinsic
+///
+/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
+/// and we wouldn't otherwise try to reference a pass_object_size parameter,
+/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType) {
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE) {
// We need to reference an argument if the pointer is a parameter with the
// pass_object_size attribute.
if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
@@ -457,16 +463,20 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
- if (Type == 3 || E->HasSideEffects(getContext()))
+ if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
return getDefaultBuiltinObjectSizeResult(Type, ResType);
- // LLVM only supports 0 and 2, make sure that we pass along that
- // as a boolean.
- auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
- // FIXME: Get right address space.
- llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
- Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
- return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
+ Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
+ assert(Ptr->getType()->isPointerTy() &&
+ "Non-pointer passed to __builtin_object_size?");
+
+ Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
+
+ // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
+ Value *Min = Builder.getInt1((Type & 2) != 0);
+ // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
+ Value *NullIsUnknown = Builder.getTrue();
+ return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}
// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
@@ -482,10 +492,12 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
+ _interlockedbittestandset,
+ __fastfail,
};
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E) {
switch (BuiltinID) {
case MSVCIntrin::_BitScanForward:
case MSVCIntrin::_BitScanReverse: {
@@ -548,6 +560,22 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
+ case MSVCIntrin::_interlockedbittestandset: {
+ llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Or, Addr,
+ Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ // Shift the relevant bit to the least significant position, truncate to
+ // the result type, and test the low bit.
+ llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
+ llvm::Value *Truncated =
+ Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
+ return Builder.CreateAnd(Truncated,
+ ConstantInt::get(Truncated->getType(), 1));
+ }
+
case MSVCIntrin::_InterlockedDecrement: {
llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
@@ -566,6 +594,37 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
llvm::AtomicOrdering::SequentiallyConsistent);
return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
}
+
+ case MSVCIntrin::__fastfail: {
+ // Request immediate process termination from the kernel. The instruction
+ // sequences to do this are documented on MSDN:
+ // https://msdn.microsoft.com/en-us/library/dn774154.aspx
+ llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
+ StringRef Asm, Constraints;
+ switch (ISA) {
+ default:
+ ErrorUnsupported(E, "__fastfail call for this architecture");
+ break;
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ Asm = "int $$0x29";
+ Constraints = "{cx}";
+ break;
+ case llvm::Triple::thumb:
+ Asm = "udf #251";
+ Constraints = "{r0}";
+ break;
+ }
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+ llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoReturn);
+ CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
+ CS.setAttributes(NoReturnAttr);
+ return CS.getInstruction();
+ }
}
llvm_unreachable("Incorrect MSVC intrinsic!");
}
@@ -932,7 +991,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// We pass this builtin onto the optimizer so that it can figure out the
// object size in more complex cases.
- return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
+ return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
+ /*EmittedE=*/nullptr));
}
case Builtin::BI__builtin_prefetch: {
Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
@@ -2195,16 +2255,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedXor16:
case Builtin::BI_InterlockedXor:
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
- case Builtin::BI__readfsdword: {
- llvm::Type *IntTy = ConvertType(E->getType());
- Value *IntToPtr =
- Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 257));
- LoadInst *Load = Builder.CreateAlignedLoad(
- IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType()));
- Load->setVolatile(true);
- return RValue::get(Load);
- }
+ case Builtin::BI_interlockedbittestandset:
+ return RValue::get(
+ EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
case Builtin::BI__exception_code:
case Builtin::BI_exception_code:
@@ -2218,9 +2271,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_setjmpex: {
if (getTarget().getTriple().isOSMSVCRT()) {
llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
- llvm::AttributeSet ReturnsTwiceAttr =
- AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
+ llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::ReturnsTwice);
llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
"_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
@@ -2238,9 +2291,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
}
case Builtin::BI_setjmp: {
if (getTarget().getTriple().isOSMSVCRT()) {
- llvm::AttributeSet ReturnsTwiceAttr =
- AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
+ llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::ReturnsTwice);
llvm::Value *Buf = Builder.CreateBitOrPointerCast(
EmitScalarExpr(E->getArg(0)), Int8PtrTy);
llvm::CallSite CS;
@@ -2276,6 +2329,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
}
+ case Builtin::BI__fastfail:
+ return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
+
case Builtin::BI__builtin_coro_size: {
auto & Context = getContext();
auto SizeTy = Context.getSizeType();
@@ -2492,25 +2548,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
unsigned NumArgs = E->getNumArgs();
llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
- llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
- llvm::Value *Range = EmitScalarExpr(E->getArg(2));
+ LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
+ llvm::Value *Range = NDRangeL.getAddress().getPointer();
+ llvm::Type *RangeTy = NDRangeL.getAddress().getType();
if (NumArgs == 4) {
// The most basic form of the call with parameters:
// queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
Name = "__enqueue_kernel_basic";
- llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
+ llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
- llvm::Value *Block =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+ llvm::Value *Block = Builder.CreatePointerCast(
+ EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
- return RValue::get(Builder.CreateCall(
- CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
+ AttrBuilder B;
+ B.addAttribute(Attribute::ByVal);
+ llvm::AttributeList ByValAttrSet =
+ llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
+
+ auto RTCall =
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
+ {Queue, Flags, Range, Block});
+ RTCall->setAttributes(ByValAttrSet);
+ return RValue::get(RTCall);
}
assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
@@ -2518,14 +2585,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
Name = "__enqueue_kernel_vaargs";
- llvm::Value *Block =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+ llvm::Value *Block = Builder.CreatePointerCast(
+ EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
ConstantInt::get(IntTy, NumArgs - 4)};
- std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
- IntTy};
+ std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
+ GenericVoidPtrTy, IntTy};
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
@@ -2555,12 +2622,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
- llvm::Value *Block =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
+ llvm::Value *Block = Builder.CreatePointerCast(
+ EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
- std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy,
- Int32Ty, EventPtrTy, EventPtrTy,
- Int8PtrTy};
+ std::vector<llvm::Type *> ArgTys = {
+ QueueTy, Int32Ty, RangeTy, Int32Ty,
+ EventPtrTy, EventPtrTy, GenericVoidPtrTy};
std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
EventList, ClkEvent, Block};
@@ -2596,26 +2663,30 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
// parameter.
case Builtin::BIget_kernel_work_group_size: {
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, Int8PtrTy, false),
- "__get_kernel_work_group_size_impl"),
- Arg));
+ Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+ "__get_kernel_work_group_size_impl"),
+ Arg));
}
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+ Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+ llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
"__get_kernel_preferred_work_group_multiple_impl"),
Arg));
}
case Builtin::BIprintf:
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
- return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
+ if (getTarget().getTriple().isNVPTX())
+ return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
break;
case Builtin::BI__builtin_canonicalize:
case Builtin::BI__builtin_canonicalizef:
@@ -7115,6 +7186,13 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
}
+static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
+ llvm::Type *DstTy) {
+ unsigned NumberOfElements = DstTy->getVectorNumElements();
+ Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
+ return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
+}
+
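
EmitX86SExtMask is the shared lowering for the AVX-512 vpmovm2{b,w,d,q} builtins handled below: the k-mask is reinterpreted as an <N x i1> vector and sign-extended, so each set mask bit becomes an all-ones lane. Roughly what that means at the intrinsic level — assuming AVX-512BW and clang's <immintrin.h>, where _mm512_movm_epi8 is implemented with __builtin_ia32_cvtmask2b512:

#include <immintrin.h>

// Each bit of 'm' expands to 0x00 or 0xFF in the corresponding byte lane.
__m512i mask_to_bytes(__mmask64 m) {
  return _mm512_movm_epi8(m);
}
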
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (BuiltinID == X86::BI__builtin_ms_va_start ||
@@ -7321,7 +7399,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
- return UndefValue::get(ConvertType(E->getType()));
+ // The x86 definition of "undef" is not the same as the LLVM definition
+ // (PR32176). We leave optimizing away an unnecessary zero constant to the
+ // IR optimizer and backend.
+ // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+ // value, we should use that here instead of a zero.
+ return llvm::Constant::getNullValue(ConvertType(E->getType()));
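
In user terms this affects the _mm*_undefined_* intrinsics, which clang's headers implement with these builtins: they now expand to a zero constant rather than IR undef, because LLVM's undef may observe a different value at each use, which does not match what the x86 intrinsics are expected to provide (PR32176). Illustrative source, needing <immintrin.h>:

#include <immintrin.h>

__m128 make_scratch() {
  // After this change the "undefined" scratch value is materialized as all
  // zeros instead of IR undef.
  return _mm_undefined_ps();
}
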
case X86::BI__builtin_ia32_vec_init_v8qi:
case X86::BI__builtin_ia32_vec_init_v4hi:
case X86::BI__builtin_ia32_vec_init_v2si:
@@ -7408,6 +7491,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
+
+ case X86::BI__builtin_ia32_cvtmask2b128:
+ case X86::BI__builtin_ia32_cvtmask2b256:
+ case X86::BI__builtin_ia32_cvtmask2b512:
+ case X86::BI__builtin_ia32_cvtmask2w128:
+ case X86::BI__builtin_ia32_cvtmask2w256:
+ case X86::BI__builtin_ia32_cvtmask2w512:
+ case X86::BI__builtin_ia32_cvtmask2d128:
+ case X86::BI__builtin_ia32_cvtmask2d256:
+ case X86::BI__builtin_ia32_cvtmask2d512:
+ case X86::BI__builtin_ia32_cvtmask2q128:
+ case X86::BI__builtin_ia32_cvtmask2q256:
+ case X86::BI__builtin_ia32_cvtmask2q512:
+ return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
+
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
@@ -7922,6 +8020,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// instruction, but it will create a memset that won't be optimized away.
return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
}
+ case X86::BI__ud2:
+ // llvm.trap makes a ud2a instruction on x86.
+ return EmitTrapCall(Intrinsic::trap);
+ case X86::BI__int2c: {
+ // This syscall signals a driver assertion failure in x86 NT kernels.
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
+ llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoReturn);
+ CallSite CS = Builder.CreateCall(IA);
+ CS.setAttributes(NoReturnAttr);
+ return CS.getInstruction();
+ }
+ case X86::BI__readfsbyte:
+ case X86::BI__readfsword:
+ case X86::BI__readfsdword:
+ case X86::BI__readfsqword: {
+ llvm::Type *IntTy = ConvertType(E->getType());
+ Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
+ llvm::PointerType::get(IntTy, 257));
+ LoadInst *Load = Builder.CreateAlignedLoad(
+ IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
+ Load->setVolatile(true);
+ return Load;
+ }
+ case X86::BI__readgsbyte:
+ case X86::BI__readgsword:
+ case X86::BI__readgsdword:
+ case X86::BI__readgsqword: {
+ llvm::Type *IntTy = ConvertType(E->getType());
+ Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
+ llvm::PointerType::get(IntTy, 256));
+ LoadInst *Load = Builder.CreateAlignedLoad(
+ IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
+ Load->setVolatile(true);
+ return Load;
+ }
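
These MSVC intrinsics now lower to volatile loads through the x86 address spaces 257 (fs-relative) and 256 (gs-relative). A small usage example — compiles with clang-cl or -fms-extensions targeting x86_64 Windows; the 0x30 offset is the NT_TIB Self field and is only here to exercise the intrinsic:

#include <intrin.h>

unsigned long long current_teb_address() {
  return __readgsqword(0x30);
}
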
}
}
@@ -8326,6 +8463,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
+ llvm::SmallVector<llvm::Value *, 5> Args;
+ for (unsigned I = 0; I != 5; ++I)
+ Args.push_back(EmitScalarExpr(E->getArg(I)));
+ Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
+ Args[0]->getType());
+ return Builder.CreateCall(F, Args);
+ }
case AMDGPU::BI__builtin_amdgcn_div_fixup:
case AMDGPU::BI__builtin_amdgcn_div_fixupf:
case AMDGPU::BI__builtin_amdgcn_div_fixuph:
@@ -8391,7 +8536,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_classf:
case AMDGPU::BI__builtin_amdgcn_classh:
return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
-
+ case AMDGPU::BI__builtin_amdgcn_fmed3f:
+ case AMDGPU::BI__builtin_amdgcn_fmed3h:
+ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
case AMDGPU::BI__builtin_amdgcn_read_exec: {
CallInst *CI = cast<CallInst>(
EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 83febcb4af8cf..813cd74001867 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -15,7 +15,7 @@
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 59010f4407c29..0f3141ab76d0d 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -256,7 +256,7 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor(
return GetOrCreateLLVMFunction(
getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer,
- /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeSet(), IsForDefinition);
+ /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition);
}
static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index d53fd4cb63b2f..7b912e3aca576 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -291,11 +291,26 @@ public:
/// Emit constructor variants required by this ABI.
virtual void EmitCXXConstructors(const CXXConstructorDecl *D) = 0;
+ /// Notes how many arguments were added to the beginning (Prefix) and ending
+ /// (Suffix) of an arg list.
+ ///
+ /// Note that Prefix actually refers to the number of args *after* the first
+ /// one: `this` arguments always come first.
+ struct AddedStructorArgs {
+ unsigned Prefix = 0;
+ unsigned Suffix = 0;
+ AddedStructorArgs() = default;
+ AddedStructorArgs(unsigned P, unsigned S) : Prefix(P), Suffix(S) {}
+ static AddedStructorArgs prefix(unsigned N) { return {N, 0}; }
+ static AddedStructorArgs suffix(unsigned N) { return {0, N}; }
+ };
+
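
AddedStructorArgs lets buildStructorSignature and addImplicitConstructorArgs report not just how many implicit arguments an ABI adds, but whether they sit right after 'this' or at the end of the argument list. A hypothetical ABI override — class name and argument types invented purely for illustration — would look like:

CGCXXABI::AddedStructorArgs
MyCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
                                 SmallVectorImpl<CanQualType> &ArgTys) {
  ASTContext &Ctx = getContext();
  // One implicit flag right after 'this', one trailing cookie at the end.
  ArgTys.insert(ArgTys.begin() + 1, Ctx.BoolTy);
  ArgTys.push_back(Ctx.VoidPtrTy);
  return CGCXXABI::AddedStructorArgs(/*Prefix=*/1, /*Suffix=*/1);
}
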
/// Build the signature of the given constructor or destructor variant by
/// adding any required parameters. For convenience, ArgTys has been
/// initialized with the type of 'this'.
- virtual void buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
- SmallVectorImpl<CanQualType> &ArgTys) = 0;
+ virtual AddedStructorArgs
+ buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ SmallVectorImpl<CanQualType> &ArgTys) = 0;
/// Returns true if the given destructor type should be emitted as a linkonce
/// delegating thunk, regardless of whether the dtor is defined in this TU or
@@ -355,9 +370,9 @@ public:
/// Add any ABI-specific implicit arguments needed to call a constructor.
///
- /// \return The number of args added to the call, which is typically zero or
- /// one.
- virtual unsigned
+ /// \return The number of arguments added at the beginning and end of the
+ /// call, which is typically zero or one.
+ virtual AddedStructorArgs
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
bool Delegating, CallArgList &Args) = 0;
@@ -377,7 +392,7 @@ public:
isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF,
CodeGenFunction::VPtr Vptr) = 0;
- /// Checks if ABI requires to initilize vptrs for given dynamic class.
+ /// Checks if ABI requires to initialize vptrs for given dynamic class.
virtual bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) = 0;
/// Get the address point of the vtable for the given base subobject.
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index c7c61e0c8ecb1..8af32055fc4c2 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -101,39 +101,64 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
FTNP->getExtInfo(), {}, RequiredArgs(0));
}
+static void addExtParameterInfosForCall(
+ llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
+ const FunctionProtoType *proto,
+ unsigned prefixArgs,
+ unsigned totalArgs) {
+ assert(proto->hasExtParameterInfos());
+ assert(paramInfos.size() <= prefixArgs);
+ assert(proto->getNumParams() + prefixArgs <= totalArgs);
+
+ paramInfos.reserve(totalArgs);
+
+ // Add default infos for any prefix args that don't already have infos.
+ paramInfos.resize(prefixArgs);
+
+ // Add infos for the prototype.
+ for (const auto &ParamInfo : proto->getExtParameterInfos()) {
+ paramInfos.push_back(ParamInfo);
+ // pass_object_size params have no parameter info.
+ if (ParamInfo.hasPassObjectSize())
+ paramInfos.emplace_back();
+ }
+
+ assert(paramInfos.size() <= totalArgs &&
+ "Did we forget to insert pass_object_size args?");
+ // Add default infos for the variadic and/or suffix arguments.
+ paramInfos.resize(totalArgs);
+}
+
 /// Adds the formal parameters in FPT to the given prefix. If any parameter in
/// FPT has pass_object_size attrs, then we'll add parameters for those, too.
static void appendParameterTypes(const CodeGenTypes &CGT,
SmallVectorImpl<CanQualType> &prefix,
SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
- CanQual<FunctionProtoType> FPT,
- const FunctionDecl *FD) {
- // Fill out paramInfos.
- if (FPT->hasExtParameterInfos() || !paramInfos.empty()) {
- assert(paramInfos.size() <= prefix.size());
- auto protoParamInfos = FPT->getExtParameterInfos();
- paramInfos.reserve(prefix.size() + protoParamInfos.size());
- paramInfos.resize(prefix.size());
- paramInfos.append(protoParamInfos.begin(), protoParamInfos.end());
- }
-
- // Fast path: unknown target.
- if (FD == nullptr) {
+ CanQual<FunctionProtoType> FPT) {
+ // Fast path: don't touch param info if we don't need to.
+ if (!FPT->hasExtParameterInfos()) {
+ assert(paramInfos.empty() &&
+ "We have paramInfos, but the prototype doesn't?");
prefix.append(FPT->param_type_begin(), FPT->param_type_end());
return;
}
- // In the vast majority cases, we'll have precisely FPT->getNumParams()
+ unsigned PrefixSize = prefix.size();
+ // In the vast majority of cases, we'll have precisely FPT->getNumParams()
// parameters; the only thing that can change this is the presence of
// pass_object_size. So, we preallocate for the common case.
prefix.reserve(prefix.size() + FPT->getNumParams());
- assert(FD->getNumParams() == FPT->getNumParams());
+ auto ExtInfos = FPT->getExtParameterInfos();
+ assert(ExtInfos.size() == FPT->getNumParams());
for (unsigned I = 0, E = FPT->getNumParams(); I != E; ++I) {
prefix.push_back(FPT->getParamType(I));
- if (FD->getParamDecl(I)->hasAttr<PassObjectSizeAttr>())
+ if (ExtInfos[I].hasPassObjectSize())
prefix.push_back(CGT.getContext().getSizeType());
}
+
+ addExtParameterInfosForCall(paramInfos, FPT.getTypePtr(), PrefixSize,
+ prefix.size());
}
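
The reason both helpers above special-case pass_object_size is that such a parameter occupies two call-level slots. For example (standard clang attribute, nothing introduced by this patch):

// 'buf' is lowered as two arguments: the pointer itself plus an implicit
// size_t carrying the object size at the call site, which is why
// appendParameterTypes pushes getSizeType() and addExtParameterInfosForCall
// emplaces an extra default ExtParameterInfo right after the annotated one.
void fill(void *buf __attribute__((pass_object_size(0))), int byte);
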
/// Arrange the LLVM function layout for a value of the given function
@@ -147,7 +172,7 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
RequiredArgs Required =
RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD);
// FIXME: Kill copy.
- appendParameterTypes(CGT, prefix, paramInfos, FTP, FD);
+ appendParameterTypes(CGT, prefix, paramInfos, FTP);
CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod,
@@ -286,9 +311,19 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
// Add the formal parameters.
if (PassParams)
- appendParameterTypes(*this, argTypes, paramInfos, FTP, MD);
-
- TheCXXABI.buildStructorSignature(MD, Type, argTypes);
+ appendParameterTypes(*this, argTypes, paramInfos, FTP);
+
+ CGCXXABI::AddedStructorArgs AddedArgs =
+ TheCXXABI.buildStructorSignature(MD, Type, argTypes);
+ if (!paramInfos.empty()) {
+ // Note: prefix implies after the first param.
+ if (AddedArgs.Prefix)
+ paramInfos.insert(paramInfos.begin() + 1, AddedArgs.Prefix,
+ FunctionProtoType::ExtParameterInfo{});
+ if (AddedArgs.Suffix)
+ paramInfos.append(AddedArgs.Suffix,
+ FunctionProtoType::ExtParameterInfo{});
+ }
RequiredArgs required =
(PassParams && MD->isVariadic() ? RequiredArgs(argTypes.size())
@@ -321,26 +356,6 @@ getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) {
return argTypes;
}
-static void addExtParameterInfosForCall(
- llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
- const FunctionProtoType *proto,
- unsigned prefixArgs,
- unsigned totalArgs) {
- assert(proto->hasExtParameterInfos());
- assert(paramInfos.size() <= prefixArgs);
- assert(proto->getNumParams() + prefixArgs <= totalArgs);
-
- // Add default infos for any prefix args that don't already have infos.
- paramInfos.resize(prefixArgs);
-
- // Add infos for the prototype.
- auto protoInfos = proto->getExtParameterInfos();
- paramInfos.append(protoInfos.begin(), protoInfos.end());
-
- // Add default infos for the variadic arguments.
- paramInfos.resize(totalArgs);
-}
-
static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16>
getExtParameterInfosForCall(const FunctionProtoType *proto,
unsigned prefixArgs, unsigned totalArgs) {
@@ -352,18 +367,31 @@ getExtParameterInfosForCall(const FunctionProtoType *proto,
}
/// Arrange a call to a C++ method, passing the given arguments.
+///
+/// ExtraPrefixArgs is the number of ABI-specific args passed after the `this`
+/// parameter.
+/// ExtraSuffixArgs is the number of ABI-specific args passed at the end of
+/// args.
+/// PassProtoArgs indicates whether `args` has args for the parameters in the
+/// given CXXConstructorDecl.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
const CXXConstructorDecl *D,
CXXCtorType CtorKind,
- unsigned ExtraArgs) {
+ unsigned ExtraPrefixArgs,
+ unsigned ExtraSuffixArgs,
+ bool PassProtoArgs) {
// FIXME: Kill copy.
SmallVector<CanQualType, 16> ArgTypes;
for (const auto &Arg : args)
ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
+ // +1 for implicit this, which should always be args[0].
+ unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs;
+
CanQual<FunctionProtoType> FPT = GetFormalType(D);
- RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D);
+ RequiredArgs Required =
+ RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D);
GlobalDecl GD(D, CtorKind);
CanQualType ResultType = TheCXXABI.HasThisReturn(GD)
? ArgTypes.front()
@@ -372,8 +400,14 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
: Context.VoidTy;
FunctionType::ExtInfo Info = FPT->getExtInfo();
- auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs,
- ArgTypes.size());
+ llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> ParamInfos;
+ // If the prototype args are elided, we should only have ABI-specific args,
+ // which never have param info.
+ if (PassProtoArgs && FPT->hasExtParameterInfos()) {
+ // ABI-specific suffix arguments are treated the same as variadic arguments.
+ addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs,
+ ArgTypes.size());
+ }
return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true,
/*chainCall=*/false, ArgTypes, Info,
ParamInfos, Required);
@@ -617,15 +651,20 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
}
/// Arrange a call to a C++ method, passing the given arguments.
+///
+/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It
+/// does not count `this`.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
const FunctionProtoType *proto,
- RequiredArgs required) {
- unsigned numRequiredArgs =
- (proto->isVariadic() ? required.getNumRequiredArgs() : args.size());
- unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams();
+ RequiredArgs required,
+ unsigned numPrefixArgs) {
+ assert(numPrefixArgs + 1 <= args.size() &&
+ "Emitting a call with less args than the required prefix?");
+ // Add one to account for `this`. It's a bit awkward here, but we don't count
+ // `this` in similar places elsewhere.
auto paramInfos =
- getExtParameterInfosForCall(proto, numPrefixArgs, args.size());
+ getExtParameterInfosForCall(proto, numPrefixArgs + 1, args.size());
// FIXME: Kill copy.
auto argTypes = getArgTypesForCall(Context, args);
@@ -680,7 +719,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
assert(std::all_of(argTypes.begin(), argTypes.end(),
- std::mem_fun_ref(&CanQualType::isCanonicalAsParam)));
+ [](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
@@ -1620,15 +1659,113 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
+void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs) {
+ // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
+ if (!HasOptnone) {
+ if (CodeGenOpts.OptimizeSize)
+ FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
+ if (CodeGenOpts.OptimizeSize == 2)
+ FuncAttrs.addAttribute(llvm::Attribute::MinSize);
+ }
+
+ if (CodeGenOpts.DisableRedZone)
+ FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+ if (CodeGenOpts.NoImplicitFloat)
+ FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
+
+ if (AttrOnCallSite) {
+ // Attributes that should go on the call site only.
+ if (!CodeGenOpts.SimplifyLibCalls ||
+ CodeGenOpts.isNoBuiltinFunc(Name.data()))
+ FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
+ if (!CodeGenOpts.TrapFuncName.empty())
+ FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
+ } else {
+ // Attributes that should go on the function, but not the call site.
+ if (!CodeGenOpts.DisableFPElim) {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+ } else if (CodeGenOpts.OmitLeafFramePointer) {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+ FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ } else {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
+ FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ }
+
+ FuncAttrs.addAttribute("less-precise-fpmad",
+ llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
+
+ if (!CodeGenOpts.FPDenormalMode.empty())
+ FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
+
+ FuncAttrs.addAttribute("no-trapping-math",
+ llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+
+ // TODO: Are these all needed?
+ // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
+ FuncAttrs.addAttribute("no-infs-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
+ FuncAttrs.addAttribute("no-nans-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
+ FuncAttrs.addAttribute("unsafe-fp-math",
+ llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
+ FuncAttrs.addAttribute("use-soft-float",
+ llvm::toStringRef(CodeGenOpts.SoftFloat));
+ FuncAttrs.addAttribute("stack-protector-buffer-size",
+ llvm::utostr(CodeGenOpts.SSPBufferSize));
+ FuncAttrs.addAttribute("no-signed-zeros-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoSignedZeros));
+ FuncAttrs.addAttribute(
+ "correctly-rounded-divide-sqrt-fp-math",
+ llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
+
+ // TODO: Reciprocal estimate codegen options should apply to instructions?
+ std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
+ if (!Recips.empty())
+ FuncAttrs.addAttribute("reciprocal-estimates",
+ llvm::join(Recips.begin(), Recips.end(), ","));
+
+ if (CodeGenOpts.StackRealignment)
+ FuncAttrs.addAttribute("stackrealign");
+ if (CodeGenOpts.Backchain)
+ FuncAttrs.addAttribute("backchain");
+ }
+
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all functions and calls in CUDA as convergent
+ // (meaning, they may call an intrinsically convergent op, such as
+ // __syncthreads(), and so can't have certain optimizations applied around
+ // them). LLVM will remove this attribute where it safely can.
+ FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+ // Exceptions aren't supported in CUDA device code.
+ FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+
+ // Respect -fcuda-flush-denormals-to-zero.
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ FuncAttrs.addAttribute("nvptx-f32ftz", "true");
+ }
+}
+
+void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
+ llvm::AttrBuilder FuncAttrs;
+ ConstructDefaultFnAttrList(F.getName(),
+ F.hasFnAttribute(llvm::Attribute::OptimizeNone),
+ /* AttrOnCallsite = */ false, FuncAttrs);
+ llvm::AttributeList AS = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
+ F.addAttributes(llvm::AttributeList::FunctionIndex, AS);
+}
+
void CodeGenModule::ConstructAttributeList(
StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo,
AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) {
llvm::AttrBuilder FuncAttrs;
llvm::AttrBuilder RetAttrs;
- bool HasOptnone = false;
CallingConv = FI.getEffectiveCallingConvention();
-
if (FI.isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
@@ -1639,7 +1776,7 @@ void CodeGenModule::ConstructAttributeList(
const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
- bool HasAnyX86InterruptAttr = false;
+ bool HasOptnone = false;
// FIXME: handle sseregparm someday...
if (TargetDecl) {
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1679,7 +1816,6 @@ void CodeGenModule::ConstructAttributeList(
if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
RetAttrs.addAttribute(llvm::Attribute::NonNull);
- HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>();
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Optional<unsigned> NumElemsParam;
@@ -1691,86 +1827,19 @@ void CodeGenModule::ConstructAttributeList(
}
}
- // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
- if (!HasOptnone) {
- if (CodeGenOpts.OptimizeSize)
- FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
- if (CodeGenOpts.OptimizeSize == 2)
- FuncAttrs.addAttribute(llvm::Attribute::MinSize);
- }
+ ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
- if (CodeGenOpts.DisableRedZone)
- FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
- if (CodeGenOpts.NoImplicitFloat)
- FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
if (CodeGenOpts.EnableSegmentedStacks &&
!(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
FuncAttrs.addAttribute("split-stack");
- if (AttrOnCallSite) {
- // Attributes that should go on the call site only.
- if (!CodeGenOpts.SimplifyLibCalls ||
- CodeGenOpts.isNoBuiltinFunc(Name.data()))
- FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
- if (!CodeGenOpts.TrapFuncName.empty())
- FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
- } else {
- // Attributes that should go on the function, but not the call site.
- if (!CodeGenOpts.DisableFPElim) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- } else if (CodeGenOpts.OmitLeafFramePointer) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- } else {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- }
-
+ if (!AttrOnCallSite) {
bool DisableTailCalls =
- CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr ||
- (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>());
- FuncAttrs.addAttribute(
- "disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
-
- FuncAttrs.addAttribute("less-precise-fpmad",
- llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
-
- if (!CodeGenOpts.FPDenormalMode.empty())
- FuncAttrs.addAttribute("denormal-fp-math",
- CodeGenOpts.FPDenormalMode);
-
- FuncAttrs.addAttribute("no-trapping-math",
- llvm::toStringRef(CodeGenOpts.NoTrappingMath));
-
- // TODO: Are these all needed?
- // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
- FuncAttrs.addAttribute("no-infs-fp-math",
- llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
- FuncAttrs.addAttribute("no-nans-fp-math",
- llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
- FuncAttrs.addAttribute("unsafe-fp-math",
- llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
- FuncAttrs.addAttribute("use-soft-float",
- llvm::toStringRef(CodeGenOpts.SoftFloat));
- FuncAttrs.addAttribute("stack-protector-buffer-size",
- llvm::utostr(CodeGenOpts.SSPBufferSize));
- FuncAttrs.addAttribute("no-signed-zeros-fp-math",
- llvm::toStringRef(CodeGenOpts.NoSignedZeros));
- FuncAttrs.addAttribute(
- "correctly-rounded-divide-sqrt-fp-math",
- llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
-
- // TODO: Reciprocal estimate codegen options should apply to instructions?
- std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
- if (!Recips.empty())
- FuncAttrs.addAttribute("reciprocal-estimates",
- llvm::join(Recips.begin(), Recips.end(), ","));
-
- if (CodeGenOpts.StackRealignment)
- FuncAttrs.addAttribute("stackrealign");
- if (CodeGenOpts.Backchain)
- FuncAttrs.addAttribute("backchain");
+ CodeGenOpts.DisableTailCalls ||
+ (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+ TargetDecl->hasAttr<AnyX86InterruptAttr>()));
+ FuncAttrs.addAttribute("disable-tail-calls",
+ llvm::toStringRef(DisableTailCalls));
// Add target-cpu and target-features attributes to functions. If
// we have a decl for the function and it has a target attribute then
@@ -1819,21 +1888,6 @@ void CodeGenModule::ConstructAttributeList(
}
}
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
- // Conservatively, mark all functions and calls in CUDA as convergent
- // (meaning, they may call an intrinsically convergent op, such as
- // __syncthreads(), and so can't have certain optimizations applied around
- // them). LLVM will remove this attribute where it safely can.
- FuncAttrs.addAttribute(llvm::Attribute::Convergent);
-
- // Exceptions aren't supported in CUDA device code.
- FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
-
- // Respect -fcuda-flush-denormals-to-zero.
- if (getLangOpts().CUDADeviceFlushDenormalsToZero)
- FuncAttrs.addAttribute("nvptx-f32ftz", "true");
- }
-
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
QualType RetTy = FI.getReturnType();
@@ -1878,8 +1932,8 @@ void CodeGenModule::ConstructAttributeList(
// Attach return attributes.
if (RetAttrs.hasAttributes()) {
- PAL.push_back(llvm::AttributeSet::get(
- getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs));
+ PAL.push_back(llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::ReturnIndex, RetAttrs));
}
bool hasUsedSRet = false;
@@ -1891,7 +1945,7 @@ void CodeGenModule::ConstructAttributeList(
hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
- PAL.push_back(llvm::AttributeSet::get(
+ PAL.push_back(llvm::AttributeList::get(
getLLVMContext(), IRFunctionArgs.getSRetArgNo() + 1, SRETAttrs));
}
@@ -1899,7 +1953,7 @@ void CodeGenModule::ConstructAttributeList(
if (IRFunctionArgs.hasInallocaArg()) {
llvm::AttrBuilder Attrs;
Attrs.addAttribute(llvm::Attribute::InAlloca);
- PAL.push_back(llvm::AttributeSet::get(
+ PAL.push_back(llvm::AttributeList::get(
getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs));
}
@@ -1914,7 +1968,7 @@ void CodeGenModule::ConstructAttributeList(
// Add attribute for padding argument, if necessary.
if (IRFunctionArgs.hasPaddingArg(ArgNo)) {
if (AI.getPaddingInReg())
- PAL.push_back(llvm::AttributeSet::get(
+ PAL.push_back(llvm::AttributeList::get(
getLLVMContext(), IRFunctionArgs.getPaddingArgNo(ArgNo) + 1,
llvm::Attribute::InReg));
}
@@ -2031,17 +2085,15 @@ void CodeGenModule::ConstructAttributeList(
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
for (unsigned i = 0; i < NumIRArgs; i++)
- PAL.push_back(llvm::AttributeSet::get(getLLVMContext(),
- FirstIRArg + i + 1, Attrs));
+ PAL.push_back(llvm::AttributeList::get(getLLVMContext(),
+ FirstIRArg + i + 1, Attrs));
}
}
assert(ArgNo == FI.arg_size());
if (FuncAttrs.hasAttributes())
- PAL.push_back(llvm::
- AttributeSet::get(getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- FuncAttrs));
+ PAL.push_back(llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs));
}
/// An argument came in as a promoted argument; demote it back to its
@@ -2152,8 +2204,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (IRFunctionArgs.hasSRetArg()) {
auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]);
AI->setName("agg.result");
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1,
- llvm::Attribute::NoAlias));
+ AI->addAttr(llvm::AttributeList::get(getLLVMContext(), AI->getArgNo() + 1,
+ llvm::Attribute::NoAlias));
}
// Track if we received the parameter as a pointer (indirect, byval, or
@@ -2244,9 +2296,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
PVD->getFunctionScopeIndex()))
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NonNull));
+ AI->addAttr(llvm::AttributeList::get(getLLVMContext(),
+ AI->getArgNo() + 1,
+ llvm::Attribute::NonNull));
QualType OTy = PVD->getOriginalType();
if (const auto *ArrTy =
@@ -2263,12 +2315,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::AttrBuilder Attrs;
Attrs.addDereferenceableAttr(
getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1, Attrs));
+ AI->addAttr(llvm::AttributeList::get(
+ getLLVMContext(), AI->getArgNo() + 1, Attrs));
} else if (getContext().getTargetAddressSpace(ETy) == 0) {
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NonNull));
+ AI->addAttr(llvm::AttributeList::get(getLLVMContext(),
+ AI->getArgNo() + 1,
+ llvm::Attribute::NonNull));
}
}
} else if (const auto *ArrTy =
@@ -2278,9 +2330,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// we know that it must be nonnull.
if (ArrTy->getSizeModifier() == VariableArrayType::Static &&
!getContext().getTargetAddressSpace(ArrTy->getElementType()))
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NonNull));
+ AI->addAttr(llvm::AttributeList::get(getLLVMContext(),
+ AI->getArgNo() + 1,
+ llvm::Attribute::NonNull));
}
const auto *AVAttr = PVD->getAttr<AlignValueAttr>();
@@ -2298,15 +2350,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::AttrBuilder Attrs;
Attrs.addAlignmentAttr(Alignment);
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1, Attrs));
+ AI->addAttr(llvm::AttributeList::get(getLLVMContext(),
+ AI->getArgNo() + 1, Attrs));
}
}
if (Arg->getType().isRestrictQualified())
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NoAlias));
+ AI->addAttr(llvm::AttributeList::get(
+ getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias));
// LLVM expects swifterror parameters to be used in very restricted
// ways. Copy the value into a less-restricted temporary.
@@ -2858,19 +2909,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
llvm::Instruction *Ret;
if (RV) {
- if (CurCodeDecl && SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) {
- if (auto RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>()) {
- SanitizerScope SanScope(this);
- llvm::Value *Cond = Builder.CreateICmpNE(
- RV, llvm::Constant::getNullValue(RV->getType()));
- llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(EndLoc),
- EmitCheckSourceLocation(RetNNAttr->getLocation()),
- };
- EmitCheck(std::make_pair(Cond, SanitizerKind::ReturnsNonnullAttribute),
- SanitizerHandler::NonnullReturn, StaticData, None);
- }
- }
+ EmitReturnValueCheck(RV, EndLoc);
Ret = Builder.CreateRet(RV);
} else {
Ret = Builder.CreateRetVoid();
@@ -2880,6 +2919,63 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
Ret->setDebugLoc(std::move(RetDbgLoc));
}
+void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV,
+ SourceLocation EndLoc) {
+ // A current decl may not be available when emitting vtable thunks.
+ if (!CurCodeDecl)
+ return;
+
+ ReturnsNonNullAttr *RetNNAttr = nullptr;
+ if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute))
+ RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>();
+
+ if (!RetNNAttr && !requiresReturnValueNullabilityCheck())
+ return;
+
+ // Prefer the returns_nonnull attribute if it's present.
+ SourceLocation AttrLoc;
+ SanitizerMask CheckKind;
+ SanitizerHandler Handler;
+ if (RetNNAttr) {
+ assert(!requiresReturnValueNullabilityCheck() &&
+ "Cannot check nullability and the nonnull attribute");
+ AttrLoc = RetNNAttr->getLocation();
+ CheckKind = SanitizerKind::ReturnsNonnullAttribute;
+ Handler = SanitizerHandler::NonnullReturn;
+ } else {
+ if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl))
+ if (auto *TSI = DD->getTypeSourceInfo())
+ if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>())
+ AttrLoc = FTL.getReturnLoc().findNullabilityLoc();
+ CheckKind = SanitizerKind::NullabilityReturn;
+ Handler = SanitizerHandler::NullabilityReturn;
+ }
+
+ SanitizerScope SanScope(this);
+
+ llvm::BasicBlock *Check = nullptr;
+ llvm::BasicBlock *NoCheck = nullptr;
+ if (requiresReturnValueNullabilityCheck()) {
+ // Before doing the nullability check, make sure that the preconditions for
+ // the check are met.
+ Check = createBasicBlock("nullcheck");
+ NoCheck = createBasicBlock("no.nullcheck");
+ Builder.CreateCondBr(RetValNullabilityPrecondition, Check, NoCheck);
+ EmitBlock(Check);
+ }
+
+ // Now do the null check. If the returns_nonnull attribute is present, this
+ // is done unconditionally.
+ llvm::Value *Cond = Builder.CreateIsNotNull(RV);
+ llvm::Constant *StaticData[] = {
+ EmitCheckSourceLocation(EndLoc), EmitCheckSourceLocation(AttrLoc),
+ };
+ EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None);
+
+ if (requiresReturnValueNullabilityCheck())
+ EmitBlock(NoCheck);
+}
+
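
EmitReturnValueCheck folds the old returns_nonnull-only check together with the new nullability-return sanitizer. A source-level example of the new path — assuming -fsanitize=nullability-return; _Nonnull is the clang nullability qualifier:

// Returning null here fires the NullabilityReturn handler; if the function
// used __attribute__((returns_nonnull)) instead, the pre-existing
// ReturnsNonnullAttribute path would be taken, as the code above prefers it.
int *_Nonnull first_non_null(int *a, int *b) {
  return a ? a : b;
}
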
static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
@@ -3188,50 +3284,63 @@ void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType,
SourceLocation ArgLoc,
- const FunctionDecl *FD,
+ AbstractCallee AC,
unsigned ParmNum) {
- if (!SanOpts.has(SanitizerKind::NonnullAttribute) || !FD)
+ if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) ||
+ SanOpts.has(SanitizerKind::NullabilityArg)))
return;
- auto PVD = ParmNum < FD->getNumParams() ? FD->getParamDecl(ParmNum) : nullptr;
+
+ // The param decl may be missing in a variadic function.
+ auto PVD = ParmNum < AC.getNumParams() ? AC.getParamDecl(ParmNum) : nullptr;
unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum;
- auto NNAttr = getNonNullAttr(FD, PVD, ArgType, ArgNo);
- if (!NNAttr)
+
+ // Prefer the nonnull attribute if it's present.
+ const NonNullAttr *NNAttr = nullptr;
+ if (SanOpts.has(SanitizerKind::NonnullAttribute))
+ NNAttr = getNonNullAttr(AC.getDecl(), PVD, ArgType, ArgNo);
+
+ bool CanCheckNullability = false;
+ if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) {
+ auto Nullability = PVD->getType()->getNullability(getContext());
+ CanCheckNullability = Nullability &&
+ *Nullability == NullabilityKind::NonNull &&
+ PVD->getTypeSourceInfo();
+ }
+
+ if (!NNAttr && !CanCheckNullability)
return;
+
+ SourceLocation AttrLoc;
+ SanitizerMask CheckKind;
+ SanitizerHandler Handler;
+ if (NNAttr) {
+ AttrLoc = NNAttr->getLocation();
+ CheckKind = SanitizerKind::NonnullAttribute;
+ Handler = SanitizerHandler::NonnullArg;
+ } else {
+ AttrLoc = PVD->getTypeSourceInfo()->getTypeLoc().findNullabilityLoc();
+ CheckKind = SanitizerKind::NullabilityArg;
+ Handler = SanitizerHandler::NullabilityArg;
+ }
+
SanitizerScope SanScope(this);
assert(RV.isScalar());
llvm::Value *V = RV.getScalarVal();
llvm::Value *Cond =
Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType()));
llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(ArgLoc),
- EmitCheckSourceLocation(NNAttr->getLocation()),
+ EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc),
llvm::ConstantInt::get(Int32Ty, ArgNo + 1),
};
- EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute),
- SanitizerHandler::NonnullArg, StaticData, None);
+ EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None);
}
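
The argument-side check mirrors the return-value one: SanitizerKind::NonnullAttribute still covers __attribute__((nonnull)), while SanitizerKind::NullabilityArg covers _Nonnull parameter types. An illustrative caller, assuming -fsanitize=nullability-arg:

void copy_name(char *_Nonnull dst, const char *src);

void demo(const char *src) {
  copy_name(nullptr, src);  // diagnosed at runtime by the NullabilityArg handler
}
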
void CodeGenFunction::EmitCallArgs(
CallArgList &Args, ArrayRef<QualType> ArgTypes,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
- const FunctionDecl *CalleeDecl, unsigned ParamsToSkip,
- EvaluationOrder Order) {
+ AbstractCallee AC, unsigned ParamsToSkip, EvaluationOrder Order) {
assert((int)ArgTypes.size() == (ArgRange.end() - ArgRange.begin()));
- auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg) {
- if (CalleeDecl == nullptr || I >= CalleeDecl->getNumParams())
- return;
- auto *PS = CalleeDecl->getParamDecl(I)->getAttr<PassObjectSizeAttr>();
- if (PS == nullptr)
- return;
-
- const auto &Context = getContext();
- auto SizeTy = Context.getSizeType();
- auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
- llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T);
- Args.add(RValue::get(V), SizeTy);
- };
-
// We *have* to evaluate arguments from right to left in the MS C++ ABI,
// because arguments are destroyed left to right in the callee. As a special
// case, there are certain language constructs that require left-to-right
@@ -3242,6 +3351,27 @@ void CodeGenFunction::EmitCallArgs(
? Order == EvaluationOrder::ForceLeftToRight
: Order != EvaluationOrder::ForceRightToLeft;
+ auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg,
+ RValue EmittedArg) {
+ if (!AC.hasFunctionDecl() || I >= AC.getNumParams())
+ return;
+ auto *PS = AC.getParamDecl(I)->getAttr<PassObjectSizeAttr>();
+ if (PS == nullptr)
+ return;
+
+ const auto &Context = getContext();
+ auto SizeTy = Context.getSizeType();
+ auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
+ assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?");
+ llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T,
+ EmittedArg.getScalarVal());
+ Args.add(RValue::get(V), SizeTy);
+ // If we're emitting args in reverse, be sure to do so with
+ // pass_object_size, as well.
+ if (!LeftToRight)
+ std::swap(Args.back(), *(&Args.back() - 1));
+ };
+
// Insert a stack save if we're going to need any inalloca args.
bool HasInAllocaArgs = false;
if (CGM.getTarget().getCXXABI().isMicrosoft()) {
@@ -3259,11 +3389,20 @@ void CodeGenFunction::EmitCallArgs(
for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) {
unsigned Idx = LeftToRight ? I : E - I - 1;
CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx;
- if (!LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg);
+ unsigned InitialArgSize = Args.size();
EmitCallArg(Args, *Arg, ArgTypes[Idx]);
- EmitNonNullArgCheck(Args.back().RV, ArgTypes[Idx], (*Arg)->getExprLoc(),
- CalleeDecl, ParamsToSkip + Idx);
- if (LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg);
+ // In particular, we depend on it being the last arg in Args, and the
+ // objectsize bits depend on there only being one arg if !LeftToRight.
+ assert(InitialArgSize + 1 == Args.size() &&
+ "The code below depends on only adding one arg per EmitCallArg");
+ (void)InitialArgSize;
+ RValue RVArg = Args.back().RV;
+ EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
+ ParamsToSkip + Idx);
+ // @llvm.objectsize should never have side-effects and shouldn't need
+ // destruction/cleanups, so we can safely "emit" it after its arg,
+ // regardless of right-to-leftness
+ MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
}
if (!LeftToRight) {
@@ -3571,12 +3710,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address ArgMemory = Address::invalid();
const llvm::StructLayout *ArgMemoryLayout = nullptr;
if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
- ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct);
+ const llvm::DataLayout &DL = CGM.getDataLayout();
+ ArgMemoryLayout = DL.getStructLayout(ArgStruct);
llvm::Instruction *IP = CallArgs.getStackBase();
llvm::AllocaInst *AI;
if (IP) {
IP = IP->getNextNode();
- AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
+ AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(),
+ "argmem", IP);
} else {
AI = CreateTempAlloca(ArgStruct, "argmem");
}
@@ -3977,8 +4118,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Callee.getAbstractInfo(),
AttributeList, CallingConv,
/*AttrOnCallSite=*/true);
- llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(),
- AttributeList);
+ llvm::AttributeList Attrs =
+ llvm::AttributeList::get(getLLVMContext(), AttributeList);
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
@@ -3989,15 +4130,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
!(Callee.getAbstractInfo().getCalleeDecl() &&
Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) {
Attrs =
- Attrs.addAttribute(getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
+ Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::AlwaysInline);
}
// Disable inlining inside SEH __try blocks.
if (isSEHTryScope()) {
Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+ Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoInline);
}
@@ -4014,7 +4154,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CannotThrow = true;
} else {
// Otherwise, nounwind call sites will never throw.
- CannotThrow = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
+ CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
}
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
@@ -4210,6 +4350,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(),
OffsetValue);
+ } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
+ llvm::Value *ParamVal =
+ CallArgs[AA->getParamIndex() - 1].RV.getScalarVal();
+ EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
}
}
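
The new AllocAlignAttr branch handles declarations like the following (standard GCC/Clang alloc_align semantics, not defined by this patch). The attribute's parameter index is 1-based, which explains the getParamIndex() - 1 above; the call result then gets an alignment assumption equal to the runtime value of that argument:

void *my_aligned_alloc(unsigned alignment, unsigned long size)
    __attribute__((alloc_align(1)));
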
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 031ce831cb375..97221e20c1959 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -25,10 +25,10 @@
#include "ABIInfo.h"
namespace llvm {
- class AttributeSet;
- class Function;
- class Type;
- class Value;
+class AttributeList;
+class Function;
+class Type;
+class Value;
}
namespace clang {
@@ -39,28 +39,28 @@ namespace clang {
class VarDecl;
namespace CodeGen {
- typedef SmallVector<llvm::AttributeSet, 8> AttributeListType;
-
- /// Abstract information about a function or function prototype.
- class CGCalleeInfo {
- /// \brief The function prototype of the callee.
- const FunctionProtoType *CalleeProtoTy;
- /// \brief The function declaration of the callee.
- const Decl *CalleeDecl;
-
- public:
- explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
- CGCalleeInfo(const Decl *calleeDecl)
- : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
-
- const FunctionProtoType *getCalleeFunctionProtoType() const {
- return CalleeProtoTy;
- }
- const Decl *getCalleeDecl() const { return CalleeDecl; }
+typedef SmallVector<llvm::AttributeList, 8> AttributeListType;
+
+/// Abstract information about a function or function prototype.
+class CGCalleeInfo {
+ /// \brief The function prototype of the callee.
+ const FunctionProtoType *CalleeProtoTy;
+ /// \brief The function declaration of the callee.
+ const Decl *CalleeDecl;
+
+public:
+ explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
+ CGCalleeInfo(const Decl *calleeDecl)
+ : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
+
+ const FunctionProtoType *getCalleeFunctionProtoType() const {
+ return CalleeProtoTy;
+ }
+ const Decl *getCalleeDecl() const { return CalleeDecl; }
};
/// All available information about a concrete callee.
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 05d056739524d..7ba03a0d42dd4 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -309,8 +309,10 @@ Address CodeGenFunction::GetAddressOfBaseClass(
// just do a bitcast; null checks are unnecessary.
if (NonVirtualOffset.isZero() && !VBase) {
if (sanitizePerformTypeCheck()) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::Null, !NullCheckValue);
EmitTypeCheck(TCK_Upcast, Loc, Value.getPointer(),
- DerivedTy, DerivedAlign, !NullCheckValue);
+ DerivedTy, DerivedAlign, SkippedChecks);
}
return Builder.CreateBitCast(Value, BasePtrTy);
}
@@ -331,8 +333,10 @@ Address CodeGenFunction::GetAddressOfBaseClass(
}
if (sanitizePerformTypeCheck()) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::Null, true);
EmitTypeCheck(VBase ? TCK_UpcastToVirtualBase : TCK_Upcast, Loc,
- Value.getPointer(), DerivedTy, DerivedAlign, true);
+ Value.getPointer(), DerivedTy, DerivedAlign, SkippedChecks);
}
// Compute the virtual offset.
@@ -685,7 +689,8 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS,
/// complete-to-base constructor delegation optimization, i.e.
/// emitting the complete constructor as a simple call to the base
/// constructor.
-static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor) {
+bool CodeGenFunction::IsConstructorDelegationValid(
+ const CXXConstructorDecl *Ctor) {
// Currently we disable the optimization for classes with virtual
// bases because (1) the addresses of parameter variables need to be
@@ -1131,10 +1136,11 @@ namespace {
RHS = EC->getSubExpr();
if (!RHS)
return nullptr;
- MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
- if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
- return nullptr;
- return Field;
+ if (MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS)) {
+ if (ME2->getMemberDecl() == Field)
+ return Field;
+ }
+ return nullptr;
} else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
if (!(MD && isMemcpyEquivalentSpecialMember(MD)))
@@ -1384,6 +1390,20 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CurGD.getDecl());
CXXDtorType DtorType = CurGD.getDtorType();
+ // For an abstract class, non-base destructors are never used (and can't
+ // be emitted in general, because vbase dtors may not have been validated
+ // by Sema), but the Itanium ABI doesn't make them optional and Clang may
+ // in fact emit references to them from other compilations, so emit them
+ // as functions containing a trap instruction.
+ if (DtorType != Dtor_Base && Dtor->getParent()->isAbstract()) {
+ llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
+ return;
+ }
+
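
Shape of the case the comment describes, for illustration only:

struct VB { ~VB(); };
struct B : virtual VB {          // abstract: no most-derived B can exist
  virtual void f() = 0;
  ~B();
};
// Defining ~B still obliges the Itanium ABI to provide the complete-object
// destructor symbol even though it can never validly run; with this change
// that variant becomes a trap + unreachable stub, while the base-object
// destructor is emitted as usual.
B::~B() {}
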
Stmt *Body = Dtor->getBody();
if (Body)
incrementProfileCounter(Body);
@@ -1416,9 +1436,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
// we'd introduce *two* handler blocks. In the Microsoft ABI, we
// always delegate because we might not have a definition in this TU.
switch (DtorType) {
- case Dtor_Comdat:
- llvm_unreachable("not expecting a COMDAT");
-
+ case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT");
case Dtor_Deleting: llvm_unreachable("already handled deleting case");
case Dtor_Complete:
@@ -1433,7 +1451,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
/*Delegating=*/false, LoadCXXThisAddress());
break;
}
+
// Fallthrough: act like we're in the base variant.
+ LLVM_FALLTHROUGH;
case Dtor_Base:
assert(Body);
@@ -1950,7 +1970,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
// Add the rest of the user-supplied arguments.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
- EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor());
+ EvaluationOrder Order = E->isListInitialization()
+ ? EvaluationOrder::ForceLeftToRight
+ : EvaluationOrder::Default;
+ EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(),
+ /*ParamsToSkip*/ 0, Order);
EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
}
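
The ForceLeftToRight choice reflects the C++ rule that arguments in braced initialization are evaluated left to right, even on ABIs (such as the Microsoft C++ ABI) where ordinary call arguments are otherwise emitted right to left. For example:

struct S { S(int, int); };
int first();
int second();

// first() must be evaluated before second(): this is list-initialization.
S make() { return S{first(), second()}; }
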
@@ -1970,7 +1994,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
// Likewise if they're inalloca.
const CGFunctionInfo &Info =
- CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0);
+ CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0, 0);
if (Info.usesInAlloca())
return false;
}
@@ -2012,10 +2036,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
return;
}
+ bool PassPrototypeArgs = true;
// Check whether we can actually emit the constructor before trying to do so.
if (auto Inherited = D->getInheritedConstructor()) {
- if (getTypes().inheritingCtorHasParams(Inherited, Type) &&
- !canEmitDelegateCallArgs(*this, D, Type, Args)) {
+ PassPrototypeArgs = getTypes().inheritingCtorHasParams(Inherited, Type);
+ if (PassPrototypeArgs && !canEmitDelegateCallArgs(*this, D, Type, Args)) {
EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase,
Delegating, Args);
return;
@@ -2023,14 +2048,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
}
// Insert any ABI-specific implicit constructor arguments.
- unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs(
- *this, D, Type, ForVirtualBase, Delegating, Args);
+ CGCXXABI::AddedStructorArgs ExtraArgs =
+ CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase,
+ Delegating, Args);
// Emit the call.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(D, getFromCtorType(Type));
- const CGFunctionInfo &Info =
- CGM.getTypes().arrangeCXXConstructorCall(Args, D, Type, ExtraArgs);
+ const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
+ Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
CGCallee Callee = CGCallee::forDirect(CalleePtr, D);
EmitCall(Info, Callee, ReturnValueSlot(), Args);
@@ -2102,7 +2128,9 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase,
bool Delegating, CallArgList &Args) {
- InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType));
+ GlobalDecl GD(Ctor, CtorType);
+ InlinedInheritingConstructorScope Scope(*this, GD);
+ ApplyInlineDebugLocation DebugScope(*this, GD);
// Save the arguments to be passed to the inherited constructor.
CXXInheritedCtorInitExprArgs = Args;
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 3666858e63d24..437ab7dd46493 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -418,11 +418,15 @@ void CodeGenFunction::ResolveBranchFixups(llvm::BasicBlock *Block) {
}
/// Pops cleanup blocks until the given savepoint is reached.
-void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) {
+void CodeGenFunction::PopCleanupBlocks(
+ EHScopeStack::stable_iterator Old,
+ std::initializer_list<llvm::Value **> ValuesToReload) {
assert(Old.isValid());
+ bool HadBranches = false;
while (EHStack.stable_begin() != Old) {
EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.begin());
+ HadBranches |= Scope.hasBranches();
// As long as Old strictly encloses the scope's enclosing normal
// cleanup, we're going to emit another normal cleanup which
@@ -432,14 +436,41 @@ void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) {
PopCleanupBlock(FallThroughIsBranchThrough);
}
+
+ // If we didn't have any branches, the insertion point before cleanups must
+ // dominate the current insertion point and we don't need to reload any
+ // values.
+ if (!HadBranches)
+ return;
+
+ // Spill and reload all values that the caller wants to be live at the current
+ // insertion point.
+ for (llvm::Value **ReloadedValue : ValuesToReload) {
+ auto *Inst = dyn_cast_or_null<llvm::Instruction>(*ReloadedValue);
+ if (!Inst)
+ continue;
+ Address Tmp =
+ CreateDefaultAlignTempAlloca(Inst->getType(), "tmp.exprcleanup");
+
+ // Find an insertion point after Inst and spill it to the temporary.
+ llvm::BasicBlock::iterator InsertBefore;
+ if (auto *Invoke = dyn_cast<llvm::InvokeInst>(Inst))
+ InsertBefore = Invoke->getNormalDest()->getFirstInsertionPt();
+ else
+ InsertBefore = std::next(Inst->getIterator());
+ CGBuilderTy(CGM, &*InsertBefore).CreateStore(Inst, Tmp);
+
+ // Reload the value at the current insertion point.
+ *ReloadedValue = Builder.CreateLoad(Tmp);
+ }
}
/// Pops cleanup blocks until the given savepoint is reached, then add the
/// cleanups from the given savepoint in the lifetime-extended cleanups stack.
-void
-CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old,
- size_t OldLifetimeExtendedSize) {
- PopCleanupBlocks(Old);
+void CodeGenFunction::PopCleanupBlocks(
+ EHScopeStack::stable_iterator Old, size_t OldLifetimeExtendedSize,
+ std::initializer_list<llvm::Value **> ValuesToReload) {
+ PopCleanupBlocks(Old, ValuesToReload);
// Move our deferred cleanups onto the EH stack.
for (size_t I = OldLifetimeExtendedSize,
@@ -578,7 +609,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF,
llvm::SwitchInst *si = cast<llvm::SwitchInst>(use.getUser());
if (si->getNumCases() == 1 && si->getDefaultDest() == unreachableBB) {
// Replace the switch with a branch.
- llvm::BranchInst::Create(si->case_begin().getCaseSuccessor(), si);
+ llvm::BranchInst::Create(si->case_begin()->getCaseSuccessor(), si);
// The switch operand is a load from the cleanup-dest alloca.
llvm::LoadInst *condition = cast<llvm::LoadInst>(si->getCondition());
diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp
index 2fdb1279ece97..0ef680ef66092 100644
--- a/lib/CodeGen/CGCoroutine.cpp
+++ b/lib/CodeGen/CGCoroutine.cpp
@@ -12,28 +12,58 @@
//===----------------------------------------------------------------------===//
#include "CodeGenFunction.h"
+#include "llvm/ADT/ScopeExit.h"
#include "clang/AST/StmtCXX.h"
using namespace clang;
using namespace CodeGen;
-namespace clang {
-namespace CodeGen {
+using llvm::Value;
+using llvm::BasicBlock;
+
+namespace {
+enum class AwaitKind { Init, Normal, Yield, Final };
+static constexpr llvm::StringLiteral AwaitKindStr[] = {"init", "await", "yield",
+ "final"};
+}
+
+struct clang::CodeGen::CGCoroData {
+  // The current await expression kind and how many await/yield expressions
+  // have been encountered so far. These are used to generate pretty labels
+  // for await expressions in LLVM IR.
+ AwaitKind CurrentAwaitKind = AwaitKind::Init;
+ unsigned AwaitNum = 0;
+ unsigned YieldNum = 0;
+
+ // How many co_return statements are in the coroutine. Used to decide whether
+  // we need to add a co_return; equivalent at the end of the user-authored body.
+ unsigned CoreturnCount = 0;
+
+  // A branch to this block is emitted when the coroutine needs to suspend.
+ llvm::BasicBlock *SuspendBB = nullptr;
+
+ // Stores the jump destination just before the coroutine memory is freed.
+ // This is the destination that every suspend point jumps to for the cleanup
+ // branch.
+ CodeGenFunction::JumpDest CleanupJD;
+
+ // Stores the jump destination just before the final suspend. The co_return
+  // statements jump to this point after calling the return_xxx promise member.
+ CodeGenFunction::JumpDest FinalJD;
-struct CGCoroData {
// Stores the llvm.coro.id emitted in the function so that we can supply it
// as the first argument to coro.begin, coro.alloc and coro.free intrinsics.
// Note: llvm.coro.id returns a token that cannot be directly expressed in a
// builtin.
llvm::CallInst *CoroId = nullptr;
+
// If coro.id came from the builtin, remember the expression to give better
// diagnostic. If CoroIdExpr is nullptr, the coro.id was created by
// EmitCoroutineBody.
CallExpr const *CoroIdExpr = nullptr;
};
-}
-}
+// Defining these here allows us to keep CGCoroData private to this file.
clang::CodeGen::CodeGenFunction::CGCoroInfo::CGCoroInfo() {}
CodeGenFunction::CGCoroInfo::~CGCoroInfo() {}
@@ -59,19 +89,250 @@ static void createCoroData(CodeGenFunction &CGF,
CurCoro.Data->CoroIdExpr = CoroIdExpr;
}
+// Synthesize a pretty name for a suspend point.
+static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) {
+ unsigned No = 0;
+ switch (Kind) {
+ case AwaitKind::Init:
+ case AwaitKind::Final:
+ break;
+ case AwaitKind::Normal:
+ No = ++Coro.AwaitNum;
+ break;
+ case AwaitKind::Yield:
+ No = ++Coro.YieldNum;
+ break;
+ }
+ SmallString<32> Prefix(AwaitKindStr[static_cast<unsigned>(Kind)]);
+ if (No > 1) {
+ Twine(No).toVector(Prefix);
+ }
+ return Prefix;
+}
+
+// Emit a suspend expression, which roughly looks like:
+//
+// auto && x = CommonExpr();
+// if (!x.await_ready()) {
+// llvm_coro_save();
+// x.await_suspend(...); (*)
+// llvm_coro_suspend(); (**)
+// }
+// x.await_resume();
+//
+// where the result of the entire expression is the result of x.await_resume()
+//
+// (*) If x.await_suspend's return type is bool, it can veto a suspend:
+// if (x.await_suspend(...))
+// llvm_coro_suspend();
+//
+// (**) llvm_coro_suspend() encodes three possible continuations as
+// a switch instruction:
+//
+// %where-to = call i8 @llvm.coro.suspend(...)
+// switch i8 %where-to, label %coro.ret [ ; jump to epilogue to suspend
+// i8 0, label %yield.ready ; go here when resumed
+// i8 1, label %yield.cleanup ; go here when destroyed
+// ]
+//
+// See llvm's docs/Coroutines.rst for more details.
+//
+static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
+ CoroutineSuspendExpr const &S,
+ AwaitKind Kind, AggValueSlot aggSlot,
+ bool ignoreResult) {
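+  // Bind the common subexpression to its opaque value so that the ready,
+  // suspend, and resume subexpressions can all refer to it.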
+ auto *E = S.getCommonExpr();
+ auto Binder =
+ CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
+ auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); });
+
+ auto Prefix = buildSuspendPrefixStr(Coro, Kind);
+ BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
+ BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
+ BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));
+
+  // If the expression is ready, there is no need to suspend.
+ CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
+
+ // Otherwise, emit suspend logic.
+ CGF.EmitBlock(SuspendBlock);
+
+ auto &Builder = CGF.Builder;
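+  // Anchor this suspend point with llvm.coro.save before evaluating
+  // await_suspend.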
+ llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
+ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
+ auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
+
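+  // Emit the await_suspend() call; the result is null when it returns void.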
+ auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr());
+ if (SuspendRet != nullptr) {
+    // Veto suspension if requested by a bool-returning await_suspend.
+ assert(SuspendRet->getType()->isIntegerTy(1) &&
+ "Sema should have already checked that it is void or bool");
+ BasicBlock *RealSuspendBlock =
+ CGF.createBasicBlock(Prefix + Twine(".suspend.bool"));
+ CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock);
+ SuspendBlock = RealSuspendBlock;
+ CGF.EmitBlock(RealSuspendBlock);
+ }
+
+ // Emit the suspend point.
+ const bool IsFinalSuspend = (Kind == AwaitKind::Final);
+ llvm::Function *CoroSuspend =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
+ auto *SuspendResult = Builder.CreateCall(
+ CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
+
+ // Create a switch capturing three possible continuations.
+ auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
+ Switch->addCase(Builder.getInt8(0), ReadyBlock);
+ Switch->addCase(Builder.getInt8(1), CleanupBlock);
+
+ // Emit cleanup for this suspend point.
+ CGF.EmitBlock(CleanupBlock);
+ CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
+
+ // Emit await_resume expression.
+ CGF.EmitBlock(ReadyBlock);
+ return CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
+}
+
+RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E,
+ AggValueSlot aggSlot,
+ bool ignoreResult) {
+ return emitSuspendExpression(*this, *CurCoro.Data, E,
+ CurCoro.Data->CurrentAwaitKind, aggSlot,
+ ignoreResult);
+}
+RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E,
+ AggValueSlot aggSlot,
+ bool ignoreResult) {
+ return emitSuspendExpression(*this, *CurCoro.Data, E, AwaitKind::Yield,
+ aggSlot, ignoreResult);
+}
+
+void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) {
+ ++CurCoro.Data->CoreturnCount;
+ EmitStmt(S.getPromiseCall());
+ EmitBranchThroughCleanup(CurCoro.Data->FinalJD);
+}
+
+// For the WinEH exception representation, the backend needs to know which
+// funclet coro.end belongs to. That information is passed in a funclet bundle.
+static SmallVector<llvm::OperandBundleDef, 1>
+getBundlesForCoroEnd(CodeGenFunction &CGF) {
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
+
+ if (llvm::Instruction *EHPad = CGF.CurrentFuncletPad)
+ BundleList.emplace_back("funclet", EHPad);
+
+ return BundleList;
+}
+
+namespace {
+// We will insert coro.end to cut off any of the destructors for objects that
+// do not need to be destroyed once the coroutine is resumed.
+// See llvm/docs/Coroutines.rst for more details about coro.end.
+struct CallCoroEnd final : public EHScopeStack::Cleanup {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ auto &CGM = CGF.CGM;
+ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy);
+ llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
+ // See if we have a funclet bundle to associate coro.end with. (WinEH)
+ auto Bundles = getBundlesForCoroEnd(CGF);
+ auto *CoroEnd = CGF.Builder.CreateCall(
+ CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles);
+ if (Bundles.empty()) {
+      // Otherwise (landingpad model), create a conditional branch that leads
+      // either to a cleanup block or to a block with the EH resume instruction.
+ auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true);
+ auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont");
+ CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB);
+ CGF.EmitBlock(CleanupContBB);
+ }
+ }
+};
+}
+
+namespace {
+// Make sure to call coro.delete on scope exit.
+struct CallCoroDelete final : public EHScopeStack::Cleanup {
+ Stmt *Deallocate;
+
+ // TODO: Wrap deallocate in if(coro.free(...)) Deallocate.
+ void Emit(CodeGenFunction &CGF, Flags) override {
+    // Note: The deallocation will be emitted twice: once for a normal exit and
+ // once for exceptional exit. This usage is safe because Deallocate does not
+ // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr()
+ // builds a single call to a deallocation function which is safe to emit
+ // multiple times.
+ CGF.EmitStmt(Deallocate);
+ }
+ explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {}
+};
+}
+
void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
auto &TI = CGM.getContext().getTargetInfo();
unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth();
+ auto *FinalBB = createBasicBlock("coro.final");
+ auto *RetBB = createBasicBlock("coro.ret");
+
auto *CoroId = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::coro_id),
{Builder.getInt32(NewAlign), NullPtr, NullPtr, NullPtr});
createCoroData(*this, CurCoro, CoroId);
+ CurCoro.Data->SuspendBB = RetBB;
+
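+  // Emit the allocation of the coroutine frame (a call to the allocation
+  // function).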
+ auto *AllocateCall = EmitScalarExpr(S.getAllocate());
+
+ // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided.
+ if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) {
+ auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure");
+ auto *InitBB = createBasicBlock("coro.init");
+
+ // See if allocation was successful.
+ auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy);
+ auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr);
+ Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB);
+
+    // If not, return the OnAllocFailure object.
+ EmitBlock(RetOnFailureBB);
+ EmitStmt(RetOnAllocFailure);
+
+ EmitBlock(InitBB);
+ }
+
+ CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB);
+ {
+ CodeGenFunction::RunCleanupsScope ResumeScope(*this);
+ EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate());
+
+ EmitStmt(S.getPromiseDeclStmt());
+
+ EHStack.pushCleanup<CallCoroEnd>(EHCleanup);
+
+ CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB);
+
+ // FIXME: Emit initial suspend and more before the body.
+
+ CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal;
+ EmitStmt(S.getBody());
+
+ // See if we need to generate final suspend.
+ const bool CanFallthrough = Builder.GetInsertBlock();
+ const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0;
+ if (CanFallthrough || HasCoreturns) {
+ EmitBlock(FinalBB);
+ // FIXME: Emit final suspend.
+ }
+ }
+
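+  // Mark the end of the coroutine body with llvm.coro.end on the normal path.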
+ EmitBlock(RetBB);
+ llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
+ Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});
- EmitScalarExpr(S.getAllocate());
- // FIXME: Emit the rest of the coroutine.
- EmitStmt(S.getDeallocate());
+ // FIXME: Emit return for the coroutine return object.
}
// Emit coroutine intrinsic and patch up arguments of the token type.
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 12a68036b09c6..818b51543d30a 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -107,8 +107,8 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation,
// Construct a location that has a valid scope, but no line info.
assert(!DI->LexicalBlockStack.empty());
- CGF->Builder.SetCurrentDebugLocation(
- llvm::DebugLoc::get(0, 0, DI->LexicalBlockStack.back()));
+ CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
+ 0, 0, DI->LexicalBlockStack.back(), DI->getInlinedAt()));
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E)
@@ -134,6 +134,30 @@ ApplyDebugLocation::~ApplyDebugLocation() {
CGF->Builder.SetCurrentDebugLocation(std::move(OriginalLocation));
}
+ApplyInlineDebugLocation::ApplyInlineDebugLocation(CodeGenFunction &CGF,
+ GlobalDecl InlinedFn)
+ : CGF(&CGF) {
+ if (!CGF.getDebugInfo()) {
+ this->CGF = nullptr;
+ return;
+ }
+ auto &DI = *CGF.getDebugInfo();
+ SavedLocation = DI.getLocation();
+ assert((DI.getInlinedAt() ==
+ CGF.Builder.getCurrentDebugLocation()->getInlinedAt()) &&
+ "CGDebugInfo and IRBuilder are out of sync");
+
+ DI.EmitInlineFunctionStart(CGF.Builder, InlinedFn);
+}
+
+ApplyInlineDebugLocation::~ApplyInlineDebugLocation() {
+ if (!CGF)
+ return;
+ auto &DI = *CGF->getDebugInfo();
+ DI.EmitInlineFunctionEnd(CGF->Builder);
+ DI.EmitLocation(CGF->Builder, SavedLocation);
+}
+
void CGDebugInfo::setLocation(SourceLocation Loc) {
// If the new location isn't valid return.
if (Loc.isInvalid())
@@ -249,8 +273,8 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) {
<< OC->getIdentifier()->getNameStart() << ')';
}
} else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
- OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '('
- << OCD->getIdentifier()->getNameStart() << ')';
+ OS << OCD->getClassInterface()->getName() << '('
+ << OCD->getName() << ')';
} else if (isa<ObjCProtocolDecl>(DC)) {
// We can extract the type of the class from the self pointer.
if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) {
@@ -509,7 +533,8 @@ void CGDebugInfo::CreateCompileUnit() {
Checksum),
Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers,
CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */,
- CGM.getCodeGenOpts().SplitDwarfInlining);
+ CGM.getCodeGenOpts().SplitDwarfInlining,
+ CGM.getCodeGenOpts().DebugInfoForProfiling);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -581,8 +606,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return getOrCreateStructPtrType("opencl_clk_event_t", OCLClkEventDITy);
case BuiltinType::OCLQueue:
return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy);
- case BuiltinType::OCLNDRange:
- return getOrCreateStructPtrType("opencl_ndrange_t", OCLNDRangeDITy);
case BuiltinType::OCLReserveID:
return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy);
@@ -793,17 +816,19 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
// Bit size, align and offset of the type.
// Size is always the size of a pointer. We can't use getTypeSize here
// because that does not return the correct value for references.
- unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy);
- uint64_t Size = CGM.getTarget().getPointerWidth(AS);
+ unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy);
+ uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace);
auto Align = getTypeAlignIfRequired(Ty, CGM.getContext());
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (Tag == llvm::dwarf::DW_TAG_reference_type ||
Tag == llvm::dwarf::DW_TAG_rvalue_reference_type)
return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit),
- Size, Align);
+ Size, Align, DWARFAddressSpace);
else
return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size,
- Align);
+ Align, DWARFAddressSpace);
}
llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
@@ -1608,8 +1633,13 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy);
llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements);
unsigned Size = Context.getTypeSize(Context.VoidPtrTy);
+ unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
+
llvm::DIType *vtbl_ptr_type =
- DBuilder.createPointerType(SubTy, Size, 0, "__vtbl_ptr_type");
+ DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace,
+ "__vtbl_ptr_type");
VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size);
return VTablePtrType;
}
@@ -1648,10 +1678,14 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
unsigned VSlotCount =
VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData;
unsigned VTableWidth = PtrWidth * VSlotCount;
+ unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
// Create a very wide void* type and insert it directly in the element list.
llvm::DIType *VTableType =
- DBuilder.createPointerType(nullptr, VTableWidth, 0, "__vtbl_ptr_type");
+ DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace,
+ "__vtbl_ptr_type");
EltTys.push_back(VTableType);
// The vptr is a pointer to this special vtable type.
@@ -1714,7 +1748,27 @@ void CGDebugInfo::completeType(const RecordDecl *RD) {
completeRequiredType(RD);
}
+/// Return true if the class or any of its methods are marked dllimport.
+static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) {
+ if (RD->hasAttr<DLLImportAttr>())
+ return true;
+ for (const CXXMethodDecl *MD : RD->methods())
+ if (MD->hasAttr<DLLImportAttr>())
+ return true;
+ return false;
+}
+
void CGDebugInfo::completeClassData(const RecordDecl *RD) {
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ if (CXXRD->isDynamicClass() &&
+ CGM.getVTableLinkage(CXXRD) ==
+ llvm::GlobalValue::AvailableExternallyLinkage &&
+ !isClassOrMethodDLLImport(CXXRD))
+ return;
+ completeClass(RD);
+}
+
+void CGDebugInfo::completeClass(const RecordDecl *RD) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
QualType Ty = CGM.getContext().getRecordType(RD);
@@ -1760,22 +1814,16 @@ static bool isDefinedInClangModule(const RecordDecl *RD) {
return true;
}
-/// Return true if the class or any of its methods are marked dllimport.
-static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) {
- if (RD->hasAttr<DLLImportAttr>())
- return true;
- for (const CXXMethodDecl *MD : RD->methods())
- if (MD->hasAttr<DLLImportAttr>())
- return true;
- return false;
-}
-
static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind,
bool DebugTypeExtRefs, const RecordDecl *RD,
const LangOptions &LangOpts) {
if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition()))
return true;
+ if (auto *ES = RD->getASTContext().getExternalSource())
+ if (ES->hasExternalDefinitions(RD) == ExternalASTSource::EK_Always)
+ return true;
+
if (DebugKind > codegenoptions::LimitedDebugInfo)
return false;
@@ -2009,7 +2057,11 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
if (CreateSkeletonCU && IsRootModule) {
// PCH files don't have a signature field in the control block,
// but LLVM detects skeleton CUs by looking for a non-zero DWO id.
- uint64_t Signature = Mod.getSignature() ? Mod.getSignature() : ~1ULL;
+ // We use the lower 64 bits for debug info.
+ uint64_t Signature =
+ Mod.getSignature()
+ ? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0]
+ : ~1ULL;
llvm::DIBuilder DIB(CGM.getModule());
DIB.createCompileUnit(TheCU->getSourceLanguage(),
DIB.createFile(Mod.getModuleName(), Mod.getPath()),
@@ -2408,6 +2460,21 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
FullName);
}
+llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
+ unsigned MType, SourceLocation LineLoc,
+ StringRef Name, StringRef Value) {
+ unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc);
+ return DBuilder.createMacro(Parent, Line, MType, Name, Value);
+}
+
+llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent,
+ SourceLocation LineLoc,
+ SourceLocation FileLoc) {
+ llvm::DIFile *FName = getOrCreateFile(FileLoc);
+ unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc);
+ return DBuilder.createTempMacroFile(Parent, Line, FName);
+}
+
static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
Qualifiers Quals;
do {
@@ -2451,8 +2518,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
case Type::SubstTemplateTypeParm:
T = cast<SubstTemplateTypeParmType>(T)->getReplacementType();
break;
- case Type::Auto: {
- QualType DT = cast<AutoType>(T)->getDeducedType();
+ case Type::Auto:
+ case Type::DeducedTemplateSpecialization: {
+ QualType DT = cast<DeducedType>(T)->getDeducedType();
assert(!DT.isNull() && "Undeduced types shouldn't reach here.");
T = DT;
break;
@@ -2488,11 +2556,17 @@ void CGDebugInfo::completeTemplateDefinition(
const ClassTemplateSpecializationDecl &SD) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
+ completeUnusedClass(SD);
+}
+
+void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) {
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
+ return;
- completeClassData(&SD);
+ completeClassData(&D);
// In case this type has no member function definitions being emitted, ensure
// it is retained
- RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr());
+ RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr());
}
llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
@@ -2618,6 +2692,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
case Type::Attributed:
case Type::Adjusted:
case Type::Decayed:
+ case Type::DeducedTemplateSpecialization:
case Type::Elaborated:
case Type::Paren:
case Type::SubstTemplateTypeParm:
@@ -2774,9 +2849,10 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
}
// No need to replicate the linkage name if it isn't different from the
// subprogram name, no need to have it at all unless coverage is enabled or
- // debug is set to more than just line tables.
+ // debug is set to more than just line tables or extra debug info is needed.
if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs &&
!CGM.getCodeGenOpts().EmitGcovNotes &&
+ !CGM.getCodeGenOpts().DebugInfoForProfiling &&
DebugKind <= codegenoptions::DebugLineTablesOnly))
LinkageName = StringRef();
@@ -2844,28 +2920,40 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
}
-llvm::DISubprogram *
-CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) {
+llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
+ bool Stub) {
llvm::DINodeArray TParamsArray;
StringRef Name, LinkageName;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
- SourceLocation Loc = FD->getLocation();
+ SourceLocation Loc = GD.getDecl()->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
-
- collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext,
+ collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext,
TParamsArray, Flags);
+ auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
+
// Build function type.
SmallVector<QualType, 16> ArgTypes;
- for (const ParmVarDecl *Parm: FD->parameters())
- ArgTypes.push_back(Parm->getType());
+ if (FD)
+ for (const ParmVarDecl *Parm : FD->parameters())
+ ArgTypes.push_back(Parm->getType());
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
+ if (Stub) {
+ return DBuilder.createFunction(
+ DContext, Name, LinkageName, Unit, Line,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
+ !FD->isExternallyVisible(),
+ /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize,
+ TParamsArray.get(), getFunctionDeclaration(FD));
+ }
+
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(),
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
+ !FD->isExternallyVisible(),
/* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
TParamsArray.get(), getFunctionDeclaration(FD));
const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
@@ -2875,6 +2963,16 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) {
return SP;
}
+llvm::DISubprogram *
+CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
+ return getFunctionFwdDeclOrStub(GD, /* Stub = */ false);
+}
+
+llvm::DISubprogram *
+CGDebugInfo::getFunctionStub(GlobalDecl GD) {
+ return getFunctionFwdDeclOrStub(GD, /* Stub = */ true);
+}
+
llvm::DIGlobalVariable *
CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
QualType T;
@@ -3146,6 +3244,27 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
TParamsArray.get(), getFunctionDeclaration(D)));
}
+void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+ // If there is a subprogram for this function available then use it.
+ auto FI = SPCache.find(FD->getCanonicalDecl());
+ llvm::DISubprogram *SP = nullptr;
+ if (FI != SPCache.end())
+ SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second);
+ if (!SP)
+ SP = getFunctionStub(GD);
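+  // Enter the inlined function as a new lexical scope and record the current
+  // debug location as the inlinedAt call site.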
+ FnBeginRegionCount.push_back(LexicalBlockStack.size());
+ LexicalBlockStack.emplace_back(SP);
+ setInlinedAt(Builder.getCurrentDebugLocation());
+ EmitLocation(Builder, FD->getLocation());
+}
+
+void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) {
+ assert(CurInlinedAt && "unbalanced inline scope stack");
+ EmitFunctionEnd(Builder);
+ setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt());
+}
+
void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
@@ -3155,7 +3274,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
llvm::MDNode *Scope = LexicalBlockStack.back();
Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
- getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope));
+ getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope, CurInlinedAt));
}
void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
@@ -3167,14 +3286,29 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
getColumnNumber(CurLoc)));
}
+void CGDebugInfo::AppendAddressSpaceXDeref(
+ unsigned AddressSpace,
+ SmallVectorImpl<int64_t> &Expr) const {
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(AddressSpace);
+ if (!DWARFAddressSpace)
+ return;
+
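+  // Emit DW_OP_constu <DWARF address space> DW_OP_swap DW_OP_xderef.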
+ Expr.push_back(llvm::dwarf::DW_OP_constu);
+ Expr.push_back(DWARFAddressSpace.getValue());
+ Expr.push_back(llvm::dwarf::DW_OP_swap);
+ Expr.push_back(llvm::dwarf::DW_OP_xderef);
+}
+
void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder,
SourceLocation Loc) {
// Set our current location.
setLocation(Loc);
// Emit a line table change for the current location inside the new scope.
- Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
- getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back()));
+ Builder.SetCurrentDebugLocation(
+ llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc),
+ LexicalBlockStack.back(), CurInlinedAt));
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
@@ -3316,13 +3450,16 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
Line = getLineNumber(VD->getLocation());
Column = getColumnNumber(VD->getLocation());
}
- SmallVector<int64_t, 9> Expr;
+ SmallVector<int64_t, 13> Expr;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
if (VD->isImplicit())
Flags |= llvm::DINode::FlagArtificial;
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
+ unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType());
+ AppendAddressSpaceXDeref(AddressSpace, Expr);
+
// If this is the first argument and it is implicit then
// give it an object pointer flag.
// FIXME: There has to be a better way to do this, but for static
@@ -3360,9 +3497,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
Line, Ty, Align);
// Insert an llvm.dbg.declare into the current block.
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
- llvm::DebugLoc::get(Line, Column, Scope),
- Builder.GetInsertBlock());
+ DBuilder.insertDeclare(
+ Storage, D, DBuilder.createExpression(Expr),
+ llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
+ Builder.GetInsertBlock());
return;
} else if (isa<VariableArrayType>(VD->getType()))
Expr.push_back(llvm::dwarf::DW_OP_deref);
@@ -3393,9 +3531,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
Flags | llvm::DINode::FlagArtificial, FieldAlign);
// Insert an llvm.dbg.declare into the current block.
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
- llvm::DebugLoc::get(Line, Column, Scope),
- Builder.GetInsertBlock());
+ DBuilder.insertDeclare(
+ Storage, D, DBuilder.createExpression(Expr),
+ llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
+ Builder.GetInsertBlock());
}
}
}
@@ -3411,7 +3550,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
- llvm::DebugLoc::get(Line, Column, Scope),
+ llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
Builder.GetInsertBlock());
}
@@ -3492,7 +3631,8 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
Line, Ty, false, llvm::DINode::FlagZero, Align);
// Insert an llvm.dbg.declare into the current block.
- auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back());
+ auto DL =
+ llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt);
if (InsertPoint)
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL,
InsertPoint);
@@ -3660,12 +3800,13 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
// Insert an llvm.dbg.value into the current block.
DBuilder.insertDbgValueIntrinsic(
LocalAddr, 0, debugVar, DBuilder.createExpression(),
- llvm::DebugLoc::get(line, column, scope), Builder.GetInsertBlock());
+ llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
+ Builder.GetInsertBlock());
}
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(),
- llvm::DebugLoc::get(line, column, scope),
+ llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
Builder.GetInsertBlock());
}
@@ -3747,9 +3888,16 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
GVE = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext);
} else {
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
+
+ SmallVector<int64_t, 4> Expr;
+ unsigned AddressSpace =
+ CGM.getContext().getTargetAddressSpace(D->getType());
+ AppendAddressSpaceXDeref(AddressSpace, Expr);
+
GVE = DBuilder.createGlobalVariableExpression(
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
- Var->hasLocalLinkage(), /*Expr=*/nullptr,
+ Var->hasLocalLinkage(),
+ Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
getOrCreateStaticDataMemberDeclarationOrNull(D), Align);
Var->addDebugInfo(GVE);
}
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index ac2e8dd2e0a45..5050ca0ad3fa0 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -61,6 +61,7 @@ class CGDebugInfo {
ModuleMap *ClangModuleMap = nullptr;
ExternalASTSource::ASTSourceDescriptor PCHDescriptor;
SourceLocation CurLoc;
+ llvm::MDNode *CurInlinedAt = nullptr;
llvm::DIType *VTablePtrType = nullptr;
llvm::DIType *ClassTy = nullptr;
llvm::DICompositeType *ObjTy = nullptr;
@@ -292,6 +293,15 @@ class CGDebugInfo {
/// Create a new lexical block node and push it on the stack.
void CreateLexicalBlock(SourceLocation Loc);
+  /// If the target-specific LLVM \p AddressSpace directly maps to a
+  /// target-specific DWARF address space, appends the extended dereferencing
+  /// mechanism to the complex expression \p Expr. Otherwise, does nothing.
+ ///
+  /// The extended dereferencing mechanism has the following format:
+ /// DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ void AppendAddressSpaceXDeref(unsigned AddressSpace,
+ SmallVectorImpl<int64_t> &Expr) const;
+
public:
CGDebugInfo(CodeGenModule &CGM);
~CGDebugInfo();
@@ -320,6 +330,17 @@ public:
/// ignored.
void setLocation(SourceLocation Loc);
+ /// Return the current source location. This does not necessarily correspond
+ /// to the IRBuilder's current DebugLoc.
+ SourceLocation getLocation() const { return CurLoc; }
+
+ /// Update the current inline scope. All subsequent calls to \p EmitLocation
+ /// will create a location with this inlinedAt field.
+ void setInlinedAt(llvm::MDNode *InlinedAt) { CurInlinedAt = InlinedAt; }
+
+ /// \return the current inline scope.
+ llvm::MDNode *getInlinedAt() const { return CurInlinedAt; }
+
// Converts a SourceLocation to a DebugLoc
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Loc);
@@ -336,6 +357,11 @@ public:
SourceLocation ScopeLoc, QualType FnType,
llvm::Function *Fn, CGBuilderTy &Builder);
+ /// Start a new scope for an inlined function.
+ void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD);
+ /// End an inlined function scope.
+ void EmitInlineFunctionEnd(CGBuilderTy &Builder);
+
/// Emit debug info for a function declaration.
void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType);
@@ -409,9 +435,21 @@ public:
void completeType(const RecordDecl *RD);
void completeRequiredType(const RecordDecl *RD);
void completeClassData(const RecordDecl *RD);
+ void completeClass(const RecordDecl *RD);
void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD);
-
+ void completeUnusedClass(const CXXRecordDecl &D);
+
+ /// Create debug info for a macro defined by a #define directive or a macro
+ /// undefined by a #undef directive.
+ llvm::DIMacro *CreateMacro(llvm::DIMacroFile *Parent, unsigned MType,
+ SourceLocation LineLoc, StringRef Name,
+ StringRef Value);
+
+ /// Create debug info for a file referenced by an #include directive.
+ llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent,
+ SourceLocation LineLoc,
+ SourceLocation FileLoc);
private:
/// Emit call to llvm.dbg.declare for a variable declaration.
void EmitDeclare(const VarDecl *decl, llvm::Value *AI,
@@ -491,11 +529,18 @@ private:
llvm::DIDerivedType *
getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D);
+ /// Helper that either creates a forward declaration or a stub.
+ llvm::DISubprogram *getFunctionFwdDeclOrStub(GlobalDecl GD, bool Stub);
+
/// Create a subprogram describing the forward declaration
- /// represented in the given FunctionDecl.
- llvm::DISubprogram *getFunctionForwardDeclaration(const FunctionDecl *FD);
+ /// represented in the given FunctionDecl wrapped in a GlobalDecl.
+ llvm::DISubprogram *getFunctionForwardDeclaration(GlobalDecl GD);
+
+ /// Create a DISubprogram describing the function
+ /// represented in the given FunctionDecl wrapped in a GlobalDecl.
+ llvm::DISubprogram *getFunctionStub(GlobalDecl GD);
- /// Create a global variable describing the forward decalration
+ /// Create a global variable describing the forward declaration
/// represented in the given VarDecl.
llvm::DIGlobalVariable *
getGlobalVariableForwardDeclaration(const VarDecl *VD);
@@ -622,6 +667,20 @@ public:
};
+/// A scoped helper to set the current debug location to an inlined location.
+class ApplyInlineDebugLocation {
+ SourceLocation SavedLocation;
+ CodeGenFunction *CGF;
+
+public:
+ /// Set up the CodeGenFunction's DebugInfo to produce inline locations for the
+ /// function \p InlinedFn. The current debug location becomes the inlined call
+ /// site of the inlined function.
+ ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn);
+  /// Restore everything back to the original state.
+ ~ApplyInlineDebugLocation();
+};
+
} // namespace CodeGen
} // namespace clang
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 0a88b2310beb4..0f959043a22ed 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -50,6 +50,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::TemplateTypeParm:
case Decl::UnresolvedUsingValue:
case Decl::NonTypeTemplateParm:
+ case Decl::CXXDeductionGuide:
case Decl::CXXMethod:
case Decl::CXXConstructor:
case Decl::CXXDestructor:
@@ -671,6 +672,27 @@ static void drillIntoBlockVariable(CodeGenFunction &CGF,
lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var));
}
+void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS,
+ SourceLocation Loc) {
+ if (!SanOpts.has(SanitizerKind::NullabilityAssign))
+ return;
+
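+  // Only check stores to l-values whose type is declared _Nonnull.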
+ auto Nullability = LHS.getType()->getNullability(getContext());
+ if (!Nullability || *Nullability != NullabilityKind::NonNull)
+ return;
+
+  // Check that the right-hand side of the assignment is nonnull when the
+  // left-hand side is required to be nonnull.
+ SanitizerScope SanScope(this);
+ llvm::Value *IsNotNull = Builder.CreateIsNotNull(RHS);
+ llvm::Constant *StaticData[] = {
+ EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(LHS.getType()),
+ llvm::ConstantInt::get(Int8Ty, 0), // The LogAlignment info is unused.
+ llvm::ConstantInt::get(Int8Ty, TCK_NonnullAssign)};
+ EmitCheck({{IsNotNull, SanitizerKind::NullabilityAssign}},
+ SanitizerHandler::TypeMismatch, StaticData, RHS);
+}
+
void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
LValue lvalue, bool capturedByInit) {
Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
@@ -678,6 +700,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
llvm::Value *value = EmitScalarExpr(init);
if (capturedByInit)
drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
+ EmitNullabilityCheck(lvalue, value, init->getExprLoc());
EmitStoreThroughLValue(RValue::get(value), lvalue, true);
return;
}
@@ -766,6 +789,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
+ EmitNullabilityCheck(lvalue, value, init->getExprLoc());
+
// If the variable might have been accessed by its initializer, we
// might have to initialize with a barrier. We have to do this for
// both __weak and __strong, but __weak got filtered out above.
@@ -1022,11 +1047,21 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Emit a lifetime intrinsic if meaningful. There's no point in doing this
// if we don't have a valid insertion point (?).
if (HaveInsertPoint() && !IsMSCatchParam) {
- // goto or switch-case statements can break lifetime into several
- // regions which need more efforts to handle them correctly. PR28267
- // This is rare case, but it's better just omit intrinsics than have
- // them incorrectly placed.
- if (!Bypasses.IsBypassed(&D)) {
+ // If there's a jump into the lifetime of this variable, its lifetime
+ // gets broken up into several regions in IR, which requires more work
+ // to handle correctly. For now, just omit the intrinsics; this is a
+ // rare case, and it's better to just be conservatively correct.
+ // PR28267.
+ //
+ // We have to do this in all language modes if there's a jump past the
+ // declaration. We also have to do it in C if there's a jump to an
+ // earlier point in the current block because non-VLA lifetimes begin as
+ // soon as the containing block is entered, not when its variables
+ // actually come into scope; suppressing the lifetime annotations
+ // completely in this case is unnecessarily pessimistic, but again, this
+ // is rare.
+ if (!Bypasses.IsBypassed(&D) &&
+ !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) {
uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy);
emission.SizeForLifetimeMarkers =
EmitLifetimeStart(size, address.getPointer());
@@ -1083,6 +1118,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, address.getPointer());
+ // Make sure we call @llvm.lifetime.end.
+ if (emission.useLifetimeMarkers())
+ EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
+ emission.getAllocatedAddress(),
+ emission.getSizeForLifetimeMarkers());
+
return emission;
}
@@ -1373,13 +1414,6 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
const VarDecl &D = *emission.Variable;
- // Make sure we call @llvm.lifetime.end. This needs to happen
- // *last*, so the cleanup needs to be pushed *first*.
- if (emission.useLifetimeMarkers())
- EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
- emission.getAllocatedAddress(),
- emission.getSizeForLifetimeMarkers());
-
// Check the type for a cleanup.
if (QualType::DestructionKind dtorKind = D.getType().isDestructedType())
emitAutoVarTypeCleanup(emission, dtorKind);
@@ -1691,17 +1725,19 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin,
/// Lazily declare the @llvm.lifetime.start intrinsic.
llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() {
- if (LifetimeStartFn) return LifetimeStartFn;
+ if (LifetimeStartFn)
+ return LifetimeStartFn;
LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(),
- llvm::Intrinsic::lifetime_start);
+ llvm::Intrinsic::lifetime_start, Int8PtrTy);
return LifetimeStartFn;
}
/// Lazily declare the @llvm.lifetime.end intrinsic.
llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() {
- if (LifetimeEndFn) return LifetimeEndFn;
+ if (LifetimeEndFn)
+ return LifetimeEndFn;
LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(),
- llvm::Intrinsic::lifetime_end);
+ llvm::Intrinsic::lifetime_end, Int8PtrTy);
return LifetimeEndFn;
}
@@ -1869,6 +1905,19 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, DeclPtr.getPointer());
+
+ // We can only check return value nullability if all arguments to the
+ // function satisfy their nullability preconditions. This makes it necessary
+ // to emit null checks for args in the function body itself.
+ if (requiresReturnValueNullabilityCheck()) {
+ auto Nullability = Ty->getNullability(getContext());
+ if (Nullability && *Nullability == NullabilityKind::NonNull) {
+ SanitizerScope SanScope(this);
+ RetValNullabilityPrecondition =
+ Builder.CreateAnd(RetValNullabilityPrecondition,
+ Builder.CreateIsNotNull(Arg.getAnyValue()));
+ }
+ }
}
void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index f56e182169311..f61d60a63a6a2 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -237,7 +237,7 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
llvm::FunctionType::get(IntTy, dtorStub->getType(), false);
llvm::Constant *atexit =
- CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeSet(),
+ CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(),
/*Local=*/true);
if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit))
atexitFn->setDoesNotThrow();
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index f908bf2b3b0ac..ca1535182ec1b 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -180,8 +180,8 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
// The GCC runtime's personality function inherently doesn't support
// mixed EH. Use the C++ personality just to avoid returning null.
case ObjCRuntime::GCC:
- case ObjCRuntime::ObjFW: // XXX: this will change soon
- return EHPersonality::GNU_ObjC;
+ case ObjCRuntime::ObjFW:
+ return getObjCPersonality(T, L);
case ObjCRuntime::GNUstep:
return EHPersonality::GNU_ObjCXX;
}
@@ -231,7 +231,7 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true),
Personality.PersonalityFn,
- llvm::AttributeSet(), /*Local=*/true);
+ llvm::AttributeList(), /*Local=*/true);
}
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
@@ -1666,23 +1666,12 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy;
- llvm::Function *ParentFn = ParentCGF.CurFn;
const CGFunctionInfo &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Function *Fn = llvm::Function::Create(
FnTy, llvm::GlobalValue::InternalLinkage, Name.str(), &CGM.getModule());
- // The filter is either in the same comdat as the function, or it's internal.
- if (llvm::Comdat *C = ParentFn->getComdat()) {
- Fn->setComdat(C);
- } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) {
- llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName());
- ParentFn->setComdat(C);
- Fn->setComdat(C);
- } else {
- Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- }
IsOutlinedSEHHelper = true;
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index e5e34a5f3ed60..265ef27a46b0f 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -71,7 +71,8 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
/// block.
llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
const Twine &Name) {
- return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt);
+ return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(),
+ nullptr, Name, AllocaInsertPt);
}
/// CreateDefaultAlignTempAlloca - This creates an alloca with the
@@ -534,7 +535,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const {
void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
llvm::Value *Ptr, QualType Ty,
- CharUnits Alignment, bool SkipNullCheck) {
+ CharUnits Alignment,
+ SanitizerSet SkippedChecks) {
if (!sanitizePerformTypeCheck())
return;
@@ -552,7 +554,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast ||
TCK == TCK_UpcastToVirtualBase;
if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) &&
- !SkipNullCheck) {
+ !SkippedChecks.has(SanitizerKind::Null)) {
// The glvalue must not be an empty glvalue.
llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr);
@@ -568,7 +570,9 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
}
}
- if (SanOpts.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) {
+ if (SanOpts.has(SanitizerKind::ObjectSize) &&
+ !SkippedChecks.has(SanitizerKind::ObjectSize) &&
+ !Ty->isIncompleteType()) {
uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity();
// The glvalue must refer to a large enough storage region.
@@ -578,22 +582,24 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy };
llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys);
llvm::Value *Min = Builder.getFalse();
+ llvm::Value *NullIsUnknown = Builder.getFalse();
llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy);
- llvm::Value *LargeEnough =
- Builder.CreateICmpUGE(Builder.CreateCall(F, {CastAddr, Min}),
- llvm::ConstantInt::get(IntPtrTy, Size));
+ llvm::Value *LargeEnough = Builder.CreateICmpUGE(
+ Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}),
+ llvm::ConstantInt::get(IntPtrTy, Size));
Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
}
uint64_t AlignVal = 0;
- if (SanOpts.has(SanitizerKind::Alignment)) {
+ if (SanOpts.has(SanitizerKind::Alignment) &&
+ !SkippedChecks.has(SanitizerKind::Alignment)) {
AlignVal = Alignment.getQuantity();
if (!Ty->isIncompleteType() && !AlignVal)
AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity();
// The glvalue must be suitably aligned.
- if (AlignVal) {
+ if (AlignVal > 1) {
llvm::Value *Align =
Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy),
llvm::ConstantInt::get(IntPtrTy, AlignVal - 1));
@@ -624,6 +630,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
// or call a non-static member function
CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
if (SanOpts.has(SanitizerKind::Vptr) &&
+ !SkippedChecks.has(SanitizerKind::Vptr) &&
(TCK == TCK_MemberAccess || TCK == TCK_MemberCall ||
TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference ||
TCK == TCK_UpcastToVirtualBase) &&
@@ -947,15 +954,47 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E,
E->getType());
}
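+/// Peel off casts (other than dynamic_cast), parentheses, and __extension__ to
+/// check whether \p Obj ultimately refers to the enclosing class's 'this'.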
+bool CodeGenFunction::IsWrappedCXXThis(const Expr *Obj) {
+ const Expr *Base = Obj;
+ while (!isa<CXXThisExpr>(Base)) {
+ // The result of a dynamic_cast can be null.
+ if (isa<CXXDynamicCastExpr>(Base))
+ return false;
+
+ if (const auto *CE = dyn_cast<CastExpr>(Base)) {
+ Base = CE->getSubExpr();
+ } else if (const auto *PE = dyn_cast<ParenExpr>(Base)) {
+ Base = PE->getSubExpr();
+ } else if (const auto *UO = dyn_cast<UnaryOperator>(Base)) {
+ if (UO->getOpcode() == UO_Extension)
+ Base = UO->getSubExpr();
+ else
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) {
LValue LV;
if (SanOpts.has(SanitizerKind::ArrayBounds) && isa<ArraySubscriptExpr>(E))
LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true);
else
LV = EmitLValue(E);
- if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple())
+ if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) {
+ SanitizerSet SkippedChecks;
+ if (const auto *ME = dyn_cast<MemberExpr>(E)) {
+ bool IsBaseCXXThis = IsWrappedCXXThis(ME->getBase());
+ if (IsBaseCXXThis)
+ SkippedChecks.set(SanitizerKind::Alignment, true);
+ if (IsBaseCXXThis || isa<DeclRefExpr>(ME->getBase()))
+ SkippedChecks.set(SanitizerKind::Null, true);
+ }
EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(),
- E->getType(), LV.getAlignment());
+ E->getType(), LV.getAlignment(), SkippedChecks);
+ }
return LV;
}
@@ -1033,7 +1072,19 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
const auto *cleanups = cast<ExprWithCleanups>(E);
enterFullExpression(cleanups);
RunCleanupsScope Scope(*this);
- return EmitLValue(cleanups->getSubExpr());
+ LValue LV = EmitLValue(cleanups->getSubExpr());
+ if (LV.isSimple()) {
+      // Defend against branches out of GNU statement expressions surrounded by
+ // cleanups.
+ llvm::Value *V = LV.getPointer();
+ Scope.ForceCleanup({&V});
+ return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(),
+ getContext(), LV.getAlignmentSource(),
+ LV.getTBAAInfo());
+ }
+ // FIXME: Is it possible to create an ExprWithCleanups that produces a
+ // bitfield lvalue or some other non-simple lvalue?
+ return LV;
}
case Expr::CXXDefaultArgExprClass:
@@ -1265,6 +1316,53 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) {
return MDHelper.createRange(Min, End);
}
+bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
+ SourceLocation Loc) {
+ bool HasBoolCheck = SanOpts.has(SanitizerKind::Bool);
+ bool HasEnumCheck = SanOpts.has(SanitizerKind::Enum);
+ if (!HasBoolCheck && !HasEnumCheck)
+ return false;
+
+ bool IsBool = hasBooleanRepresentation(Ty) ||
+ NSAPI(CGM.getContext()).isObjCBOOLType(Ty);
+ bool NeedsBoolCheck = HasBoolCheck && IsBool;
+ bool NeedsEnumCheck = HasEnumCheck && Ty->getAs<EnumType>();
+ if (!NeedsBoolCheck && !NeedsEnumCheck)
+ return false;
+
+ // Single-bit booleans don't need to be checked. Special-case this to avoid
+ // a bit width mismatch when handling bitfield values. This is handled by
+ // EmitFromMemory for the non-bitfield case.
+ if (IsBool &&
+ cast<llvm::IntegerType>(Value->getType())->getBitWidth() == 1)
+ return false;
+
+ llvm::APInt Min, End;
+ if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool))
+ return true;
+
+ SanitizerScope SanScope(this);
+ llvm::Value *Check;
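+  // getRangeForType returns a half-open [Min, End) range; decrement End so it
+  // can be used as an inclusive upper bound in the comparisons below.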
+ --End;
+ if (!Min) {
+ Check = Builder.CreateICmpULE(
+ Value, llvm::ConstantInt::get(getLLVMContext(), End));
+ } else {
+ llvm::Value *Upper = Builder.CreateICmpSLE(
+ Value, llvm::ConstantInt::get(getLLVMContext(), End));
+ llvm::Value *Lower = Builder.CreateICmpSGE(
+ Value, llvm::ConstantInt::get(getLLVMContext(), Min));
+ Check = Builder.CreateAnd(Upper, Lower);
+ }
+ llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc),
+ EmitCheckTypeDescriptor(Ty)};
+ SanitizerMask Kind =
+ NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool;
+ EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue,
+ StaticArgs, EmitCheckValue(Value));
+ return true;
+}
+
llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
QualType Ty,
SourceLocation Loc,
@@ -1273,26 +1371,28 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
QualType TBAABaseType,
uint64_t TBAAOffset,
bool isNontemporal) {
- // For better performance, handle vector loads differently.
- if (Ty->isVectorType()) {
- const llvm::Type *EltTy = Addr.getElementType();
-
- const auto *VTy = cast<llvm::VectorType>(EltTy);
-
- // Handle vectors of size 3 like size 4 for better performance.
- if (VTy->getNumElements() == 3) {
-
- // Bitcast to vec4 type.
- llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
- 4);
- Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
- // Now load value.
- llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
-
- // Shuffle vector to get vec3.
- V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
- {0, 1, 2}, "extractVec");
- return EmitFromMemory(V, Ty);
+ if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+ // For better performance, handle vector loads differently.
+ if (Ty->isVectorType()) {
+ const llvm::Type *EltTy = Addr.getElementType();
+
+ const auto *VTy = cast<llvm::VectorType>(EltTy);
+
+ // Handle vectors of size 3 like size 4 for better performance.
+ if (VTy->getNumElements() == 3) {
+
+ // Bitcast to vec4 type.
+ llvm::VectorType *vec4Ty =
+ llvm::VectorType::get(VTy->getElementType(), 4);
+ Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
+ // Now load value.
+ llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+ // Shuffle vector to get vec3.
+ V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
+ {0, 1, 2}, "extractVec");
+ return EmitFromMemory(V, Ty);
+ }
}
}
@@ -1317,35 +1417,9 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
false /*ConvertTypeToTag*/);
}
- bool IsBool = hasBooleanRepresentation(Ty) ||
- NSAPI(CGM.getContext()).isObjCBOOLType(Ty);
- bool NeedsBoolCheck = SanOpts.has(SanitizerKind::Bool) && IsBool;
- bool NeedsEnumCheck =
- SanOpts.has(SanitizerKind::Enum) && Ty->getAs<EnumType>();
- if (NeedsBoolCheck || NeedsEnumCheck) {
- SanitizerScope SanScope(this);
- llvm::APInt Min, End;
- if (getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) {
- --End;
- llvm::Value *Check;
- if (!Min)
- Check = Builder.CreateICmpULE(
- Load, llvm::ConstantInt::get(getLLVMContext(), End));
- else {
- llvm::Value *Upper = Builder.CreateICmpSLE(
- Load, llvm::ConstantInt::get(getLLVMContext(), End));
- llvm::Value *Lower = Builder.CreateICmpSGE(
- Load, llvm::ConstantInt::get(getLLVMContext(), Min));
- Check = Builder.CreateAnd(Upper, Lower);
- }
- llvm::Constant *StaticArgs[] = {
- EmitCheckSourceLocation(Loc),
- EmitCheckTypeDescriptor(Ty)
- };
- SanitizerMask Kind = NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool;
- EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue,
- StaticArgs, EmitCheckValue(Load));
- }
+ if (EmitScalarRangeCheck(Load, Ty, Loc)) {
+ // In order to prevent the optimizer from throwing away the check, don't
+ // attach range metadata to the load.
} else if (CGM.getCodeGenOpts().OptimizationLevel > 0)
if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty))
Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo);
@@ -1386,24 +1460,25 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
uint64_t TBAAOffset,
bool isNontemporal) {
- // Handle vectors differently to get better performance.
- if (Ty->isVectorType()) {
- llvm::Type *SrcTy = Value->getType();
- auto *VecTy = cast<llvm::VectorType>(SrcTy);
- // Handle vec3 special.
- if (VecTy->getNumElements() == 3) {
- // Our source is a vec3, do a shuffle vector to make it a vec4.
- llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
- Builder.getInt32(2),
- llvm::UndefValue::get(Builder.getInt32Ty())};
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
- Value = Builder.CreateShuffleVector(Value,
- llvm::UndefValue::get(VecTy),
- MaskV, "extractVec");
- SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
- }
- if (Addr.getElementType() != SrcTy) {
- Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+ if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+ // Handle vectors differently to get better performance.
+ if (Ty->isVectorType()) {
+ llvm::Type *SrcTy = Value->getType();
+ auto *VecTy = cast<llvm::VectorType>(SrcTy);
+ // Handle vec3 special.
+ if (VecTy->getNumElements() == 3) {
+ // Our source is a vec3, do a shuffle vector to make it a vec4.
+ llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
+ Builder.getInt32(2),
+ llvm::UndefValue::get(Builder.getInt32Ty())};
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
+ MaskV, "extractVec");
+ SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ }
+ if (Addr.getElementType() != SrcTy) {
+ Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+ }
}
}
@@ -1487,10 +1562,11 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
return EmitLoadOfGlobalRegLValue(LV);
assert(LV.isBitField() && "Unknown LValue type!");
- return EmitLoadOfBitfieldLValue(LV);
+ return EmitLoadOfBitfieldLValue(LV, Loc);
}
-RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
+RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV,
+ SourceLocation Loc) {
const CGBitFieldInfo &Info = LV.getBitFieldInfo();
// Get the output type.
@@ -1515,7 +1591,7 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
"bf.clear");
}
Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast");
-
+ EmitScalarRangeCheck(Val, LV.getType(), Loc);
return RValue::get(Val);
}
@@ -2545,8 +2621,8 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF,
llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(
FnType, FnName,
- llvm::AttributeSet::get(CGF.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex, B),
+ llvm::AttributeList::get(CGF.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex, B),
/*Local=*/true);
llvm::CallInst *HandlerCall = CGF.EmitNounwindRuntimeCall(Fn, FnArgs);
if (!MayReturn) {
@@ -2709,6 +2785,24 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
EmitBlock(Cont);
}
+// Emit a stub for __cfi_check function so that the linker knows about this
+// symbol in LTO mode.
+void CodeGenFunction::EmitCfiCheckStub() {
+ llvm::Module *M = &CGM.getModule();
+ auto &Ctx = M->getContext();
+ llvm::Function *F = llvm::Function::Create(
+ llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false),
+ llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M);
+ llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
+ // FIXME: consider emitting an intrinsic call like
+ // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2)
+ // which can be lowered in CrossDSOCFI pass to the actual contents of
+ // __cfi_check. This would allow inlining of __cfi_check calls.
+ llvm::CallInst::Create(
+ llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB);
+ llvm::ReturnInst::Create(Ctx, nullptr, BB);
+}
+
// This function is basically a switch over the CFI failure kind, which is
// extracted from CFICheckFailData (1st function argument). Each case is either
// llvm.trap or a call to one of the two runtime handlers, based on
@@ -2821,7 +2915,7 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) {
if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
CGM.getCodeGenOpts().TrapFuncName);
- TrapCall->addAttribute(llvm::AttributeSet::FunctionIndex, A);
+ TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A);
}
return TrapCall;
@@ -3335,7 +3429,14 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
AlignmentSource AlignSource;
Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource);
QualType PtrTy = BaseExpr->getType()->getPointeeType();
- EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy);
+ SanitizerSet SkippedChecks;
+ bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr);
+ if (IsBaseCXXThis)
+ SkippedChecks.set(SanitizerKind::Alignment, true);
+ if (IsBaseCXXThis || isa<DeclRefExpr>(BaseExpr))
+ SkippedChecks.set(SanitizerKind::Null, true);
+ EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy,
+ /*Alignment=*/CharUnits::Zero(), SkippedChecks);
BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource);
} else
BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess);
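As a rough sketch (not from the patch; names invented), the SkippedChecks logic above avoids re-emitting ubsan null/alignment checks for member accesses whose base is the wrapped 'this' pointer or a plain DeclRefExpr:

struct Counter {
  int Value;
  int get() const {
    // The base is 'this', so under -fsanitize=null,alignment this member
    // access no longer emits its own null/alignment check; the check here
    // is treated as redundant for 'this'.
    return Value;
  }
};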
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 009244784e504..49bbb4808eaa2 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -111,6 +111,13 @@ public:
void VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
Visit(GE->getResultExpr());
}
+ void VisitCoawaitExpr(CoawaitExpr *E) {
+ CGF.EmitCoawaitExpr(*E, Dest, IsResultUnused);
+ }
+ void VisitCoyieldExpr(CoyieldExpr *E) {
+ CGF.EmitCoyieldExpr(*E, Dest, IsResultUnused);
+ }
+ void VisitUnaryCoawait(UnaryOperator *E) { Visit(E->getSubExpr()); }
void VisitUnaryExtension(UnaryOperator *E) { Visit(E->getSubExpr()); }
void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) {
return Visit(E->getReplacement());
@@ -1267,7 +1274,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Store the initializer into the field.
EmitInitializationToLValue(E->getInit(curInitIndex++), LV);
} else {
- // We're out of initalizers; default-initialize to null
+ // We're out of initializers; default-initialize to null
EmitNullInitializationToLValue(LV);
}
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 71c8fb8b7ae35..d02f074dd605f 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -24,7 +24,15 @@
using namespace clang;
using namespace CodeGen;
-static RequiredArgs
+namespace {
+struct MemberCallInfo {
+ RequiredArgs ReqArgs;
+ // Number of prefix arguments for the call. Ignores the `this` pointer.
+ unsigned PrefixSize;
+};
+}
+
+static MemberCallInfo
commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
llvm::Value *This, llvm::Value *ImplicitParam,
QualType ImplicitParamTy, const CallExpr *CE,
@@ -48,6 +56,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD);
+ unsigned PrefixSize = Args.size() - 1;
// And the rest of the call args.
if (RtlArgs) {
@@ -65,7 +74,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
FPT->getNumParams() == 0 &&
"No CallExpr specified for function with non-zero number of arguments");
}
- return required;
+ return {required, PrefixSize};
}
RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
@@ -75,9 +84,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
const CallExpr *CE, CallArgList *RtlArgs) {
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
CallArgList Args;
- RequiredArgs required = commonEmitCXXMemberOrOperatorCall(
+ MemberCallInfo CallInfo = commonEmitCXXMemberOrOperatorCall(
*this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs);
- auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required);
+ auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(
+ Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize);
return EmitCall(FnInfo, Callee, ReturnValue, Args);
}
@@ -290,10 +300,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
if (CE)
CallLoc = CE->getExprLoc();
- EmitTypeCheck(isa<CXXConstructorDecl>(CalleeDecl)
- ? CodeGenFunction::TCK_ConstructorCall
- : CodeGenFunction::TCK_MemberCall,
- CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()));
+ SanitizerSet SkippedChecks;
+ if (const auto *CMCE = dyn_cast<CXXMemberCallExpr>(CE)) {
+ auto *IOA = CMCE->getImplicitObjectArgument();
+ bool IsImplicitObjectCXXThis = IsWrappedCXXThis(IOA);
+ if (IsImplicitObjectCXXThis)
+ SkippedChecks.set(SanitizerKind::Alignment, true);
+ if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA))
+ SkippedChecks.set(SanitizerKind::Null, true);
+ }
+ EmitTypeCheck(
+ isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall
+ : CodeGenFunction::TCK_MemberCall,
+ CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()),
+ /*Alignment=*/CharUnits::Zero(), SkippedChecks);
// FIXME: Uses of 'MD' past this point need to be audited. We may need to use
// 'CalleeDecl' instead.
@@ -420,7 +440,8 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
// And the rest of the call args
EmitCallArgs(Args, FPT, E->arguments());
- return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required),
+ return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required,
+ /*PrefixSize=*/0),
Callee, ReturnValue, Args);
}
@@ -659,7 +680,10 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
// Emit the array size expression.
// We multiply the size of all dimensions for NumElements.
// e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6.
- numElements = CGF.EmitScalarExpr(e->getArraySize());
+ numElements = CGF.CGM.EmitConstantExpr(e->getArraySize(),
+ CGF.getContext().getSizeType(), &CGF);
+ if (!numElements)
+ numElements = CGF.EmitScalarExpr(e->getArraySize());
assert(isa<llvm::IntegerType>(numElements->getType()));
// The number of elements can have an arbitrary integer type;
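Sketch only (not part of this commit): with the change above, an array-new whose size expression is an integer constant is emitted via EmitConstantExpr first, e.g.:

auto makeGrid() {
  // The outermost size expression '2' folds to a constant of size_t type and
  // the inner dimension (3) is multiplied in afterwards, so NumElements is
  // the constant 6 rather than a value recomputed through EmitScalarExpr.
  return new int[2][3];
}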
@@ -1256,10 +1280,10 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) {
// FIXME: Add addAttribute to CallSite.
if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke))
- CI->addAttribute(llvm::AttributeSet::FunctionIndex,
+ CI->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::Builtin);
else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke))
- II->addAttribute(llvm::AttributeSet::FunctionIndex,
+ II->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::Builtin);
else
llvm_unreachable("unexpected kind of call instruction");
@@ -1560,7 +1584,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
// FIXME: Why do we not pass a CalleeDecl here?
EmitCallArgs(allocatorArgs, allocatorType, E->placement_arguments(),
- /*CalleeDecl*/nullptr, /*ParamsToSkip*/ParamsToSkip);
+ /*AC*/AbstractCallee(), /*ParamsToSkip*/ParamsToSkip);
RValue RV =
EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs);
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 59bc9cdbc0567..980972370dc2b 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -110,6 +110,16 @@ public:
VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) {
return Visit(PE->getReplacement());
}
+ ComplexPairTy VisitCoawaitExpr(CoawaitExpr *S) {
+ return CGF.EmitCoawaitExpr(*S).getComplexVal();
+ }
+ ComplexPairTy VisitCoyieldExpr(CoyieldExpr *S) {
+ return CGF.EmitCoyieldExpr(*S).getComplexVal();
+ }
+ ComplexPairTy VisitUnaryCoawait(const UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+
// l-values.
ComplexPairTy VisitDeclRefExpr(DeclRefExpr *E) {
@@ -198,7 +208,11 @@ public:
ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) {
CGF.enterFullExpression(E);
CodeGenFunction::RunCleanupsScope Scope(CGF);
- return Visit(E->getSubExpr());
+ ComplexPairTy Vals = Visit(E->getSubExpr());
+ // Defend against dominance problems caused by jumps out of expression
+ // evaluation through the shared cleanup block.
+ Scope.ForceCleanup({&Vals.first, &Vals.second});
+ return Vals;
}
ComplexPairTy VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) {
assert(E->getType()->isAnyComplexType() && "Expected complex type!");
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 1b85c45cd4be1..a64303831171d 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenFunction.h"
+#include "CGCleanup.h"
#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
@@ -24,6 +25,7 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -47,7 +49,7 @@ struct BinOpInfo {
Value *RHS;
QualType Ty; // Computation Type.
BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform
- bool FPContractable;
+ FPOptions FPFeatures;
const Expr *E; // Entire expr, for error unsupported. May not be binop.
};
@@ -58,6 +60,75 @@ static bool MustVisitNullValue(const Expr *E) {
return E->getType()->isNullPtrType();
}
+/// If \p E is a widened promoted integer, get its base (unpromoted) type.
+static llvm::Optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx,
+ const Expr *E) {
+ const Expr *Base = E->IgnoreImpCasts();
+ if (E == Base)
+ return llvm::None;
+
+ QualType BaseTy = Base->getType();
+ if (!BaseTy->isPromotableIntegerType() ||
+ Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType()))
+ return llvm::None;
+
+ return BaseTy;
+}
+
+/// Check if \p E is a widened promoted integer.
+static bool IsWidenedIntegerOp(const ASTContext &Ctx, const Expr *E) {
+ return getUnwidenedIntegerType(Ctx, E).hasValue();
+}
+
+/// Check if we can skip the overflow check for \p Op.
+static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
+ assert((isa<UnaryOperator>(Op.E) || isa<BinaryOperator>(Op.E)) &&
+ "Expected a unary or binary operator");
+
+ if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
+ return IsWidenedIntegerOp(Ctx, UO->getSubExpr());
+
+ const auto *BO = cast<BinaryOperator>(Op.E);
+ auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS());
+ if (!OptionalLHSTy)
+ return false;
+
+ auto OptionalRHSTy = getUnwidenedIntegerType(Ctx, BO->getRHS());
+ if (!OptionalRHSTy)
+ return false;
+
+ QualType LHSTy = *OptionalLHSTy;
+ QualType RHSTy = *OptionalRHSTy;
+
+ // We usually don't need overflow checks for binary operations with widened
+ // operands. Multiplication with promoted unsigned operands is a special case.
+ if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) ||
+ !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType())
+ return true;
+
+ // The overflow check can be skipped if either one of the unpromoted types
+ // is less than half the size of the promoted type.
+ unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType());
+ return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize ||
+ (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize;
+}
+
+/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions.
+static void updateFastMathFlags(llvm::FastMathFlags &FMF,
+ FPOptions FPFeatures) {
+ FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement());
+}
+
+/// Propagate fast-math flags from \p Op to the instruction in \p V.
+static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) {
+ if (auto *I = dyn_cast<llvm::Instruction>(V)) {
+ llvm::FastMathFlags FMF = I->getFastMathFlags();
+ updateFastMathFlags(FMF, Op.FPFeatures);
+ I->setFastMathFlags(FMF);
+ }
+ return V;
+}
+
class ScalarExprEmitter
: public StmtVisitor<ScalarExprEmitter, Value*> {
CodeGenFunction &CGF;
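A hedged sketch (function names invented) of the arithmetic on promoted operands that CanElideOverflowCheck lets the sanitizers skip:

short add_shorts(short a, short b) {
  // Both operands are promoted from 'short' to 'int'; the widened addition
  // cannot overflow 'int', so -fsanitize=signed-integer-overflow no longer
  // emits an overflow check for it.
  return static_cast<short>(a + b);
}

unsigned char mul_bytes(unsigned char a, unsigned char b) {
  // Promoted unsigned multiplication: each unpromoted type is less than half
  // the width of the promoted type, so the check is still elidable.
  return static_cast<unsigned char>(a * b);
}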
@@ -221,6 +292,15 @@ public:
Value *VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
return Visit(GE->getResultExpr());
}
+ Value *VisitCoawaitExpr(CoawaitExpr *S) {
+ return CGF.EmitCoawaitExpr(*S).getScalarVal();
+ }
+ Value *VisitCoyieldExpr(CoyieldExpr *S) {
+ return CGF.EmitCoyieldExpr(*S).getScalarVal();
+ }
+ Value *VisitUnaryCoawait(const UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
// Leaves.
Value *VisitIntegerLiteral(const IntegerLiteral *E) {
@@ -300,6 +380,24 @@ public:
return V;
}
+ Value *VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) {
+ VersionTuple Version = E->getVersion();
+
+ // If we're checking for a platform older than our minimum deployment
+ // target, we can fold the check away.
+ if (Version <= CGF.CGM.getTarget().getPlatformMinVersion())
+ return llvm::ConstantInt::get(Builder.getInt1Ty(), 1);
+
+ Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor();
+ llvm::Value *Args[] = {
+ llvm::ConstantInt::get(CGF.CGM.Int32Ty, Version.getMajor()),
+ llvm::ConstantInt::get(CGF.CGM.Int32Ty, Min ? *Min : 0),
+ llvm::ConstantInt::get(CGF.CGM.Int32Ty, SMin ? *SMin : 0),
+ };
+
+ return CGF.EmitBuiltinAvailable(Args);
+ }
+
Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E);
Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E);
Value *VisitConvertVectorExpr(ConvertVectorExpr *E);
@@ -405,11 +503,7 @@ public:
return CGF.LoadCXXThis();
}
- Value *VisitExprWithCleanups(ExprWithCleanups *E) {
- CGF.enterFullExpression(E);
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- return Visit(E->getSubExpr());
- }
+ Value *VisitExprWithCleanups(ExprWithCleanups *E);
Value *VisitCXXNewExpr(const CXXNewExpr *E) {
return CGF.EmitCXXNewExpr(E);
}
@@ -464,16 +558,21 @@ public:
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
// Fall through.
case LangOptions::SOB_Trapping:
+ if (CanElideOverflowCheck(CGF.getContext(), Ops))
+ return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
return EmitOverflowCheckedBinOp(Ops);
}
}
if (Ops.Ty->isUnsignedIntegerType() &&
- CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow))
+ CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+ !CanElideOverflowCheck(CGF.getContext(), Ops))
return EmitOverflowCheckedBinOp(Ops);
- if (Ops.LHS->getType()->isFPOrFPVectorTy())
- return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul");
+ if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
+ Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul");
+ return propagateFMFlags(V, Ops);
+ }
return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul");
}
/// Create a binary op that checks for overflow.
@@ -1616,6 +1715,16 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) {
E->getExprLoc());
}
+Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) {
+ CGF.enterFullExpression(E);
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ Value *V = Visit(E->getSubExpr());
+ // Defend against dominance problems caused by jumps out of expression
+ // evaluation through the shared cleanup block.
+ Scope.ForceCleanup({&V});
+ return V;
+}
+
//===----------------------------------------------------------------------===//
// Unary Operators
//===----------------------------------------------------------------------===//
@@ -1627,7 +1736,7 @@ static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E,
BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false);
BinOp.Ty = E->getType();
BinOp.Opcode = IsInc ? BO_Add : BO_Sub;
- BinOp.FPContractable = false;
+ // FIXME: once UnaryOperator carries FPFeatures, copy it here.
BinOp.E = E;
return BinOp;
}
@@ -1645,6 +1754,8 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
return Builder.CreateNSWAdd(InVal, Amount, Name);
// Fall through.
case LangOptions::SOB_Trapping:
+ if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr()))
+ return Builder.CreateNSWAdd(InVal, Amount, Name);
return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
}
llvm_unreachable("Unknown SignedOverflowBehaviorTy");
@@ -1891,7 +2002,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType());
BinOp.Ty = E->getType();
BinOp.Opcode = BO_Sub;
- BinOp.FPContractable = false;
+ // FIXME: once UnaryOperator carries FPFeatures, copy it here.
BinOp.E = E;
return EmitSub(BinOp);
}
@@ -2112,7 +2223,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
Result.RHS = Visit(E->getRHS());
Result.Ty = E->getType();
Result.Opcode = E->getOpcode();
- Result.FPContractable = E->isFPContractable();
+ Result.FPFeatures = E->getFPFeatures();
Result.E = E;
return Result;
}
@@ -2132,7 +2243,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
OpInfo.RHS = Visit(E->getRHS());
OpInfo.Ty = E->getComputationResultType();
OpInfo.Opcode = E->getOpcode();
- OpInfo.FPContractable = E->isFPContractable();
+ OpInfo.FPFeatures = E->getFPFeatures();
OpInfo.E = E;
// Load/convert the LHS.
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
@@ -2263,8 +2374,10 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck(
SanitizerKind::IntegerDivideByZero));
}
+ const auto *BO = cast<BinaryOperator>(Ops.E);
if (CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow) &&
- Ops.Ty->hasSignedIntegerRepresentation()) {
+ Ops.Ty->hasSignedIntegerRepresentation() &&
+ !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS())) {
llvm::IntegerType *Ty = cast<llvm::IntegerType>(Zero->getType());
llvm::Value *IntMin =
@@ -2324,12 +2437,12 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) {
// Rem in C can't be a floating point type: C99 6.5.5p2.
- if (CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero)) {
+ if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) ||
+ CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) &&
+ Ops.Ty->isIntegerType()) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty));
-
- if (Ops.Ty->isIntegerType())
- EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false);
+ EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false);
}
if (Ops.Ty->hasUnsignedIntegerRepresentation())
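Illustrative sketch: '%' now gets the same overflow check as '/' when -fsanitize=signed-integer-overflow is enabled, instead of being gated only on the divide-by-zero sanitizer:

int remainder_checked(int a, int b) {
  // With -fsanitize=signed-integer-overflow, the INT_MIN % -1 case is now
  // diagnosed here as well, not only for division; with
  // -fsanitize=integer-divide-by-zero the b == 0 case is still diagnosed.
  return a % b;
}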
@@ -2577,12 +2690,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
"Only fadd/fsub can be the root of an fmuladd.");
// Check whether this op is marked as fusable.
- if (!op.FPContractable)
- return nullptr;
-
- // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is
- // either disabled, or handled entirely by the LLVM backend).
- if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On)
+ if (!op.FPFeatures.allowFPContractWithinStatement())
return nullptr;
// We have a potentially fusable op. Look for a mul on one of the operands.
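A sketch (assuming the standard FP_CONTRACT pragma is honored here) of how the per-expression FPOptions now drive fmuladd formation:

float muladd(float a, float b, float c) {
#pragma STDC FP_CONTRACT ON
  // Contraction is allowed within this statement, so a * b + c may be emitted
  // as a single llvm.fmuladd intrinsic; with FP_CONTRACT OFF it stays as
  // separate fmul and fadd instructions.
  return a * b + c;
}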
@@ -2616,12 +2724,15 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
// Fall through.
case LangOptions::SOB_Trapping:
+ if (CanElideOverflowCheck(CGF.getContext(), op))
+ return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
return EmitOverflowCheckedBinOp(op);
}
}
if (op.Ty->isUnsignedIntegerType() &&
- CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow))
+ CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+ !CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);
if (op.LHS->getType()->isFPOrFPVectorTy()) {
@@ -2629,7 +2740,8 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
return FMulAdd;
- return Builder.CreateFAdd(op.LHS, op.RHS, "add");
+ Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add");
+ return propagateFMFlags(V, op);
}
return Builder.CreateAdd(op.LHS, op.RHS, "add");
@@ -2647,19 +2759,23 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
// Fall through.
case LangOptions::SOB_Trapping:
+ if (CanElideOverflowCheck(CGF.getContext(), op))
+ return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
return EmitOverflowCheckedBinOp(op);
}
}
if (op.Ty->isUnsignedIntegerType() &&
- CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow))
+ CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+ !CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);
if (op.LHS->getType()->isFPOrFPVectorTy()) {
// Try to form an fmuladd.
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
return FMulAdd;
- return Builder.CreateFSub(op.LHS, op.RHS, "sub");
+ Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub");
+ return propagateFMFlags(V, op);
}
return Builder.CreateSub(op.LHS, op.RHS, "sub");
@@ -2751,8 +2867,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
isa<llvm::IntegerType>(Ops.LHS->getType())) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
SmallVector<std::pair<Value *, SanitizerMask>, 2> Checks;
- llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS);
- llvm::Value *ValidExponent = Builder.CreateICmpULE(RHS, WidthMinusOne);
+ llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, Ops.RHS);
+ llvm::Value *ValidExponent = Builder.CreateICmpULE(Ops.RHS, WidthMinusOne);
if (SanitizeExponent) {
Checks.push_back(
@@ -2767,12 +2883,14 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
llvm::BasicBlock *CheckShiftBase = CGF.createBasicBlock("check");
Builder.CreateCondBr(ValidExponent, CheckShiftBase, Cont);
+ llvm::Value *PromotedWidthMinusOne =
+ (RHS == Ops.RHS) ? WidthMinusOne
+ : GetWidthMinusOneValue(Ops.LHS, RHS);
CGF.EmitBlock(CheckShiftBase);
- llvm::Value *BitsShiftedOff =
- Builder.CreateLShr(Ops.LHS,
- Builder.CreateSub(WidthMinusOne, RHS, "shl.zeros",
- /*NUW*/true, /*NSW*/true),
- "shl.check");
+ llvm::Value *BitsShiftedOff = Builder.CreateLShr(
+ Ops.LHS, Builder.CreateSub(PromotedWidthMinusOne, RHS, "shl.zeros",
+ /*NUW*/ true, /*NSW*/ true),
+ "shl.check");
if (CGF.getLangOpts().CPlusPlus) {
// In C99, we are not permitted to shift a 1 bit into the sign bit.
// Under C++11's rules, shifting a 1 bit into the sign bit is
@@ -3038,10 +3156,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
// because the result is altered by the store, i.e., [C99 6.5.16p1]
// 'An assignment expression has the value of the left operand after
// the assignment...'.
- if (LHS.isBitField())
+ if (LHS.isBitField()) {
CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS);
- else
+ } else {
+ CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc());
CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS);
+ }
}
// If the result is clearly ignored, return now.
@@ -3327,9 +3447,11 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// safe to evaluate the LHS and RHS unconditionally.
if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
- CGF.incrementProfileCounter(E);
-
llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
+ llvm::Value *StepV = Builder.CreateZExtOrBitCast(CondV, CGF.Int64Ty);
+
+ CGF.incrementProfileCounter(E, StepV);
+
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
if (!LHS) {
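Rough example (not from the patch) of the unconditionally evaluated conditional that this counter change affects:

int clamp_nonnegative(int x) {
  // Both arms are cheap to evaluate, so no branch is emitted; the profile
  // counter for the '?:' is now incremented by the zero-extended condition
  // value instead of unconditionally, keeping the counts branch-accurate.
  return x > 0 ? x : 0;
}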
@@ -3491,8 +3613,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// vector to get a vec4, then a bitcast if the target type is different.
if (NumElementsSrc == 3 && NumElementsDst != 3) {
Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- DstTy);
+
+ if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ DstTy);
+ }
+
Src->setName("astype");
return Src;
}
@@ -3501,9 +3627,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// to vec4 if the original type is not vec4, then a shuffle vector to
// get a vec3.
if (NumElementsSrc != 3 && NumElementsDst == 3) {
- auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- Vec4Ty);
+ if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+ auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ Vec4Ty);
+ }
+
Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
Src->setName("astype");
return Src;
diff --git a/lib/CodeGen/CGCUDABuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp
index 44dd003757adf..48156b1b26b78 100644
--- a/lib/CodeGen/CGCUDABuiltin.cpp
+++ b/lib/CodeGen/CGGPUBuiltin.cpp
@@ -1,4 +1,4 @@
-//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===//
+//===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Generates code for built-in CUDA calls which are not runtime-specific.
-// (Runtime-specific codegen lives in CGCUDARuntime.)
+// Generates code for built-in GPU calls which are not runtime-specific.
+// (Runtime-specific codegen lives in programming model specific files.)
//
//===----------------------------------------------------------------------===//
@@ -67,10 +67,9 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
// Note that by the time this function runs, E's args have already undergone the
// standard C vararg promotion (short -> int, float -> double, etc.).
RValue
-CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue) {
- assert(getLangOpts().CUDA);
- assert(getLangOpts().CUDAIsDevice);
+CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
+ assert(getTarget().getTriple().isNVPTX());
assert(E->getBuiltinCallee() == Builtin::BIprintf);
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 932b8a129e6e8..3a09a15dbc152 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -1,4 +1,4 @@
-//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
+//===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -117,10 +117,22 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E);
if (!ALE)
DLE = cast<ObjCDictionaryLiteral>(E);
-
- // Compute the type of the array we're initializing.
+
+ // Optimize empty collections by referencing constants, when available.
uint64_t NumElements =
ALE ? ALE->getNumElements() : DLE->getNumElements();
+ if (NumElements == 0 && CGM.getLangOpts().ObjCRuntime.hasEmptyCollections()) {
+ StringRef ConstantName = ALE ? "__NSArray0__" : "__NSDictionary0__";
+ QualType IdTy(CGM.getContext().getObjCIdType());
+ llvm::Constant *Constant =
+ CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName);
+ Address Addr(Constant, Context.getTypeAlignInChars(IdTy));
+ LValue LV = MakeAddrLValue(Addr, IdTy);
+ return Builder.CreateBitCast(EmitLoadOfScalar(LV, E->getLocStart()),
+ ConvertType(E->getType()));
+ }
+
+ // Compute the type of the array we're initializing.
llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()),
NumElements);
QualType ElementType = Context.getObjCIdType().withConst();
@@ -427,7 +439,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
QualType ResultType = method ? method->getReturnType() : E->getType();
CallArgList Args;
- EmitCallArgs(Args, method, E->arguments());
+ EmitCallArgs(Args, method, E->arguments(), /*AC*/AbstractCallee(method));
// For delegate init calls in ARC, do an unsafe store of null into
// self. This represents the call taking direct ownership of that
@@ -1316,7 +1328,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
BinaryOperator assign(&ivarRef, finalArg, BO_Assign,
ivarRef.getType(), VK_RValue, OK_Ordinary,
- SourceLocation(), false);
+ SourceLocation(), FPOptions());
EmitStmt(&assign);
}
@@ -1469,6 +1481,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
if (DI)
DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+ RunCleanupsScope ForScope(*this);
+
// The local variable comes into scope immediately.
AutoVarEmission variable = AutoVarEmission::invalid();
if (const DeclStmt *SD = dyn_cast<DeclStmt>(S.getElement()))
@@ -1499,8 +1513,6 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
ArrayType::Normal, 0);
Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr");
- RunCleanupsScope ForScope(*this);
-
// Emit the collection pointer. In ARC, we do a retain.
llvm::Value *Collection;
if (getLangOpts().ObjCAutoRefCount) {
@@ -1802,26 +1814,49 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) {
}
+static bool IsForwarding(StringRef Name) {
+ return llvm::StringSwitch<bool>(Name)
+ .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV
+ "objc_autorelease", // ARCInstKind::Autorelease
+ "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV
+ "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV
+ "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease
+ "objc_retainedObject", // ARCInstKind::NoopCast
+ "objc_retain", // ARCInstKind::Retain
+ "objc_unretainedObject", // ARCInstKind::NoopCast
+ "objc_unretainedPointer", // ARCInstKind::NoopCast
+ "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV
+ true)
+ .Default(false);
+}
+
static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
- llvm::FunctionType *type,
- StringRef fnName) {
- llvm::Constant *fn = CGM.CreateRuntimeFunction(type, fnName);
+ llvm::FunctionType *FTy,
+ StringRef Name) {
+ llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name);
- if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) {
+ if (auto *F = dyn_cast<llvm::Function>(RTF)) {
// If the target runtime doesn't naturally support ARC, emit weak
// references to the runtime support library. We don't really
// permit this to fail, but we need a particular relocation style.
if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() &&
!CGM.getTriple().isOSBinFormatCOFF()) {
- f->setLinkage(llvm::Function::ExternalWeakLinkage);
- } else if (fnName == "objc_retain" || fnName == "objc_release") {
+ F->setLinkage(llvm::Function::ExternalWeakLinkage);
+ } else if (Name == "objc_retain" || Name == "objc_release") {
// If we have Native ARC, set nonlazybind attribute for these APIs for
// performance.
- f->addFnAttr(llvm::Attribute::NonLazyBind);
+ F->addFnAttr(llvm::Attribute::NonLazyBind);
+ }
+
+ if (IsForwarding(Name)) {
+ llvm::AttrBuilder B;
+ B.addAttribute(llvm::Attribute::Returned);
+
+ F->arg_begin()->addAttr(llvm::AttributeList::get(F->getContext(), 1, B));
}
}
- return fn;
+ return RTF;
}
/// Perform an operation having the signature
@@ -1832,7 +1867,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
llvm::Constant *&fn,
StringRef fnName,
bool isTailCall = false) {
- if (isa<llvm::ConstantPointerNull>(value)) return value;
+ if (isa<llvm::ConstantPointerNull>(value))
+ return value;
if (!fn) {
llvm::FunctionType *fnType =
@@ -3239,7 +3275,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(),
Args, DestTy->getPointeeType(),
- VK_LValue, SourceLocation(), false);
+ VK_LValue, SourceLocation(), FPOptions());
EmitStmt(&TheCall);
@@ -3375,5 +3411,54 @@ CodeGenFunction::EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty) {
return Val;
}
+llvm::Value *
+CodeGenFunction::EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args) {
+ assert(Args.size() == 3 && "Expected 3 argument here!");
+
+ if (!CGM.IsOSVersionAtLeastFn) {
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(Int32Ty, {Int32Ty, Int32Ty, Int32Ty}, false);
+ CGM.IsOSVersionAtLeastFn =
+ CGM.CreateRuntimeFunction(FTy, "__isOSVersionAtLeast");
+ }
+
+ llvm::Value *CallRes =
+ EmitNounwindRuntimeCall(CGM.IsOSVersionAtLeastFn, Args);
+
+ return Builder.CreateICmpNE(CallRes, llvm::Constant::getNullValue(Int32Ty));
+}
+
+void CodeGenModule::emitAtAvailableLinkGuard() {
+ if (!IsOSVersionAtLeastFn)
+ return;
+ // @available requires CoreFoundation only on Darwin.
+ if (!Target.getTriple().isOSDarwin())
+ return;
+ // Add -framework CoreFoundation to the linker commands. We still want to
+ // emit the core foundation reference down below because otherwise if
+ // CoreFoundation is not used in the code, the linker won't link the
+ // framework.
+ auto &Context = getLLVMContext();
+ llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"),
+ llvm::MDString::get(Context, "CoreFoundation")};
+ LinkerOptionsMetadata.push_back(llvm::MDNode::get(Context, Args));
+ // Emit a reference to a symbol from CoreFoundation to ensure that
+ // CoreFoundation is linked into the final binary.
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false);
+ llvm::Constant *CFFunc =
+ CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber");
+
+ llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false);
+ llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction(
+ CheckFTy, "__clang_at_available_requires_core_foundation_framework"));
+ CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+ CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ CodeGenFunction CGF(*this);
+ CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc));
+ CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy));
+ CGF.Builder.CreateUnreachable();
+ addCompilerUsedGlobal(CFLinkCheckFunc);
+}
CGObjCRuntime::~CGObjCRuntime() {}
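A hedged C++ sketch of what feeds EmitBuiltinAvailable and emitAtAvailableLinkGuard (spelled with Clang's __builtin_available; the version numbers are arbitrary):

void use_if_available() {
  if (__builtin_available(macOS 10.12, *)) {
    // On Darwin the check lowers to a call to __isOSVersionAtLeast(10, 12, 0),
    // and the module gains a "-framework CoreFoundation" linker-options entry
    // plus a hidden helper referencing CFBundleGetVersionNumber so the
    // framework is actually linked.
  }
}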
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index fa2b3d81e29a4..9f6ccb4b5d266 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -18,7 +18,7 @@
#include "CGCleanup.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
@@ -2207,7 +2207,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
IvarNames.push_back(MakeConstantString(IVD->getNameAsString()));
// Get the type encoding for this ivar
std::string TypeStr;
- Context.getObjCEncodingForType(IVD->getType(), TypeStr);
+ Context.getObjCEncodingForType(IVD->getType(), TypeStr, IVD);
IvarTypes.push_back(MakeConstantString(TypeStr));
// Get the offset
uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD);
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 7219592fffcd7..43b347ce353f0 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -17,7 +17,7 @@
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
@@ -64,13 +64,11 @@ private:
// Add the non-lazy-bind attribute, since objc_msgSend is likely to
// be called a lot.
llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
- return
- CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy,
- params, true),
- "objc_msgSend",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NonLazyBind));
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NonLazyBind));
}
/// void objc_msgSend_stret (id, SEL, ...)
@@ -589,13 +587,11 @@ public:
llvm::Constant *getSetJmpFn() {
// This is specifically the prototype for x86.
llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() };
- return
- CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty,
- params, false),
- "_setjmp",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NonLazyBind));
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NonLazyBind));
}
public:
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 9062936fdd149..db02c631c9e64 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -58,9 +58,6 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
case BuiltinType::OCLQueue:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.queue_t"), 0);
- case BuiltinType::OCLNDRange:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.ndrange_t"), 0);
case BuiltinType::OCLReserveID:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0);
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 40252171368b8..874b6a69e513f 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -15,7 +15,7 @@
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
@@ -842,12 +842,12 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
-llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+static llvm::Value *emitParallelOrTeamsOutlinedFunction(
+ CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
+ const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
+ const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
- const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
@@ -857,11 +857,27 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
- HasCancel, getOutlinedHelperName());
+ HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
+llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
+ return emitParallelOrTeamsOutlinedFunction(
+ CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
+}
+
+llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
+ return emitParallelOrTeamsOutlinedFunction(
+ CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
+}
+
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
@@ -2958,7 +2974,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create the offloading info metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
- // Auxiliar methods to create metadata values and strings.
+ // Auxiliary methods to create metadata values and strings.
auto getMDInt = [&](unsigned v) {
return llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
@@ -3561,7 +3577,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
SharedRefLValue.getAddress(), Type);
} else {
// Initialize firstprivate array using element-by-element
- // intialization.
+ // initialization.
CGF.EmitOMPAggregateAssign(
PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
[&CGF, Elem, Init, &CapturesInfo](Address DestElement,
@@ -3764,9 +3780,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// Emit initial values for private copies (if any).
llvm::Value *TaskPrivatesMap = nullptr;
auto *TaskPrivatesMapTy =
- std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
- 3)
- ->getType();
+ std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = emitTaskPrivateMappingFunction(
@@ -4006,8 +4020,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
- NumDependencies, &TaskArgs,
+ auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
+ &TaskArgs,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
@@ -4241,12 +4255,10 @@ static void emitReductionCombiner(CodeGenFunction &CGF,
CGF.EmitIgnoredExpr(ReductionOp);
}
-static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
- llvm::Type *ArgsType,
- ArrayRef<const Expr *> Privates,
- ArrayRef<const Expr *> LHSExprs,
- ArrayRef<const Expr *> RHSExprs,
- ArrayRef<const Expr *> ReductionOps) {
+llvm::Value *CGOpenMPRuntime::emitReductionFunction(
+ CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps) {
auto &C = CGM.getContext();
// void reduction_func(void *LHSArg, void *RHSArg);
@@ -4329,11 +4341,11 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
return Fn;
}
-static void emitSingleReductionCombiner(CodeGenFunction &CGF,
- const Expr *ReductionOp,
- const Expr *PrivateRef,
- const DeclRefExpr *LHS,
- const DeclRefExpr *RHS) {
+void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp,
+ const Expr *PrivateRef,
+ const DeclRefExpr *LHS,
+ const DeclRefExpr *RHS) {
if (PrivateRef->getType()->isArrayType()) {
// Emit reduction for array section.
auto *LHSVar = cast<VarDecl>(LHS->getDecl());
@@ -4353,9 +4365,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait, bool SimpleReduction) {
+ ReductionOptionsTy Options) {
if (!CGF.HaveInsertPoint())
return;
+
+ bool WithNowait = Options.WithNowait;
+ bool SimpleReduction = Options.SimpleReduction;
+
// Next code should be emitted for reduction:
//
// static kmp_critical_name lock = { 0 };
@@ -4497,12 +4513,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
};
auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
for (auto *E : ReductionOps) {
- emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
- cast<DeclRefExpr>(*IRHS));
+ RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
++IPriv;
++ILHS;
++IRHS;
@@ -4562,7 +4579,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
if (XExpr) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto &&AtomicRedGen = [BO, VD, IPriv,
+ auto &&AtomicRedGen = [BO, VD,
Loc](CodeGenFunction &CGF, const Expr *XExpr,
const Expr *EExpr, const Expr *UpExpr) {
LValue X = CGF.EmitLValue(XExpr);
@@ -4572,7 +4589,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitOMPAtomicSimpleUpdateExpr(
X, E, BO, /*IsXLHSInRHSPart=*/true,
llvm::AtomicOrdering::Monotonic, Loc,
- [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
+ [&CGF, UpExpr, VD, Loc](RValue XRValue) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(
VD, [&CGF, VD, XRValue, Loc]() -> Address {
@@ -4874,25 +4891,45 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
return Body;
}
-/// \brief Emit the num_teams clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no num_teams clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of teams for a target directive. Inspect the num_teams
+/// clause associated with a teams construct combined or closely nested
+/// with the target directive.
+///
+/// Emit a team of size one for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
static llvm::Value *
-emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
- const OMPExecutableDirective &D) {
+emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any num_teams clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
- "Not expecting clause in directive.");
+ auto &Bld = CGF.Builder;
+
+ // If the target directive is combined with a teams directive:
+ // Return the value in the num_teams clause, if any.
+ // Otherwise, return 0 to denote the runtime default.
+ if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
+ if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
+ auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
+ /*IgnoreResultAssign*/ true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ // The default value is 0.
+ return Bld.getInt32(0);
+ }
+
+ // If the target directive is combined with a parallel directive but not a
+ // teams directive, start one team.
+ if (isOpenMPParallelDirective(D.getDirectiveKind()))
+ return Bld.getInt32(1);
// If the current target region has a teams region enclosed, we need to get
// the number of teams to pass to the runtime function call. This is done
@@ -4910,38 +4947,92 @@ emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
- return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
- /*IsSigned=*/true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
}
// If we have an enclosed teams directive but no num_teams clause we use
// the default value 0.
- return CGF.Builder.getInt32(0);
+ return Bld.getInt32(0);
}
// No teams associated with the directive.
return nullptr;
}
-/// \brief Emit the thread_limit clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no thread_limit clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of threads for a target directive. Inspect the
+/// thread_limit clause associated with a teams construct combined or closely
+/// nested with the target directive.
+///
+/// Emit the num_threads clause for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
static llvm::Value *
-emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
- const OMPExecutableDirective &D) {
+emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any thread_limit clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
- "Not expecting clause in directive.");
+ auto &Bld = CGF.Builder;
+
+ //
+ // If the target directive is combined with a teams directive:
+ // Return the value in the thread_limit clause, if any.
+ //
+ // If the target directive is combined with a parallel directive:
+ // Return the value in the num_threads clause, if any.
+ //
+ // If both clauses are set, select the minimum of the two.
+ //
+ // If neither the teams nor the parallel combined directive sets the number of threads
+ // in a team, return 0 to denote the runtime default.
+ //
+ // If this is not a teams directive return nullptr.
+
+ if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
+ isOpenMPParallelDirective(D.getDirectiveKind())) {
+ llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
+ llvm::Value *NumThreadsVal = nullptr;
+ llvm::Value *ThreadLimitVal = nullptr;
+
+ if (const auto *ThreadLimitClause =
+ D.getSingleClause<OMPThreadLimitClause>()) {
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
+ auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
+ /*IgnoreResultAssign*/ true);
+ ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ if (const auto *NumThreadsClause =
+ D.getSingleClause<OMPNumThreadsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+ llvm::Value *NumThreads =
+ CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+ /*IgnoreResultAssign*/ true);
+ NumThreadsVal =
+ Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
+ }
+
+ // Select the lesser of thread_limit and num_threads.
+ if (NumThreadsVal)
+ ThreadLimitVal = ThreadLimitVal
+ ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
+ ThreadLimitVal),
+ NumThreadsVal, ThreadLimitVal)
+ : NumThreadsVal;
+
+    // Set the default value passed to the runtime if either a teams or a
+    // target parallel type directive is found but no clause is specified.
+ if (!ThreadLimitVal)
+ ThreadLimitVal = DefaultThreadLimitVal;
+
+ return ThreadLimitVal;
+ }
// If the current target region has a teams region enclosed, we need to get
// the thread limit to pass to the runtime function call. This is done
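A worked illustration of the clause selection implemented above (hypothetical
user code, not part of this patch; clause values chosen only for illustration):

    // Illustrative only: a combined construct carrying both clauses.
    #pragma omp target teams distribute parallel for thread_limit(64) num_threads(128)
    for (int i = 0; i < n; ++i)
      a[i] += b[i];
    // The host emits min(64, 128) == 64 as the per-team thread count; with
    // neither clause present it emits 0, i.e. the runtime default.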
@@ -5984,8 +6075,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
OffloadError);
// Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
- OutlinedFnID, OffloadError, OffloadErrorQType,
+ auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device,
+ OutlinedFnID, OffloadError,
&D](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
// Emit the offloading arrays.
@@ -6021,24 +6112,50 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Return value of the runtime offloading call.
llvm::Value *Return;
- auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
- auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
+ auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
+ auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
- // If we have NumTeams defined this means that we have an enclosed teams
- // region. Therefore we also expect to have ThreadLimit defined. These two
- // values should be defined in the presence of a teams directive, regardless
- // of having any clauses associated. If the user is using teams but no
- // clauses, these two values will be the default that should be passed to
- // the runtime library - a 32-bit integer with the value zero.
+ // The target region is an outlined function launched by the runtime
+  // via calls to __tgt_target() or __tgt_target_teams().
+ //
+ // __tgt_target() launches a target region with one team and one thread,
+ // executing a serial region. This master thread may in turn launch
+  // more threads within its team upon encountering a parallel region;
+  // however, no additional teams can be launched on the device.
+ //
+ // __tgt_target_teams() launches a target region with one or more teams,
+ // each with one or more threads. This call is required for target
+ // constructs such as:
+ // 'target teams'
+ // 'target' / 'teams'
+ // 'target teams distribute parallel for'
+ // 'target parallel'
+ // and so on.
+ //
+ // Note that on the host and CPU targets, the runtime implementation of
+  // these calls simply invokes the outlined function without forking threads.
+ // The outlined functions themselves have runtime calls to
+ // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
+ // the compiler in emitTeamsCall() and emitParallelCall().
+ //
+ // In contrast, on the NVPTX target, the implementation of
+ // __tgt_target_teams() launches a GPU kernel with the requested number
+  // of teams and threads, so no additional calls to the runtime are required.
if (NumTeams) {
- assert(ThreadLimit && "Thread limit expression should be available along "
- "with number of teams.");
+ // If we have NumTeams defined this means that we have an enclosed teams
+ // region. Therefore we also expect to have NumThreads defined. These two
+ // values should be defined in the presence of a teams directive,
+ // regardless of having any clauses associated. If the user is using teams
+ // but no clauses, these two values will be the default that should be
+ // passed to the runtime library - a 32-bit integer with the value zero.
+ assert(NumThreads && "Thread limit expression should be available along "
+ "with number of teams.");
llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID,
PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray, NumTeams,
- ThreadLimit};
+ NumThreads};
Return = CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
} else {
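Hypothetical examples of the offloading dispatch described in the comment above
(directive forms taken from that comment; the mapping is a sketch, not emitted
code):

    #pragma omp target            // serial region       -> __tgt_target()
    { /* ... */ }
    #pragma omp target teams      // team-parallel        -> __tgt_target_teams()
    { /* ... */ }
    #pragma omp target parallel   // combined parallel    -> __tgt_target_teams()
    { /* ... */ }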
@@ -6095,17 +6212,18 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!S)
return;
- // If we find a OMP target directive, codegen the outline function and
- // register the result.
- // FIXME: Add other directives with target when they become supported.
- bool isTargetDirective = isa<OMPTargetDirective>(S);
+ // Codegen OMP target directives that offload compute to the device.
+ bool requiresDeviceCodegen =
+ isa<OMPExecutableDirective>(S) &&
+ isOpenMPTargetExecutionDirective(
+ cast<OMPExecutableDirective>(S)->getDirectiveKind());
- if (isTargetDirective) {
- auto *E = cast<OMPExecutableDirective>(S);
+ if (requiresDeviceCodegen) {
+ auto &E = *cast<OMPExecutableDirective>(S);
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
+ getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
FileID, Line);
// Is this a target region that should not be emitted as an entry point? If
@@ -6114,13 +6232,22 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
ParentName, Line))
return;
- llvm::Function *Fn;
- llvm::Constant *Addr;
- std::tie(Fn, Addr) =
- CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
- CGM, cast<OMPTargetDirective>(*E), ParentName,
- /*isOffloadEntry=*/true);
- assert(Fn && Addr && "Target region emission failed.");
+ switch (S->getStmtClass()) {
+ case Stmt::OMPTargetDirectiveClass:
+ CodeGenFunction::EmitOMPTargetDeviceFunction(
+ CGM, ParentName, cast<OMPTargetDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelDirectiveClass:
+ CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
+ CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
+ break;
+ case Stmt::OMPTargetTeamsDirectiveClass:
+ CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
+ break;
+ default:
+ llvm_unreachable("Unknown target directive for OpenMP device codegen.");
+ }
return;
}
@@ -6271,8 +6398,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the
// closing of the region.
- auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
- CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
+ PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
@@ -6318,8 +6445,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
};
// Generate code for the closing of the data region.
- auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
assert(Info.isValid() && "Invalid data environment closing arguments.");
llvm::Value *BasePointersArrayArg = nullptr;
@@ -6397,7 +6523,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
"Expecting either target enter, exit data, or update directives.");
// Generate the code for the opening of the data environment.
- auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 61ddc702ed24a..7901a6b7a8fce 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -527,6 +527,7 @@ public:
/// Get combiner/initializer for the specified user-defined reduction, if any.
virtual std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl *D);
+
/// \brief Emits outlined function for the specified OpenMP parallel directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -535,7 +536,19 @@ public:
/// \param InnermostKind Kind of innermost directive (for simple directives it
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
- virtual llvm::Value *emitParallelOrTeamsOutlinedFunction(
+ virtual llvm::Value *emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
+
+ /// \brief Emits outlined function for the specified OpenMP teams directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ virtual llvm::Value *emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
@@ -880,6 +893,32 @@ public:
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen,
bool HasCancel = false);
+
+ /// Emits reduction function.
+ /// \param ArgsType Array type containing pointers to reduction variables.
+ /// \param Privates List of private copies for original reduction arguments.
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps);
+
+  /// Emits a single reduction combiner.
+ void emitSingleReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp,
+ const Expr *PrivateRef,
+ const DeclRefExpr *LHS,
+ const DeclRefExpr *RHS);
+
+ struct ReductionOptionsTy {
+ bool WithNowait;
+ bool SimpleReduction;
+ OpenMPDirectiveKind ReductionKind;
+ };
/// \brief Emit a code for reduction clause. Next code should be emitted for
/// reduction:
/// \code
@@ -916,14 +955,18 @@ public:
/// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
/// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
/// or 'operator binop(LHS, RHS)'.
- /// \param WithNowait true if parent directive has also nowait clause, false
- /// otherwise.
+ /// \param Options List of options for reduction codegen:
+  ///     WithNowait true if the parent directive also has a nowait clause,
+  ///     false otherwise.
+ /// otherwise.
+ /// SimpleReduction Emit reduction operation only. Used for omp simd
+ /// directive on the host.
+ /// ReductionKind The kind of reduction to perform.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait, bool SimpleReduction);
+ ReductionOptionsTy Options);
/// \brief Emit code for 'taskwait' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
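A hypothetical call site for the new Options-based emitReduction() interface
(the real callers live in CGStmtOpenMP.cpp; the option values here are purely
illustrative):

    CGOpenMPRuntime::ReductionOptionsTy Options = {
        /*WithNowait=*/false, /*SimpleReduction=*/false,
        /*ReductionKind=*/OMPD_parallel};
    CGM.getOpenMPRuntime().emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                         ReductionOps, Options);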
@@ -991,7 +1034,7 @@ public:
virtual bool emitTargetGlobalVariable(GlobalDecl GD);
/// \brief Emit the global \a GD if it is meaningful for the target. Returns
- /// if it was emitted succesfully.
+ /// if it was emitted successfully.
/// \param GD Global to scan.
virtual bool emitTargetGlobal(GlobalDecl GD);
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 6a6d832e33cd5..c3391d087b759 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -26,6 +26,11 @@ enum OpenMPRTLFunctionNVPTX {
OMPRTL_NVPTX__kmpc_kernel_init,
/// \brief Call to void __kmpc_kernel_deinit();
OMPRTL_NVPTX__kmpc_kernel_deinit,
+ /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ /// short RequiresOMPRuntime, short RequiresDataSharing);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_init,
+ /// \brief Call to void __kmpc_spmd_kernel_deinit();
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
/// \brief Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
@@ -39,6 +44,30 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL_NVPTX__kmpc_end_serialized_parallel,
+ /// \brief Call to int32_t __kmpc_shuffle_int32(int32_t element,
+ /// int16_t lane_offset, int16_t warp_size);
+ OMPRTL_NVPTX__kmpc_shuffle_int32,
+ /// \brief Call to int64_t __kmpc_shuffle_int64(int64_t element,
+ /// int16_t lane_offset, int16_t warp_size);
+ OMPRTL_NVPTX__kmpc_shuffle_int64,
+ /// \brief Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32
+ /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
+ /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
+ /// int32_t num_vars, size_t reduce_size, void *reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
+ /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
+ /// int32_t index, int32_t width),
+ /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t
+ /// index, int32_t width, int32_t reduce))
+ OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
+ /// \brief Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+ OMPRTL_NVPTX__kmpc_end_reduce_nowait
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -76,6 +105,47 @@ public:
CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
}
};
+
+// A class to track the execution mode when generating code for directives
+// within a target region. The appropriate mode (generic/spmd) is set on entry
+// to the target region and used by nested directives such as 'parallel' to
+// emit optimized code.
+class ExecutionModeRAII {
+private:
+ CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
+
+public:
+ ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode,
+ CGOpenMPRuntimeNVPTX::ExecutionMode NewMode)
+ : Mode(Mode) {
+ SavedMode = Mode;
+ Mode = NewMode;
+ }
+ ~ExecutionModeRAII() { Mode = SavedMode; }
+};
+
+/// GPU Configuration: This information can be derived from CUDA registers;
+/// however, providing compile-time constants helps generate more efficient
+/// code. For all practical purposes this is fine because the configuration
+/// is the same for all known NVPTX architectures.
+enum MachineConfiguration : unsigned {
+ WarpSize = 32,
+ /// Number of bits required to represent a lane identifier, which is
+ /// computed as log_2(WarpSize).
+ LaneIDBits = 5,
+ LaneIDMask = WarpSize - 1,
+
+ /// Global memory alignment for performance.
+ GlobalMemoryAlignment = 256,
+};
+
+enum NamedBarrier : unsigned {
+ /// Synchronize on this barrier #ID using a named barrier primitive.
+ /// Only the subset of active threads in a parallel region arrive at the
+  /// Only the subset of active threads in a parallel region arrives at the
+ NB_Parallel = 1,
+};
} // anonymous namespace
/// Get the GPU warp size.
@@ -96,6 +166,23 @@ static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
llvm::None, "nvptx_tid");
}
+/// Get the id of the warp in the block.
+/// We assume that the warp size is 32, which is always the case
+/// on the NVPTX device, to generate more efficient code.
+static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id");
+}
+
+/// Get the id of the current lane in the Warp.
+/// We assume that the warp size is 32, which is always the case
+/// on the NVPTX device, to generate more efficient code.
+static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask),
+ "nvptx_lane_id");
+}
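As a quick sanity check of the two helpers above, assuming the fixed warp size
of 32: a CUDA thread id of 77 yields a warp id of 77 >> 5 == 2 and a lane id of
77 & 31 == 13, i.e. lane 13 of warp 2.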
+
/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
@@ -112,16 +199,37 @@ static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}
+/// Get barrier #ID to synchronize selected (multiple of warp size) threads in
+/// a CTA.
+static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
+ llvm::Value *NumThreads) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
+ Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(),
+ llvm::Intrinsic::nvvm_barrier),
+ Args);
+}
+
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
+/// Synchronize worker threads in a parallel region.
+static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
+ return getNVPTXBarrier(CGF, NB_Parallel, NumThreads);
+}
+
/// Get the value of the thread_limit clause in the teams directive.
-/// The runtime encodes thread_limit in the launch parameter, always starting
-/// thread_limit+warpSize threads per team.
-static llvm::Value *getThreadLimit(CodeGenFunction &CGF) {
+/// For the 'generic' execution mode, the runtime encodes thread_limit in
+/// the launch parameters, always starting thread_limit+warpSize threads per
+/// CTA. The threads in the last warp are reserved for master execution.
+/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
+static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
+ bool IsInSpmdExecutionMode = false) {
CGBuilderTy &Bld = CGF.Builder;
- return Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
- "thread_limit");
+ return IsInSpmdExecutionMode
+ ? getNVPTXNumThreads(CGF)
+ : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
}
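For example (values assumed for illustration): with a CTA of 128 threads and a
warp size of 32, getThreadLimit() returns 128 - 32 == 96 in generic mode, where
the last warp is reserved for the master, and 128 in spmd mode, where every
thread in the CTA is part of the team.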
/// Get the thread id of the OMP master thread.
@@ -159,12 +267,34 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
}
+bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
+ return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
+}
+
+static CGOpenMPRuntimeNVPTX::ExecutionMode
+getExecutionModeForDirective(CodeGenModule &CGM,
+ const OMPExecutableDirective &D) {
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ switch (DirectiveKind) {
+ case OMPD_target:
+ case OMPD_target_teams:
+ return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
+ case OMPD_target_parallel:
+ return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
+ default:
+ llvm_unreachable("Unsupported directive on NVPTX device.");
+ }
+ llvm_unreachable("Unsupported directive on NVPTX device.");
+}
+
void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode,
+ CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
EntryFunctionState EST;
WorkerFunctionState WST(CGM);
Work.clear();
@@ -252,6 +382,94 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
EST.ExitBB = nullptr;
}
+void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) {
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode,
+ CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd);
+ EntryFunctionState EST;
+
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ CGOpenMPRuntimeNVPTX &RT;
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
+ const OMPExecutableDirective &D;
+
+ public:
+ NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+ const OMPExecutableDirective &D)
+ : RT(RT), EST(EST), D(D) {}
+ void Enter(CodeGenFunction &CGF) override {
+ RT.emitSpmdEntryHeader(CGF, EST, D);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ RT.emitSpmdEntryFooter(CGF, EST);
+ }
+ } Action(*this, EST, D);
+ CodeGen.setAction(Action);
+ emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
+ return;
+}
+
+void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
+ CodeGenFunction &CGF, EntryFunctionState &EST,
+ const OMPExecutableDirective &D) {
+ auto &Bld = CGF.Builder;
+
+ // Setup BBs in entry function.
+ llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ // Initialize the OMP state in the runtime; called by all active threads.
+ // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
+ // based on code analysis of the target region.
+ llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true),
+ /*RequiresOMPRuntime=*/Bld.getInt16(1),
+ /*RequiresDataSharing=*/Bld.getInt16(1)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
+ CGF.EmitBranch(ExecuteBB);
+
+ CGF.EmitBlock(ExecuteBB);
+}
+
+void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
+ EntryFunctionState &EST) {
+ if (!EST.ExitBB)
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
+ CGF.EmitBranch(OMPDeInitBB);
+
+ CGF.EmitBlock(OMPDeInitBB);
+  // De-initialize the OMP state in the runtime; called by all active threads.
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
+ CGF.EmitBranch(EST.ExitBB);
+
+ CGF.EmitBlock(EST.ExitBB);
+ EST.ExitBB = nullptr;
+}
+
+// Create a unique global variable to indicate the execution mode of this target
+// region. The execution mode is either 'generic' or 'spmd', depending on the
+// target directive. This variable is picked up by the offload library to set
+// up the device appropriately before kernel launch. If the execution mode is
+// 'generic', the runtime reserves one warp for the master; otherwise, all
+// warps participate in parallel work.
+static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
+ CGOpenMPRuntimeNVPTX::ExecutionMode Mode) {
+ (void)new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode"));
+}
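Concretely, an outlined kernel named K gets a companion weak, constant i8
global named K_exec_mode holding the ExecutionMode value; the offloading plugin
reads this symbol before launch to decide whether a master warp must be
reserved.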
+
void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
auto &Ctx = CGM.getContext();
@@ -385,6 +603,22 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
break;
}
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
+ // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ // short RequiresOMPRuntime, short RequiresDataSharing);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
+ // Build void __kmpc_spmd_kernel_deinit();
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
+ break;
+ }
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
/// Build void __kmpc_kernel_prepare_parallel(
/// void *outlined_function);
@@ -428,6 +662,103 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
}
+ case OMPRTL_NVPTX__kmpc_shuffle_int32: {
+ // Build int32_t __kmpc_shuffle_int32(int32_t element,
+ // int16_t lane_offset, int16_t warp_size);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_shuffle_int64: {
+ // Build int64_t __kmpc_shuffle_int64(int64_t element,
+ // int16_t lane_offset, int16_t warp_size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: {
+ // Build int32_t kmpc_nvptx_parallel_reduce_nowait(kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+    // lane_offset, int16_t shortCircuit),
+    // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
+ // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
+ // int32_t num_vars, size_t reduce_size, void *reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ // lane_offset, int16_t shortCircuit),
+ // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
+ // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
+ // int32_t index, int32_t width),
+ // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad,
+ // int32_t index, int32_t width, int32_t reduce))
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy,
+ CGM.Int32Ty, CGM.Int32Ty};
+ auto *CopyToScratchpadFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *LoadReduceTypeParams[] = {
+ CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty};
+ auto *LoadReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo(),
+ CopyToScratchpadFnTy->getPointerTo(),
+ LoadReduceFnTy->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+ // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+ break;
+ }
}
return RTLFn;
}
@@ -463,39 +794,74 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
- emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
- CodeGen);
+ CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
+ getExecutionModeForDirective(CGM, D);
+ switch (Mode) {
+ case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
+ emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+ break;
+ case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd:
+ emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+ break;
+ case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown:
+ llvm_unreachable(
+ "Unknown programming model for OpenMP directive on NVPTX target.");
+ }
+
+ setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM) {
+ : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
}
+void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) {
+ // Do nothing in case of Spmd mode and L0 parallel.
+  // TODO: If in Spmd mode and L1 parallel, emit the clause.
+ if (isInSpmdExecutionMode())
+ return;
+
+ CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+}
+
+void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
+ llvm::Value *NumThreads,
+ SourceLocation Loc) {
+ // Do nothing in case of Spmd mode and L0 parallel.
+  // TODO: If in Spmd mode and L1 parallel, emit the clause.
+ if (isInSpmdExecutionMode())
+ return;
+
+ CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
+}
+
void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
const Expr *NumTeams,
const Expr *ThreadLimit,
SourceLocation Loc) {}
-llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction(
+llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar,
+ InnermostKind, CodeGen);
+}
- llvm::Function *OutlinedFun = nullptr;
- if (isa<OMPTeamsDirective>(D)) {
- llvm::Value *OutlinedFunVal =
- CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- D, ThreadIDVar, InnermostKind, CodeGen);
- OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
- OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
- OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
- } else {
- llvm::Value *OutlinedFunVal =
- CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- D, ThreadIDVar, InnermostKind, CodeGen);
- OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
- }
+llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+
+ llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
+ D, ThreadIDVar, InnermostKind, CodeGen);
+ llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
+ OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
return OutlinedFun;
}
@@ -525,7 +891,10 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
if (!CGF.HaveInsertPoint())
return;
- emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+ if (isInSpmdExecutionMode())
+ emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+ else
+ emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
}
void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
@@ -533,8 +902,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
- auto &&L0ParallelGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) {
CGBuilderTy &Bld = CGF.Builder;
// Prepare for parallel region. Indicate the outlined function.
@@ -565,8 +933,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
PrePostActionTy &) {
- auto &&CodeGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
@@ -596,3 +964,1289 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
ThenRCG(CGF);
}
}
+
+void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
+ // Just call the outlined function to execute the parallel region.
+ // OutlinedFn(&GTid, &zero, CapturedStruct);
+ //
+ // TODO: Do something with IfCond when support for the 'if' clause
+ // is added on Spmd target directives.
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ OutlinedFnArgs.push_back(
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+}
+
+/// This function creates calls to one of two shuffle functions to copy
+/// variables between lanes in a warp.
+static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
+ QualType ElemTy,
+ llvm::Value *Elem,
+ llvm::Value *Offset) {
+ auto &CGM = CGF.CGM;
+ auto &C = CGM.getContext();
+ auto &Bld = CGF.Builder;
+ CGOpenMPRuntimeNVPTX &RT =
+ *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
+
+ unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+ assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
+
+ OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
+ ? OMPRTL_NVPTX__kmpc_shuffle_int32
+ : OMPRTL_NVPTX__kmpc_shuffle_int64;
+
+ // Cast all types to 32- or 64-bit values before calling shuffle routines.
+ auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+ auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
+ auto *WarpSize = CGF.EmitScalarConversion(
+ getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
+ C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+
+ auto *ShuffledVal =
+ CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
+ {ElemCast, Offset, WarpSize});
+
+ return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+}
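In other words: a 4-byte element (e.g. int or float) is routed through
__kmpc_shuffle_int32 and an 8-byte element through __kmpc_shuffle_int64; the
value is sign-extended or bitcast to the matching integer width before the
call, the warp size is passed as a 16-bit value, and the result is truncated or
bitcast back to the element's memory type.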
+
+namespace {
+enum CopyAction : unsigned {
+ // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
+ // the warp using shuffle instructions.
+ RemoteLaneToThread,
+ // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
+ ThreadCopy,
+ // ThreadToScratchpad: Copy a team-reduced array to the scratchpad.
+ ThreadToScratchpad,
+ // ScratchpadToThread: Copy from a scratchpad array in global memory
+ // containing team-reduced data to a thread's stack.
+ ScratchpadToThread,
+};
+} // namespace
+
+struct CopyOptionsTy {
+ llvm::Value *RemoteLaneOffset;
+ llvm::Value *ScratchpadIndex;
+ llvm::Value *ScratchpadWidth;
+};
+
+/// Emit instructions to copy a Reduce list, which contains partially
+/// aggregated values, in the specified direction.
+static void emitReductionListCopy(
+ CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy,
+ ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase,
+ CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) {
+
+ auto &CGM = CGF.CGM;
+ auto &C = CGM.getContext();
+ auto &Bld = CGF.Builder;
+
+ auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
+ auto *ScratchpadIndex = CopyOptions.ScratchpadIndex;
+ auto *ScratchpadWidth = CopyOptions.ScratchpadWidth;
+
+  // Iterate, element by element, through the source Reduce list and make a
+  // copy.
+ unsigned Idx = 0;
+ unsigned Size = Privates.size();
+ for (auto &Private : Privates) {
+ Address SrcElementAddr = Address::invalid();
+ Address DestElementAddr = Address::invalid();
+ Address DestElementPtrAddr = Address::invalid();
+ // Should we shuffle in an element from a remote lane?
+ bool ShuffleInElement = false;
+ // Set to true to update the pointer in the dest Reduce list to a
+ // newly created element.
+ bool UpdateDestListPtr = false;
+ // Increment the src or dest pointer to the scratchpad, for each
+ // new element.
+ bool IncrScratchpadSrc = false;
+ bool IncrScratchpadDest = false;
+
+ switch (Action) {
+ case RemoteLaneToThread: {
+ // Step 1.1: Get the address for the src element in the Reduce list.
+ Address SrcElementPtrAddr =
+ Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+ llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
+ SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ SrcElementAddr =
+ Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+
+ // Step 1.2: Create a temporary to store the element in the destination
+ // Reduce list.
+ DestElementPtrAddr =
+ Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+ DestElementAddr =
+ CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
+ ShuffleInElement = true;
+ UpdateDestListPtr = true;
+ break;
+ }
+ case ThreadCopy: {
+ // Step 1.1: Get the address for the src element in the Reduce list.
+ Address SrcElementPtrAddr =
+ Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+ llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
+ SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ SrcElementAddr =
+ Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+
+ // Step 1.2: Get the address for dest element. The destination
+ // element has already been created on the thread's stack.
+ DestElementPtrAddr =
+ Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+ llvm::Value *DestElementPtr =
+ CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation());
+ Address DestElemAddr =
+ Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
+ DestElementAddr = Bld.CreateElementBitCast(
+ DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+ break;
+ }
+ case ThreadToScratchpad: {
+ // Step 1.1: Get the address for the src element in the Reduce list.
+ Address SrcElementPtrAddr =
+ Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+ llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
+ SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ SrcElementAddr =
+ Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+
+ // Step 1.2: Get the address for dest element:
+ // address = base + index * ElementSizeInChars.
+ unsigned ElementSizeInChars =
+ C.getTypeSizeInChars(Private->getType()).getQuantity();
+ auto *CurrentOffset =
+ Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
+ ScratchpadIndex);
+ auto *ScratchPadElemAbsolutePtrVal =
+ Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
+ ScratchPadElemAbsolutePtrVal =
+ Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
+ Address ScratchpadPtr =
+ Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
+ DestElementAddr = Bld.CreateElementBitCast(
+ ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+ IncrScratchpadDest = true;
+ break;
+ }
+ case ScratchpadToThread: {
+ // Step 1.1: Get the address for the src element in the scratchpad.
+ // address = base + index * ElementSizeInChars.
+ unsigned ElementSizeInChars =
+ C.getTypeSizeInChars(Private->getType()).getQuantity();
+ auto *CurrentOffset =
+ Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
+ ScratchpadIndex);
+ auto *ScratchPadElemAbsolutePtrVal =
+ Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset);
+ ScratchPadElemAbsolutePtrVal =
+ Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
+ SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
+ IncrScratchpadSrc = true;
+
+ // Step 1.2: Create a temporary to store the element in the destination
+ // Reduce list.
+ DestElementPtrAddr =
+ Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+ DestElementAddr =
+ CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
+ UpdateDestListPtr = true;
+ break;
+ }
+ }
+
+    // Regardless of the source and destination of the copy, emit the load of
+    // the src element first, as it is required in all copy directions.
+ SrcElementAddr = Bld.CreateElementBitCast(
+ SrcElementAddr, CGF.ConvertTypeForMem(Private->getType()));
+ llvm::Value *Elem =
+ CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
+ Private->getType(), SourceLocation());
+
+ // Now that all active lanes have read the element in the
+ // Reduce list, shuffle over the value from the remote lane.
+ if (ShuffleInElement) {
+ Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
+ RemoteLaneOffset);
+ }
+
+ // Store the source element value to the dest element address.
+ CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
+ Private->getType());
+
+ // Step 3.1: Modify reference in dest Reduce list as needed.
+ // Modifying the reference in Reduce list to point to the newly
+ // created element. The element is live in the current function
+ // scope and that of functions it invokes (i.e., reduce_function).
+ // RemoteReduceData[i] = (void*)&RemoteElem
+ if (UpdateDestListPtr) {
+ CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast(
+ DestElementAddr.getPointer(), CGF.VoidPtrTy),
+ DestElementPtrAddr, /*Volatile=*/false,
+ C.VoidPtrTy);
+ }
+
+ // Step 4.1: Increment SrcBase/DestBase so that it points to the starting
+ // address of the next element in scratchpad memory, unless we're currently
+ // processing the last one. Memory alignment is also taken care of here.
+ if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
+ llvm::Value *ScratchpadBasePtr =
+ IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
+ unsigned ElementSizeInChars =
+ C.getTypeSizeInChars(Private->getType()).getQuantity();
+ ScratchpadBasePtr = Bld.CreateAdd(
+ ScratchpadBasePtr,
+ Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get(
+ CGM.SizeTy, ElementSizeInChars)));
+
+ // Take care of global memory alignment for performance
+ ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateSDiv(
+ ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
+ ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateMul(
+ ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
+
+ if (IncrScratchpadDest)
+ DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
+ else /* IncrScratchpadSrc = true */
+ SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
+ }
+
+ Idx++;
+ }
+}
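A worked example of the scratchpad alignment arithmetic above, using the
GlobalMemoryAlignment of 256 defined earlier and an assumed running base of
1000: ((1000 - 1) / 256 + 1) * 256 == 1024, i.e. the base pointer is rounded up
to the next 256-byte boundary before stepping to the next element; a base that
is already a multiple of 256 is left unchanged.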
+
+/// This function emits a helper that loads data from the scratchpad array
+/// and (optionally) reduces it with the input operand.
+///
+/// load_and_reduce(local, scratchpad, index, width, should_reduce)
+/// reduce_data remote;
+/// for elem in remote:
+/// remote.elem = Scratchpad[elem_id][index]
+/// if (should_reduce)
+/// local = local @ remote
+/// else
+/// local = remote
+llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy,
+ llvm::Value *ReduceFn) {
+ auto &C = CGM.getContext();
+ auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+
+ // Destination of the copy.
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.VoidPtrTy);
+ // Base address of the scratchpad array, with each element storing a
+ // Reduce list per team.
+ ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.VoidPtrTy);
+ // A source index into the scratchpad array.
+ ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, Int32Ty);
+ // Row width of an element in the scratchpad array, typically
+ // the number of teams.
+ ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, Int32Ty);
+  // If should_reduce == 1, then it's load AND reduce;
+  // if should_reduce == 0 (or otherwise), then it only loads (+ copy).
+ // The latter case is used for initialization.
+ ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, Int32Ty);
+
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&ScratchPadArg);
+ Args.push_back(&IndexArg);
+ Args.push_back(&WidthArg);
+ Args.push_back(&ShouldReduceArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_load_and_reduce", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ // Get local Reduce list pointer.
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address ReduceListAddr(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
+ llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
+ AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+
+ Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
+ llvm::Value *IndexVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGM.SizeTy, /*isSigned=*/true);
+
+ Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
+ llvm::Value *WidthVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGM.SizeTy, /*isSigned=*/true);
+
+ Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg);
+ llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar(
+ AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation());
+
+  // The absolute pointer to the base address of the next element to copy.
+ llvm::Value *CumulativeElemBasePtr =
+ Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
+ Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
+
+ // Create a Remote Reduce list to store the elements read from the
+ // scratchpad array.
+ Address RemoteReduceList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list");
+
+ // Assemble remote Reduce list from scratchpad array.
+ emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates,
+ SrcDataAddr, RemoteReduceList,
+ {/*RemoteLaneOffset=*/nullptr,
+ /*ScratchpadIndex=*/IndexVal,
+ /*ScratchpadWidth=*/WidthVal});
+
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+
+ auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1));
+ Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
+
+ CGF.EmitBlock(ThenBB);
+ // We should reduce with the local Reduce list.
+ // reduce_function(LocalReduceList, RemoteReduceList)
+ llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ ReduceListAddr.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ RemoteReduceList.getPointer(), CGF.VoidPtrTy);
+ CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr});
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ // No reduction; just copy:
+ // Local Reduce list = Remote Reduce list.
+ emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
+ RemoteReduceList, ReduceListAddr);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// This function emits a helper that stores reduced data from the team
+/// master to a scratchpad array in global memory.
+///
+/// for elem in Reduce List:
+/// scratchpad[elem_id][index] = elem
+///
+llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy) {
+
+ auto &C = CGM.getContext();
+ auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+
+ // Source of the copy.
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.VoidPtrTy);
+ // Base address of the scratchpad array, with each element storing a
+ // Reduce list per team.
+ ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.VoidPtrTy);
+ // A destination index into the scratchpad array, typically the team
+ // identifier.
+ ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, Int32Ty);
+ // Row width of an element in the scratchpad array, typically
+ // the number of teams.
+ ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, Int32Ty);
+
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&ScratchPadArg);
+ Args.push_back(&IndexArg);
+ Args.push_back(&WidthArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_copy_to_scratchpad", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address SrcDataAddr(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
+ llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
+ AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+
+ Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
+ llvm::Value *IndexVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGF.SizeTy, /*isSigned=*/true);
+
+ Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
+ llvm::Value *WidthVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGF.SizeTy, /*isSigned=*/true);
+
+  // The absolute pointer to the base address of the next element to copy.
+ llvm::Value *CumulativeElemBasePtr =
+ Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
+ Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
+
+ emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates,
+ SrcDataAddr, DestDataAddr,
+ {/*RemoteLaneOffset=*/nullptr,
+ /*ScratchpadIndex=*/IndexVal,
+ /*ScratchpadWidth=*/WidthVal});
+
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// This function emits a helper that gathers Reduce lists from the first
+/// lane of every active warp to lanes in the first warp.
+///
+/// void inter_warp_copy_func(void* reduce_data, num_warps)
+/// shared smem[warp_size];
+/// For all data entries D in reduce_data:
+/// If (I am the first lane in each warp)
+/// Copy my local D to smem[warp_id]
+/// sync
+/// if (I am the first warp)
+/// Copy smem[thread_id] to my local D
+/// sync
+static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy) {
+ auto &C = CGM.getContext();
+ auto &M = CGM.getModule();
+
+ // ReduceList: thread local Reduce list.
+ // At the stage of the computation when this function is called, partially
+ // aggregated values reside in the first lane of every active warp.
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.VoidPtrTy);
+ // NumWarps: number of warps active in the parallel region. This could
+ // be smaller than 32 (max warps in a CTA) for partial block reduction.
+ ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr,
+ C.getIntTypeForBitwidth(32, /* Signed */ true));
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&NumWarpsArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_inter_warp_copy_func", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ // This array is used as a medium to transfer, one reduce element at a time,
+ // the data from the first lane of every warp to lanes in the first warp
+ // in order to perform the final step of a reduction in a parallel region
+ // (reduction across warps). The array is placed in NVPTX __shared__ memory
+ // for reduced latency, as well as to have a distinct copy for concurrently
+ // executing target regions. The array is declared with common linkage so
+ // as to be shared across compilation units.
+ const char *TransferMediumName =
+ "__openmp_nvptx_data_transfer_temporary_storage";
+ llvm::GlobalVariable *TransferMedium =
+ M.getGlobalVariable(TransferMediumName);
+ if (!TransferMedium) {
+ auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize);
+ unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
+ TransferMedium = new llvm::GlobalVariable(
+ M, Ty,
+ /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
+ llvm::Constant::getNullValue(Ty), TransferMediumName,
+ /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+ SharedAddressSpace);
+ }
+
+ // Get the CUDA thread id of the current OpenMP thread on the GPU.
+ auto *ThreadID = getNVPTXThreadID(CGF);
+ // nvptx_lane_id = nvptx_id % warpsize
+ auto *LaneID = getNVPTXLaneID(CGF);
+ // nvptx_warp_id = nvptx_id / warpsize
+ auto *WarpID = getNVPTXWarpID(CGF);
+
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address LocalReduceList(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ unsigned Idx = 0;
+ for (auto &Private : Privates) {
+ //
+ // Warp master copies reduce element to transfer medium in __shared__
+ // memory.
+ //
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+
+ // if (lane_id == 0)
+ auto IsWarpMaster =
+ Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master");
+ Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
+ CGF.EmitBlock(ThenBB);
+
+ // Reduce element = LocalReduceList[i]
+ Address ElemPtrPtrAddr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+ ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ // elemptr = (type[i]*)(elemptrptr)
+ Address ElemPtr =
+ Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ ElemPtr = Bld.CreateElementBitCast(
+ ElemPtr, CGF.ConvertTypeForMem(Private->getType()));
+ // elem = *elemptr
+ llvm::Value *Elem = CGF.EmitLoadOfScalar(
+ ElemPtr, /*Volatile=*/false, Private->getType(), SourceLocation());
+
+ // Get pointer to location in transfer medium.
+ // MediumPtr = &medium[warp_id]
+ llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
+ Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType()));
+ // Casting to actual data type.
+ // MediumPtr = (type[i]*)MediumPtrAddr;
+ MediumPtr = Bld.CreateElementBitCast(
+ MediumPtr, CGF.ConvertTypeForMem(Private->getType()));
+
+ //*MediumPtr = elem
+ Bld.CreateStore(Elem, MediumPtr);
+
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
+ llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
+ AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation());
+
+ auto *NumActiveThreads = Bld.CreateNSWMul(
+ NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
+ // named_barrier_sync(ParallelBarrierID, num_active_threads)
+ syncParallelThreads(CGF, NumActiveThreads);
+
+ //
+ // Warp 0 copies reduce element from transfer medium.
+ //
+ llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
+
+ // Up to 32 threads in warp 0 are active.
+ auto IsActiveThread =
+ Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
+ Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
+
+ CGF.EmitBlock(W0ThenBB);
+
+ // SrcMediumPtr = &medium[tid]
+ llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
+ Address SrcMediumPtr(SrcMediumPtrVal,
+ C.getTypeAlignInChars(Private->getType()));
+ // SrcMediumVal = *SrcMediumPtr;
+ SrcMediumPtr = Bld.CreateElementBitCast(
+ SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType()));
+ llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar(
+ SrcMediumPtr, /*Volatile=*/false, Private->getType(), SourceLocation());
+
+ // TargetElemPtr = (type[i]*)(SrcDataAddr[i])
+ Address TargetElemPtrPtr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
+ TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ Address TargetElemPtr =
+ Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType()));
+ TargetElemPtr = Bld.CreateElementBitCast(
+ TargetElemPtr, CGF.ConvertTypeForMem(Private->getType()));
+
+ // *TargetElemPtr = SrcMediumVal;
+ CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
+ Private->getType());
+ Bld.CreateBr(W0MergeBB);
+
+ CGF.EmitBlock(W0ElseBB);
+ Bld.CreateBr(W0MergeBB);
+
+ CGF.EmitBlock(W0MergeBB);
+
+ // While warp 0 copies values from transfer medium, all other warps must
+ // wait.
+ syncParallelThreads(CGF, NumActiveThreads);
+ Idx++;
+ }
+
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emit a helper that reduces data across two OpenMP threads (lanes)
+/// in the same warp. It uses shuffle instructions to copy over data from
+/// a remote lane's stack. The reduction algorithm performed is specified
+/// by the fourth parameter.
+///
+/// Algorithm Versions.
+/// Full Warp Reduce (argument value 0):
+/// This algorithm assumes that all 32 lanes are active and gathers
+/// data from these 32 lanes, producing a single resultant value.
+/// Contiguous Partial Warp Reduce (argument value 1):
+/// This algorithm assumes that only a *contiguous* subset of lanes
+/// are active. This happens for the last warp in a parallel region
+/// when the user specified num_threads is not an integer multiple of
+/// 32. This contiguous subset always starts with the zeroth lane.
+/// Partial Warp Reduce (argument value 2):
+/// This algorithm gathers data from any number of lanes at any position.
+/// All reduced values are stored in the lowest possible lane. The set
+/// of problems every algorithm addresses is a superset of those
+/// addressable by algorithms with a lower version number. Overhead
+/// increases as algorithm version increases.
+///
+/// Terminology
+/// Reduce element:
+/// Reduce element refers to the individual data field with primitive
+/// data types to be combined and reduced across threads.
+/// Reduce list:
+/// Reduce list refers to a collection of local, thread-private
+/// reduce elements.
+/// Remote Reduce list:
+/// Remote Reduce list refers to a collection of remote (relative to
+/// the current thread) reduce elements.
+///
+/// We distinguish between three states of threads that are important to
+/// the implementation of this function.
+/// Alive threads:
+/// Threads in a warp executing the SIMT instruction, as distinguished from
+/// threads that are inactive due to divergent control flow.
+/// Active threads:
+/// The minimal set of threads that has to be alive upon entry to this
+/// function. The computation is correct iff active threads are alive.
+/// Some threads are alive but they are not active because they do not
+/// contribute to the computation in any useful manner. Turning them off
+/// may introduce control flow overheads without any tangible benefits.
+/// Effective threads:
+/// In order to comply with the argument requirements of the shuffle
+/// function, we must keep all lanes holding data alive. But at most
+/// half of them perform value aggregation; we refer to this half of
+/// threads as effective. The other half simply hands off its
+/// data.
+///
+/// Procedure
+/// Value shuffle:
+/// In this step active threads transfer data from higher lane positions
+/// in the warp to lower lane positions, creating Remote Reduce list.
+/// Value aggregation:
+/// In this step, effective threads combine their thread local Reduce list
+/// with Remote Reduce list and store the result in the thread local
+/// Reduce list.
+/// Value copy:
+/// In this step, we deal with the assumption made by algorithm 2
+/// (i.e., the contiguity assumption). When we have an odd number of lanes
+/// active, say 2k+1, only k threads will be effective and therefore k
+/// new values will be produced. However, the Reduce list owned by the
+/// (2k+1)th thread is ignored in the value aggregation. Therefore
+/// we copy the Reduce list from the (2k+1)th lane to the (k+1)th lane so
+/// that the contiguity assumption still holds.
+static llvm::Value *
+emitShuffleAndReduceFunction(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+ auto &C = CGM.getContext();
+
+ // Thread local Reduce list used to host the values of data to be reduced.
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.VoidPtrTy);
+ // Current lane id; could be logical.
+ ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.ShortTy);
+ // Offset of the remote source lane relative to the current lane.
+ ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.ShortTy);
+ // Algorithm version. This is expected to be known at compile time.
+ ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, C.ShortTy);
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&LaneIDArg);
+ Args.push_back(&RemoteLaneOffsetArg);
+ Args.push_back(&AlgoVerArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address LocalReduceList(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg);
+ llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar(
+ AddrLaneIDArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+ Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg);
+ llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar(
+ AddrRemoteLaneOffsetArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+ Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg);
+ llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar(
+ AddrAlgoVerArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+ // Create a local thread-private variable to host the Reduce list
+ // from a remote lane.
+ Address RemoteReduceList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list");
+
+ // This loop iterates through the list of reduce elements and copies,
+ // element by element, from a remote lane in the warp to RemoteReduceList,
+ // hosted on the thread's stack.
+ emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates,
+ LocalReduceList, RemoteReduceList,
+ {/*RemoteLaneOffset=*/RemoteLaneOffsetArgVal,
+ /*ScratchpadIndex=*/nullptr,
+ /*ScratchpadWidth=*/nullptr});
+
+ // The actions to be performed on the Remote Reduce list are dependent
+ // on the algorithm version.
+ //
+ // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 &&
+ // LaneId % 2 == 0 && Offset > 0):
+ // do the reduction value aggregation
+ //
+ // The thread local variable Reduce list is mutated in place to host the
+ // reduced data, which is the aggregated value produced from local and
+ // remote lanes.
+ //
+ // Note that AlgoVer is expected to be a constant integer known at compile
+ // time.
+ // When AlgoVer==0, the first conjunction evaluates to true, making
+ // the entire predicate true at compile time.
+ // When AlgoVer==1, only the second part of the second conjunction needs
+ // to be evaluated at run time; the other conjunctions fold to false
+ // at compile time.
+ // When AlgoVer==2, only the second part of the third conjunction needs
+ // to be evaluated at run time; the other conjunctions fold to false
+ // at compile time.
+ auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0));
+
+ auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+ auto CondAlgo1 = Bld.CreateAnd(
+ Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
+
+ auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
+ auto CondAlgo2 = Bld.CreateAnd(
+ Algo2,
+ Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)),
+ Bld.getInt16(0)));
+ CondAlgo2 = Bld.CreateAnd(
+ CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
+
+ auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
+ CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
+
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+ Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
+
+ CGF.EmitBlock(ThenBB);
+ // reduce_function(LocalReduceList, RemoteReduceList)
+ llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ LocalReduceList.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ RemoteReduceList.getPointer(), CGF.VoidPtrTy);
+ CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
+ // Reduce list.
+ Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+ auto CondCopy = Bld.CreateAnd(
+ Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
+
+ llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont");
+ Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
+
+ CGF.EmitBlock(CpyThenBB);
+ emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
+ RemoteReduceList, LocalReduceList);
+ Bld.CreateBr(CpyMergeBB);
+
+ CGF.EmitBlock(CpyElseBB);
+ Bld.CreateBr(CpyMergeBB);
+
+ CGF.EmitBlock(CpyMergeBB);
+
+ CGF.FinishFunction();
+ return Fn;
+}
+
+///
+/// Design of OpenMP reductions on the GPU
+///
+/// Consider a typical OpenMP program with one or more reduction
+/// clauses:
+///
+/// float foo;
+/// double bar;
+/// #pragma omp target teams distribute parallel for \
+/// reduction(+:foo) reduction(*:bar)
+/// for (int i = 0; i < N; i++) {
+/// foo += A[i]; bar *= B[i];
+/// }
+///
+/// where 'foo' and 'bar' are reduced across all OpenMP threads in
+/// all teams. In our OpenMP implementation on the NVPTX device an
+/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads
+/// within a team are mapped to CUDA threads within a threadblock.
+/// Our goal is to efficiently aggregate values across all OpenMP
+/// threads such that:
+///
+/// - the compiler and runtime are logically concise, and
+/// - the reduction is performed efficiently in a hierarchical
+/// manner as follows: within OpenMP threads in the same warp,
+/// across warps in a threadblock, and finally across teams on
+/// the NVPTX device.
+///
+/// Introduction to Decoupling
+///
+/// We would like to decouple the compiler and the runtime so that the
+/// latter is ignorant of the reduction variables (number, data types)
+/// and the reduction operators. This allows a simpler interface
+/// and implementation while still attaining good performance.
+///
+/// Pseudocode for the aforementioned OpenMP program generated by the
+/// compiler is as follows:
+///
+/// 1. Create private copies of reduction variables on each OpenMP
+/// thread: 'foo_private', 'bar_private'
+/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
+/// to it and writes the result in 'foo_private' and 'bar_private'
+/// respectively.
+/// 3. Call the OpenMP runtime on the GPU to reduce within a team
+/// and store the result on the team master:
+///
+/// __kmpc_nvptx_parallel_reduce_nowait(...,
+/// reduceData, shuffleReduceFn, interWarpCpyFn)
+///
+/// where:
+/// struct ReduceData {
+/// double *foo;
+/// double *bar;
+/// } reduceData
+/// reduceData.foo = &foo_private
+/// reduceData.bar = &bar_private
+///
+/// 'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
+/// auxiliary functions generated by the compiler that operate on
+/// variables of type 'ReduceData'. They help the runtime perform the
+/// algorithmic steps in a data-agnostic manner.
+///
+/// 'shuffleReduceFn' is a pointer to a function that reduces data
+/// of type 'ReduceData' across two OpenMP threads (lanes) in the
+/// same warp. It takes the following arguments as input:
+///
+/// a. variable of type 'ReduceData' on the calling lane,
+/// b. its lane_id,
+/// c. an offset relative to the current lane_id to generate a
+/// remote_lane_id. The remote lane contains the second
+/// variable of type 'ReduceData' that is to be reduced.
+/// d. an algorithm version parameter determining which reduction
+/// algorithm to use.
+///
+/// 'shuffleReduceFn' retrieves data from the remote lane using
+/// efficient GPU shuffle intrinsics and reduces, using the
+/// algorithm specified by the 4th parameter, the two operands
+/// element-wise. The result is written to the first operand.
+///
+/// Different reduction algorithms are implemented in different
+/// runtime functions, all calling 'shuffleReduceFn' to perform
+/// the essential reduction step. Therefore, based on the 4th
+/// parameter, this function behaves slightly differently to
+/// cooperate with the runtime to ensure correctness under
+/// different circumstances.
+///
+/// 'InterWarpCpyFn' is a pointer to a function that transfers
+/// reduced variables across warps. It tunnels, through CUDA
+/// shared memory, the thread-private data of type 'ReduceData'
+/// from lane 0 of each warp to a lane in the first warp.
+/// 4. Call the OpenMP runtime on the GPU to reduce across teams.
+/// The last team writes the global reduced value to memory.
+///
+/// ret = __kmpc_nvptx_teams_reduce_nowait(...,
+/// reduceData, shuffleReduceFn, interWarpCpyFn,
+/// scratchpadCopyFn, loadAndReduceFn)
+///
+/// 'scratchpadCopyFn' is a helper that stores reduced
+/// data from the team master to a scratchpad array in
+/// global memory.
+///
+/// 'loadAndReduceFn' is a helper that loads data from
+/// the scratchpad array and reduces it with the input
+/// operand.
+///
+/// These compiler generated functions hide address
+/// calculation and alignment information from the runtime.
+/// 5. if ret == 1:
+/// The team master of the last team stores the reduced
+/// result to the globals in memory.
+/// foo += reduceData.foo; bar *= reduceData.bar
+///
+///
+/// Warp Reduction Algorithms
+///
+/// On the warp level, we have three algorithms implemented in the
+/// OpenMP runtime depending on the number of active lanes:
+///
+/// Full Warp Reduction
+///
+/// The reduce algorithm within a warp where all lanes are active
+/// is implemented in the runtime as follows:
+///
+/// full_warp_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+/// for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
+/// ShuffleReduceFn(reduce_data, 0, offset, 0);
+/// }
+///
+/// The algorithm completes in log2(WARPSIZE) steps.
+///
+/// 'ShuffleReduceFn' is called here with lane_id set to 0 because lane_id is
+/// not used; we therefore save instructions by not retrieving it
+/// from the corresponding special registers. The 4th parameter, which
+/// represents the version of the algorithm being used, is set to 0 to
+/// signify full warp reduction.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// #reduce_elem refers to an element in the local lane's data structure
+/// #remote_elem is retrieved from a remote lane
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem;
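+///
+/// As a concrete illustration (a sketch only; the generated code goes
+/// through the ReduceData helpers and NVPTX shuffle intrinsics rather than
+/// a bare float), a full-warp '+' reduction of a single float unrolls to:
+///
+///   val += __shfl_down(val, 16);   // 32 partial sums -> 16
+///   val += __shfl_down(val, 8);    // 16 -> 8
+///   val += __shfl_down(val, 4);    //  8 -> 4
+///   val += __shfl_down(val, 2);    //  4 -> 2
+///   val += __shfl_down(val, 1);    //  2 -> 1, final value in lane 0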
+///
+/// Contiguous Partial Warp Reduction
+///
+/// This reduce algorithm is used within a warp where only the first
+/// 'n' (n <= WARPSIZE) lanes are active. It is typically used when the
+/// number of OpenMP threads in a parallel region is not a multiple of
+/// WARPSIZE. The algorithm is implemented in the runtime as follows:
+///
+/// void
+/// contiguous_partial_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn,
+/// int size, int lane_id) {
+/// int curr_size;
+/// int offset;
+/// curr_size = size;
+/// offset = curr_size/2;
+/// while (offset>0) {
+/// ShuffleReduceFn(reduce_data, lane_id, offset, 1);
+/// curr_size = (curr_size+1)/2;
+/// offset = curr_size/2;
+/// }
+/// }
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id < offset)
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+/// reduce_elem = remote_elem
+///
+/// This algorithm assumes that the data to be reduced are located in a
+/// contiguous subset of lanes starting from the first. When there is
+/// an odd number of active lanes, the data in the last lane is not
+/// aggregated with any other lane's data but is instead copied over.
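+///
+/// For instance (illustrative only), with 7 active lanes the offset sequence
+/// is 3, 2, 1:
+///   step 1: lanes 0-2 aggregate with lanes 3-5, and lane 3 copies over
+///           lane 6's data (the odd element), leaving 4 partial results
+///   step 2: lanes 0-1 aggregate with lanes 2-3, leaving 2 partial results
+///   step 3: lane 0 aggregates with lane 1; the final result is in lane 0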
+///
+/// Dispersed Partial Warp Reduction
+///
+/// This algorithm is used within a warp when any discontiguous subset of
+/// lanes are active. It is used to implement the reduction operation
+/// across lanes in an OpenMP simd region or in a nested parallel region.
+///
+/// void
+/// dispersed_partial_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+/// int size, remote_id;
+/// int logical_lane_id = number_of_active_lanes_before_me() * 2;
+/// do {
+/// remote_id = next_active_lane_id_right_after_me();
+/// # the above function returns 0 if no active lane
+/// # is present right after the current lane.
+/// size = number_of_active_lanes_in_this_warp();
+/// logical_lane_id /= 2;
+/// ShuffleReduceFn(reduce_data, logical_lane_id,
+/// remote_id-1-threadIdx.x, 2);
+/// } while (logical_lane_id % 2 == 0 && size > 1);
+/// }
+///
+/// There is no assumption made about the initial state of the reduction.
+/// Any number of lanes (>=1) could be active at any position. The reduction
+/// result is returned in the first active lane.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id % 2 == 0 && offset > 0)
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+/// reduce_elem = remote_elem
+///
+///
+/// Intra-Team Reduction
+///
+/// This function, as implemented in the runtime call
+/// '__kmpc_nvptx_parallel_reduce_nowait', aggregates data across OpenMP
+/// threads in a team. It first reduces within a warp using the
+/// aforementioned algorithms. We then proceed to gather all such
+/// reduced values at the first warp.
+///
+/// The runtime makes use of the function 'InterWarpCpyFn', which copies
+/// data from each warp master (the zeroth lane of each warp, where
+/// warp-reduced data is held) to the zeroth warp. This step reduces (in
+/// a mathematical sense) the problem of reduction across warp masters in
+/// a block to the problem of warp reduction.
+///
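+/// A rough sketch of the runtime side of this call (hypothetical helper
+/// names; the actual entry point is implemented in the NVPTX OpenMP
+/// runtime library):
+///
+///   int32_t __kmpc_nvptx_parallel_reduce_nowait(..., void *reduce_data,
+///       ShuffleReduceFn shflFct, InterWarpCopyFn cpyFct) {
+///     warp_reduce(reduce_data, shflFct);   // intra-warp, algorithms above
+///     cpyFct(reduce_data, num_warps);      // gather warp results in warp 0
+///     if (warp_id == 0)
+///       warp_reduce(reduce_data, shflFct); // reduce across warp masters
+///     return is_team_master();             // 1 on the team master thread
+///   }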
+///
+/// Inter-Team Reduction
+///
+/// Once a team has reduced its data to a single value, it is stored in
+/// a global scratchpad array. Since each team has a distinct slot, this
+/// can be done without locking.
+///
+/// The last team to write to the scratchpad array proceeds to reduce the
+/// scratchpad array. One or more workers in the last team use the helper
+/// 'loadAndReduceFn' to load and reduce values from the array, i.e.,
+/// the k'th worker reduces every k'th element.
+///
+/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait' to
+/// reduce across workers and compute a globally reduced value.
+///
+void CGOpenMPRuntimeNVPTX::emitReduction(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
+ bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
+ // FIXME: Add support for simd reduction.
+ assert((TeamsReduction || ParallelReduction) &&
+ "Invalid reduction selection in emitReduction.");
+
+ auto &C = CGM.getContext();
+
+ // 1. Build a list of reduction variables.
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+ auto Size = RHSExprs.size();
+ for (auto *E : Privates) {
+ if (E->getType()->isVariablyModifiedType())
+ // Reserve a slot for the array size.
+ ++Size;
+ }
+ llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
+ QualType ReductionArrayTy =
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ Address ReductionList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+ auto IPriv = Privates.begin();
+ unsigned Idx = 0;
+ for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
+ Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ Elem);
+ if ((*IPriv)->getType()->isVariablyModifiedType()) {
+ // Store array size.
+ ++Idx;
+ Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ llvm::Value *Size = CGF.Builder.CreateIntCast(
+ CGF.getVLASize(
+ CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+ .first,
+ CGF.SizeTy, /*isSigned=*/false);
+ CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+ Elem);
+ }
+ }
+
+ // 2. Emit reduce_func().
+ auto *ReductionFn = emitReductionFunction(
+ CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
+ LHSExprs, RHSExprs, ReductionOps);
+
+ // 4. Build res = __kmpc_nvptx_{parallel,teams}_reduce_nowait(<gtid>, <n>,
+ // sizeof(RedList), RedList, shuffle_reduce_func, interwarp_copy_func);
+ auto *ThreadId = getThreadID(CGF, Loc);
+ auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ ReductionList.getPointer(), CGF.VoidPtrTy);
+
+ auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
+ CGM, Privates, ReductionArrayTy, ReductionFn);
+ auto *InterWarpCopyFn =
+ emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy);
+
+ llvm::Value *Res = nullptr;
+ if (ParallelReduction) {
+ llvm::Value *Args[] = {ThreadId,
+ CGF.Builder.getInt32(RHSExprs.size()),
+ ReductionArrayTySize,
+ RL,
+ ShuffleAndReduceFn,
+ InterWarpCopyFn};
+
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
+ Args);
+ }
+
+ if (TeamsReduction) {
+ auto *ScratchPadCopyFn =
+ emitCopyToScratchpad(CGM, Privates, ReductionArrayTy);
+ auto *LoadAndReduceFn = emitReduceScratchpadFunction(
+ CGM, Privates, ReductionArrayTy, ReductionFn);
+
+ llvm::Value *Args[] = {ThreadId,
+ CGF.Builder.getInt32(RHSExprs.size()),
+ ReductionArrayTySize,
+ RL,
+ ShuffleAndReduceFn,
+ InterWarpCopyFn,
+ ScratchPadCopyFn,
+ LoadAndReduceFn};
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait),
+ Args);
+ }
+
+ // 5. Build switch(res)
+ auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+ auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
+
+ // 6. Build case 1: where we have reduced values in the master
+ // thread in each team.
+ // __kmpc_end_reduce{_nowait}(<gtid>);
+ // break;
+ auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+ SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
+ CGF.EmitBlock(Case1BB);
+
+ // Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
+ llvm::Value *EndArgs[] = {ThreadId};
+ auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps,
+ this](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto IPriv = Privates.begin();
+ auto ILHS = LHSExprs.begin();
+ auto IRHS = RHSExprs.begin();
+ for (auto *E : ReductionOps) {
+ emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
+ ++IPriv;
+ ++ILHS;
+ ++IRHS;
+ }
+ };
+ RegionCodeGenTy RCG(CodeGen);
+ NVPTXActionTy Action(
+ nullptr, llvm::None,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
+ CGF.EmitBranch(DefaultBB);
+ CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 4010b46a4cbd6..ae25e94759e6a 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -43,6 +43,8 @@ private:
void createWorkerFunction(CodeGenModule &CGM);
};
+ bool isInSpmdExecutionMode() const;
+
/// \brief Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
@@ -58,11 +60,12 @@ private:
/// function.
void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
- /// \brief Returns specified OpenMP runtime function for the current OpenMP
- /// implementation. Specialized for the NVPTX device.
- /// \param Function OpenMP runtime function.
- /// \return Specified function.
- llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+ /// \brief Helper for Spmd mode target directive's entry function.
+ void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ const OMPExecutableDirective &D);
+
+ /// \brief Signal termination of Spmd mode execution.
+ void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
//
// Base class overrides.
@@ -87,6 +90,22 @@ private:
llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// \brief Emit outlined function specialized for the Single Program
+ /// Multiple Data programming model for applicable target directives on the
+ /// NVPTX device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// An outlined function may not be an entry if, e.g., the 'if' clause always
+ /// evaluates to false.
+ /// \param CodeGen Object containing the target statements.
+ void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
+
/// \brief Emit outlined function for 'target' directive on the NVPTX
/// device.
/// \param D Directive to emit.
@@ -118,6 +137,22 @@ private:
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond);
+ /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// variables captured in a record whose address is stored in \a
+ /// CapturedStruct.
+ /// This call is for a parallel directive within an SPMD target directive.
+ /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
+ ///
+ void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars,
+ const Expr *IfCond);
+
protected:
/// \brief Get the function name of an outlined region.
// The name can be customized depending on the target.
@@ -129,6 +164,20 @@ protected:
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+ /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+ virtual void emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) override;
+
+ /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+ /// clause.
+ /// \param NumThreads An integer value of threads.
+ virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+ llvm::Value *NumThreads,
+ SourceLocation Loc) override;
+
/// \brief This function ought to emit, in the general case, a call to
// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
// as these numbers are obtained through the PTX grid and block configuration.
@@ -138,7 +187,22 @@ public:
const Expr *ThreadLimit, SourceLocation Loc) override;
/// \brief Emits inlined function for the specified OpenMP parallel
- // directive but an inlined function for teams.
+ // directive.
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// \brief Emits inlined function for the specified OpenMP teams
+ // directive.
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
@@ -147,10 +211,10 @@ public:
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
llvm::Value *
- emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
- const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind,
- const RegionCodeGenTy &CodeGen) override;
+ emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
/// \brief Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
@@ -177,6 +241,50 @@ public:
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) override;
+
+ /// Emit code for the reduction clause.
+ ///
+ /// \param Privates List of private copies for original reduction arguments.
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ /// \param Options List of options for reduction codegen:
+ /// WithNowait true if the parent directive also has a nowait clause, false
+ /// otherwise.
+ /// SimpleReduction Emit reduction operation only. Used for omp simd
+ /// directive on the host.
+ /// ReductionKind The kind of reduction to perform.
+ virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps,
+ ReductionOptionsTy Options) override;
+
+ /// Returns specified OpenMP runtime function for the current OpenMP
+ /// implementation. Specialized for the NVPTX device.
+ /// \param Function OpenMP runtime function.
+ /// \return Specified function.
+ llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+
+ /// Target codegen is specialized based on two programming models: the
+ /// 'generic' fork-join model of OpenMP, and a more GPU-efficient 'spmd'
+ /// model for constructs like 'target parallel' that support it.
+ enum ExecutionMode {
+ /// Single Program Multiple Data.
+ Spmd,
+ /// Generic codegen to support fork-join model.
+ Generic,
+ Unknown,
+ };
+
+private:
+ // Track the execution mode when codegening directives within a target
+ // region. The appropriate mode (generic/spmd) is set on entry to the
+ // target region and used by nested directives such as 'parallel'
+ // to emit optimized code.
+ ExecutionMode CurrentExecutionMode;
};
} // CodeGen namespace.
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 8370607db50f9..0ebfd99363c1d 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -145,7 +145,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
EmitCoroutineBody(cast<CoroutineBodyStmt>(*S));
break;
case Stmt::CoreturnStmtClass:
- CGM.ErrorUnsupported(S, "coroutine");
+ EmitCoreturnStmt(cast<CoreturnStmt>(*S));
break;
case Stmt::CapturedStmtClass: {
const CapturedStmt *CS = cast<CapturedStmt>(S);
@@ -2127,16 +2127,16 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
/* IsAlignStack */ false, AsmDialect);
llvm::CallInst *Result = Builder.CreateCall(IA, Args);
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
// Attach readnone and readonly attributes.
if (!HasSideEffect) {
if (ReadNone)
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::ReadNone);
else if (ReadOnly)
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::ReadOnly);
}
@@ -2157,7 +2157,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Conservatively, mark all inline asm blocks in CUDA as convergent
// (meaning, they may call an intrinsically convergent op, such as bar.sync,
// and so can't have certain optimizations applied around them).
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::Convergent);
}
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 39e1cdfdbe2a1..22269e42c7a00 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -26,7 +26,7 @@ using namespace CodeGen;
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
-class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
+class OMPLexicalScope : public CodeGenFunction::LexicalScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
for (const auto *C : S.clauses()) {
if (auto *CPI = OMPClauseWithPreInit::get(C)) {
@@ -54,10 +54,11 @@ class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
public:
OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
- bool AsInlined = false)
+ bool AsInlined = false, bool EmitPreInitStmt = true)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
- emitPreInitStmt(CGF, S);
+ if (EmitPreInitStmt)
+ emitPreInitStmt(CGF, S);
if (AsInlined) {
if (S.hasAssociatedStmt()) {
auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
@@ -81,6 +82,38 @@ public:
}
};
+/// Lexical scope for the OpenMP parallel construct that handles correct codegen
+/// for captured expressions.
+class OMPParallelScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !isOpenMPTargetExecutionDirective(Kind) &&
+ isOpenMPParallelDirective(Kind);
+ }
+
+public:
+ OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
+/// Lexical scope for the OpenMP teams construct that handles correct codegen
+/// for captured expressions.
+class OMPTeamsScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !isOpenMPTargetExecutionDirective(Kind) &&
+ isOpenMPTeamsDirective(Kind);
+ }
+
+public:
+ OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
@@ -194,6 +227,17 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
return TmpAddr;
}
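+/// Recursively compute the canonical parameter type of \p T, preserving the
+/// outer lvalue-reference and pointer structure. Used below when
+/// canonicalizing variably modified argument types of captured statements.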
+static QualType getCanonicalParamType(ASTContext &C, QualType T) {
+ if (T->isLValueReferenceType()) {
+ return C.getLValueReferenceType(
+ getCanonicalParamType(C, T.getNonReferenceType()),
+ /*SpelledAsLValue=*/false);
+ }
+ if (T->isPointerType())
+ return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
+ return C.getCanonicalParamType(T);
+}
+
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
assert(
@@ -233,13 +277,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
II = &getContext().Idents.get("vla");
}
if (ArgType->isVariablyModifiedType()) {
- bool IsReference = ArgType->isLValueReferenceType();
ArgType =
- getContext().getCanonicalParamType(ArgType.getNonReferenceType());
- if (IsReference && !ArgType->isPointerType()) {
- ArgType = getContext().getLValueReferenceType(
- ArgType, /*SpelledAsLValue=*/false);
- }
+ getCanonicalParamType(getContext(), ArgType.getNonReferenceType());
}
Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
FD->getLocation(), II, ArgType));
@@ -986,7 +1025,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
OriginalBaseLValue);
// Store the address of the original variable associated with the LHS
// implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
+ PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
return OASELValueLB.getAddress();
});
// Emit reduction copy.
@@ -1040,9 +1079,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
*this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
// Store the address of the original variable associated with the LHS
// implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
- return ASELValue.getAddress();
- });
+ PrivateScope.addPrivate(
+ LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
// Emit reduction copy.
bool IsRegistered = PrivateScope.addPrivate(
OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
@@ -1158,7 +1196,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
- const OMPExecutableDirective &D) {
+ const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
if (!HaveInsertPoint())
return;
llvm::SmallVector<const Expr *, 8> Privates;
@@ -1174,14 +1212,15 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
}
if (HasAtLeastOneReduction) {
+ bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
+ isOpenMPParallelDirective(D.getDirectiveKind()) ||
+ D.getDirectiveKind() == OMPD_simd;
+ bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
*this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
- D.getSingleClause<OMPNowaitClause>() ||
- isOpenMPParallelDirective(D.getDirectiveKind()) ||
- D.getDirectiveKind() == OMPD_simd,
- D.getDirectiveKind() == OMPD_simd);
+ {WithNowait, SimpleReduction, ReductionKind});
}
}
@@ -1214,10 +1253,9 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
- emitParallelOrTeamsOutlinedFunction(S,
- *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
@@ -1239,7 +1277,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
}
}
- OMPLexicalScope Scope(CGF, S);
+ OMPParallelScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
@@ -1264,7 +1302,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
emitPostUpdateForReductionClause(
@@ -1677,7 +1715,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
// Emit final copy of the lastprivate variables at the end of loops.
if (HasLastprivateClause)
CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
emitPostUpdateForReductionClause(
CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -2003,15 +2041,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
});
}
-void CodeGenFunction::EmitOMPTargetTeamsDirective(
- const OMPTargetTeamsDirective &S) {
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
-}
-
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
const OMPTargetTeamsDistributeDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(
@@ -2222,7 +2251,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
}
- EmitOMPReductionClauseFinal(S);
+ EmitOMPReductionClauseFinal(
+ S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
+ ? /*Parallel and Simd*/ OMPD_parallel_for_simd
+ : /*Parallel only*/ OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
@@ -2320,8 +2352,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
- OK_Ordinary, S.getLocStart(),
- /*fpContractable=*/false);
+ OK_Ordinary, S.getLocStart(), FPOptions());
// Increment for loop counter.
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
S.getLocStart());
@@ -2397,7 +2428,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
};
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
@@ -2633,7 +2664,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
for (auto *IRef : C->varlists())
Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
- auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
+ auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
@@ -3250,7 +3281,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = XRValExpr->getType();
auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
- IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
+ IsPostfixUpdate](RValue XRValue) -> RValue {
CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
RValue Res = CGF.EmitAnyExpr(UE);
@@ -3277,7 +3308,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = X->getType().getNonReferenceType();
ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
X->getType().getNonReferenceType(), Loc);
- auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
+ auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
NewVVal = XRValue;
return ExprRValue;
};
@@ -3404,41 +3435,24 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
-std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
-CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
- CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
- bool IsOffloadEntry) {
- llvm::Function *OutlinedFn = nullptr;
- llvm::Constant *OutlinedFnID = nullptr;
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
- OMPPrivateScope PrivateScope(CGF);
- (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- CGF.EmitOMPPrivateClause(S, PrivateScope);
- (void)PrivateScope.Privatize();
-
- Action.Enter(CGF);
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- };
- // Emit target region as a standalone region.
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
- S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
- return std::make_pair(OutlinedFn, OutlinedFnID);
-}
-
-void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ const RegionCodeGenTy &CodeGen) {
+ assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
+ CodeGenModule &CGM = CGF.CGM;
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
- llvm::SmallVector<llvm::Value *, 16> CapturedVars;
- GenerateOpenMPCapturedVars(CS, CapturedVars);
-
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
- // Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
-
- if (auto *C = S.getSingleClause<OMPIfClause>()) {
- IfCond = C->getCondition();
+ // Check for the (at most one) 'if' clause that applies to the target region.
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_target) {
+ IfCond = C->getCondition();
+ break;
+ }
}
// Check if we have any device clause associated with the directive.
@@ -3453,43 +3467,76 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
bool IsOffloadEntry = true;
if (IfCond) {
bool Val;
- if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
+ if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
IsOffloadEntry = false;
}
if (CGM.getLangOpts().OMPTargetTriples.empty())
IsOffloadEntry = false;
- assert(CurFuncDecl && "No parent declaration for target region!");
+ assert(CGF.CurFuncDecl && "No parent declaration for target region!");
StringRef ParentName;
// In case we have Ctors/Dtors we use the complete type variant to produce
// the mangling of the device outlined kernel.
- if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
+ if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
- else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
+ else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
else
ParentName =
- CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
+ CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
- std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
- CGM, S, ParentName, IsOffloadEntry);
- OMPLexicalScope Scope(*this, S);
- CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
+ IsOffloadEntry, CodeGen);
+ OMPLexicalScope Scope(CGF, S);
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
CapturedVars);
}
+static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
+ PrePostActionTy &Action) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+
+ Action.Enter(CGF);
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+}
+
+void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
+ StringRef ParentName,
+ const OMPTargetDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
- emitParallelOrTeamsOutlinedFunction(S,
- *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
- const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
- const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
- const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
+ const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
+ const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
@@ -3498,7 +3545,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
S.getLocStart());
}
- OMPLexicalScope Scope(CGF, S);
+ OMPTeamsScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
@@ -3511,10 +3558,47 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
+ emitPostUpdateForReductionClause(
+ *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+}
+
+static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
+ const OMPTargetTeamsDirective &S) {
+ auto *CS = S.getCapturedStmt(OMPD_teams);
+ Action.Enter(CGF);
+ auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ };
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDirective(
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
@@ -3740,9 +3824,47 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
+static void emitTargetParallelRegion(CodeGenFunction &CGF,
+ const OMPTargetParallelDirective &S,
+ PrePostActionTy &Action) {
+ // Get the captured statement associated with the 'parallel' region.
+ auto *CS = S.getCapturedStmt(OMPD_parallel);
+ Action.Enter(CGF);
+ auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
+ };
+ emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen);
+ emitPostUpdateForReductionClause(
+ CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetParallelDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetParallelRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
void CodeGenFunction::EmitOMPTargetParallelDirective(
const OMPTargetParallelDirective &S) {
- // TODO: codegen for target parallel.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetParallelRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTargetParallelForDirective(
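
For orientation, a minimal, hypothetical OpenMP snippet exercising the directives whose host and device emission the hunks above add; the clause choices and names are illustrative only, not taken from the patch or its tests:

    #include <omp.h>

    void offload(int n, float *x) {
      // Combined 'target teams': host side via EmitOMPTargetTeamsDirective,
      // device-side outlining via EmitOMPTargetTeamsDeviceFunction.
      #pragma omp target teams map(tofrom : x[0:n])
      if (omp_get_team_num() == 0 && n > 0)
        x[0] = 0.0f;

      // Combined 'target parallel': formerly a "TODO: codegen for target
      // parallel" stub, now emitted through EmitOMPTargetParallelDirective.
      #pragma omp target parallel map(tofrom : x[0:n])
      #pragma omp for
      for (int i = 0; i < n; ++i)
        x[i] += 1.0f;
    }
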
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 1a09830b52fd3..7b0c8bf7d6e9b 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -14,7 +14,7 @@
#include "CGCXXABI.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/RecordLayout.h"
#include "clang/CodeGen/CGFunctionInfo.h"
@@ -284,6 +284,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
if (isa<CXXDestructorDecl>(MD))
CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs);
+#ifndef NDEBUG
+ unsigned PrefixArgs = CallArgs.size() - 1;
+#endif
// Add the rest of the arguments.
for (const ParmVarDecl *PD : MD->parameters())
EmitDelegateCallArg(CallArgs, PD, SourceLocation());
@@ -292,7 +295,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
#ifndef NDEBUG
const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall(
- CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD));
+ CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD), PrefixArgs);
assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() &&
CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() &&
CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention());
@@ -380,8 +383,8 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
CGM.ConstructAttributeList(CalleePtr->getName(),
*CurFnInfo, MD, AttributeList,
CallingConv, /*AttrOnCallSite=*/true);
- llvm::AttributeSet Attrs =
- llvm::AttributeSet::get(getLLVMContext(), AttributeList);
+ llvm::AttributeList Attrs =
+ llvm::AttributeList::get(getLLVMContext(), AttributeList);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
@@ -744,9 +747,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
switch (keyFunction->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ExplicitSpecialization:
- assert((def || CodeGenOpts.OptimizationLevel > 0) &&
- "Shouldn't query vtable linkage without key function or "
- "optimizations");
+ assert((def || CodeGenOpts.OptimizationLevel > 0 ||
+ CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo) &&
+ "Shouldn't query vtable linkage without key function, "
+ "optimizations, or debug info");
if (!def && CodeGenOpts.OptimizationLevel > 0)
return llvm::GlobalVariable::AvailableExternallyLinkage;
@@ -942,7 +946,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout) {
- if (!getCodeGenOpts().PrepareForLTO)
+ if (!getCodeGenOpts().LTOUnit)
return;
CharUnits PointerWidth =
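
The llvm::AttributeSet to llvm::AttributeList rename seen here recurs through the rest of the patch; as a rough sketch of the new call-site shape (mirroring the later CodeGenModule.cpp hunks, with F standing for an arbitrary llvm::Function *):

    llvm::AttrBuilder B;
    B.addAttribute(llvm::Attribute::NoUnwind);
    F->addAttributes(llvm::AttributeList::FunctionIndex,
                     llvm::AttributeList::get(F->getContext(),
                                              llvm::AttributeList::FunctionIndex, B));
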
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 4fbf9f22a98db..84248cc647199 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -36,7 +36,6 @@ add_clang_library(clangCodeGen
CGAtomic.cpp
CGBlocks.cpp
CGBuiltin.cpp
- CGCUDABuiltin.cpp
CGCUDANV.cpp
CGCUDARuntime.cpp
CGCXX.cpp
@@ -55,6 +54,7 @@ add_clang_library(clangCodeGen
CGExprComplex.cpp
CGExprConstant.cpp
CGExprScalar.cpp
+ CGGPUBuiltin.cpp
CGLoopInfo.cpp
CGObjC.cpp
CGObjCGNU.cpp
@@ -75,8 +75,10 @@ add_clang_library(clangCodeGen
CodeGenPGO.cpp
CodeGenTBAA.cpp
CodeGenTypes.cpp
+ ConstantInitBuilder.cpp
CoverageMappingGen.cpp
ItaniumCXXABI.cpp
+ MacroPPCallbacks.cpp
MicrosoftCXXABI.cpp
ModuleBuilder.cpp
ObjectFilePCHContainerOperations.cpp
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 5f74141d75b37..b864069dc6456 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -7,7 +7,10 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/CodeGen/CodeGenAction.h"
+#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
+#include "MacroPPCallbacks.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
@@ -16,15 +19,16 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/BackendUtil.h"
-#include "clang/CodeGen/CodeGenAction.h"
#include "clang/CodeGen/ModuleBuilder.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
@@ -35,12 +39,16 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Transforms/IPO/Internalize.h"
+
#include <memory>
using namespace clang;
using namespace llvm;
namespace clang {
class BackendConsumer : public ASTConsumer {
+ using LinkModule = CodeGenAction::LinkModule;
+
virtual void anchor();
DiagnosticsEngine &Diags;
BackendAction Action;
@@ -61,43 +69,39 @@ namespace clang {
std::unique_ptr<CodeGenerator> Gen;
- SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4>
- LinkModules;
+ SmallVector<LinkModule, 4> LinkModules;
// This is here so that the diagnostic printer knows the module a diagnostic
// refers to.
llvm::Module *CurLinkModule = nullptr;
public:
- BackendConsumer(
- BackendAction Action, DiagnosticsEngine &Diags,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- bool TimePasses, const std::string &InFile,
- const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules,
- std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr)
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts, bool TimePasses,
+ const std::string &InFile,
+ SmallVector<LinkModule, 4> LinkModules,
+ std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
AsmOutStream(std::move(OS)), Context(nullptr),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
- CodeGenOpts, C, CoverageInfo)) {
+ CodeGenOpts, C, CoverageInfo)),
+ LinkModules(std::move(LinkModules)) {
llvm::TimePassesIsEnabled = TimePasses;
- for (auto &I : LinkModules)
- this->LinkModules.push_back(
- std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second)));
}
llvm::Module *getModule() const { return Gen->GetModule(); }
std::unique_ptr<llvm::Module> takeModule() {
return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
}
- void releaseLinkModules() {
- for (auto &I : LinkModules)
- I.second.release();
- }
+
+ CodeGenerator *getCodeGenerator() { return Gen.get(); }
void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
Gen->HandleCXXStaticMemberVarInstantiation(VD);
@@ -159,6 +163,35 @@ namespace clang {
HandleTopLevelDecl(D);
}
+ // Links each entry in LinkModules into our module. Returns true on error.
+ bool LinkInModules() {
+ for (auto &LM : LinkModules) {
+ if (LM.PropagateAttrs)
+ for (Function &F : *LM.Module)
+ Gen->CGM().AddDefaultFnAttrs(F);
+
+ CurLinkModule = LM.Module.get();
+
+ bool Err;
+ if (LM.Internalize) {
+ Err = Linker::linkModules(
+ *getModule(), std::move(LM.Module), LM.LinkFlags,
+ [](llvm::Module &M, const llvm::StringSet<> &GVS) {
+ internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
+ return !GV.hasName() || (GVS.count(GV.getName()) == 0);
+ });
+ });
+ } else {
+ Err = Linker::linkModules(*getModule(), std::move(LM.Module),
+ LM.LinkFlags);
+ }
+
+ if (Err)
+ return true;
+ }
+ return false; // success
+ }
+
void HandleTranslationUnit(ASTContext &C) override {
{
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
@@ -216,13 +249,9 @@ namespace clang {
Ctx.setDiagnosticHotnessRequested(true);
}
- // Link LinkModule into this module if present, preserving its validity.
- for (auto &I : LinkModules) {
- unsigned LinkFlags = I.first;
- CurLinkModule = I.second.get();
- if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags))
- return;
- }
+ // Link each LinkModule into our module.
+ if (LinkInModules())
+ return;
EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
@@ -275,7 +304,7 @@ namespace clang {
/// Get the best possible source location to represent a diagnostic that
/// may have associated debug info.
const FullSourceLoc
- getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D,
+ getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D,
bool &BadDebugInfo, StringRef &Filename,
unsigned &Line, unsigned &Column) const;
@@ -298,9 +327,8 @@ namespace clang {
/// them.
void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D,
unsigned DiagID);
- void OptimizationRemarkHandler(const llvm::OptimizationRemark &D);
- void OptimizationRemarkHandler(const llvm::OptimizationRemarkMissed &D);
- void OptimizationRemarkHandler(const llvm::OptimizationRemarkAnalysis &D);
+ void
+ OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D);
void OptimizationRemarkHandler(
const llvm::OptimizationRemarkAnalysisFPCommute &D);
void OptimizationRemarkHandler(
@@ -308,7 +336,7 @@ namespace clang {
void OptimizationFailureHandler(
const llvm::DiagnosticInfoOptimizationFailure &D);
};
-
+
void BackendConsumer::anchor() {}
}
@@ -377,7 +405,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D,
// code.
if (LocCookie.isValid()) {
Diags.Report(LocCookie, DiagID).AddString(Message);
-
+
if (D.getLoc().isValid()) {
DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here);
// Convert the SMDiagnostic ranges into SourceRange and attach them
@@ -390,7 +418,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D,
}
return;
}
-
+
// Otherwise, report the backend issue as occurring in the generated .s file.
// If Loc is invalid, we still need to report the issue, it just gets no
// location info.
@@ -477,8 +505,8 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) {
}
const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
- const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename,
- unsigned &Line, unsigned &Column) const {
+ const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo,
+ StringRef &Filename, unsigned &Line, unsigned &Column) const {
SourceManager &SourceMgr = Context->getSourceManager();
FileManager &FileMgr = SourceMgr.getFileManager();
SourceLocation DILoc;
@@ -568,36 +596,34 @@ void BackendConsumer::EmitOptimizationMessage(
}
void BackendConsumer::OptimizationRemarkHandler(
- const llvm::OptimizationRemark &D) {
- // Optimization remarks are active only if the -Rpass flag has a regular
- // expression that matches the name of the pass name in \p D.
- if (CodeGenOpts.OptimizationRemarkPattern &&
- CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName()))
- EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark);
-}
-
-void BackendConsumer::OptimizationRemarkHandler(
- const llvm::OptimizationRemarkMissed &D) {
- // Missed optimization remarks are active only if the -Rpass-missed
- // flag has a regular expression that matches the name of the pass
- // name in \p D.
- if (CodeGenOpts.OptimizationRemarkMissedPattern &&
- CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName()))
- EmitOptimizationMessage(D,
- diag::remark_fe_backend_optimization_remark_missed);
-}
-
-void BackendConsumer::OptimizationRemarkHandler(
- const llvm::OptimizationRemarkAnalysis &D) {
- // Optimization analysis remarks are active if the pass name is set to
- // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a
- // regular expression that matches the name of the pass name in \p D.
-
- if (D.shouldAlwaysPrint() ||
- (CodeGenOpts.OptimizationRemarkAnalysisPattern &&
- CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
- EmitOptimizationMessage(
- D, diag::remark_fe_backend_optimization_remark_analysis);
+ const llvm::DiagnosticInfoOptimizationBase &D) {
+ if (D.isPassed()) {
+ // Optimization remarks are active only if the -Rpass flag has a regular

+ // expression that matches the name of the pass in \p D.
+ if (CodeGenOpts.OptimizationRemarkPattern &&
+ CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName()))
+ EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark);
+ } else if (D.isMissed()) {
+ // Missed optimization remarks are active only if the -Rpass-missed
+ // flag has a regular expression that matches the name of the pass
+ // in \p D.
+ if (CodeGenOpts.OptimizationRemarkMissedPattern &&
+ CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName()))
+ EmitOptimizationMessage(
+ D, diag::remark_fe_backend_optimization_remark_missed);
+ } else {
+ assert(D.isAnalysis() && "Unknown remark type");
+
+ bool ShouldAlwaysPrint = false;
+ if (auto *ORA = dyn_cast<llvm::OptimizationRemarkAnalysis>(&D))
+ ShouldAlwaysPrint = ORA->shouldAlwaysPrint();
+
+ if (ShouldAlwaysPrint ||
+ (CodeGenOpts.OptimizationRemarkAnalysisPattern &&
+ CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
+ EmitOptimizationMessage(
+ D, diag::remark_fe_backend_optimization_remark_analysis);
+ }
}
void BackendConsumer::OptimizationRemarkHandler(
@@ -680,6 +706,21 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
// handler. There is no generic way of emitting them.
OptimizationRemarkHandler(cast<OptimizationRemarkAnalysisAliasing>(DI));
return;
+ case llvm::DK_MachineOptimizationRemark:
+ // Optimization remarks are always handled completely by this
+ // handler. There is no generic way of emitting them.
+ OptimizationRemarkHandler(cast<MachineOptimizationRemark>(DI));
+ return;
+ case llvm::DK_MachineOptimizationRemarkMissed:
+ // Optimization remarks are always handled completely by this
+ // handler. There is no generic way of emitting them.
+ OptimizationRemarkHandler(cast<MachineOptimizationRemarkMissed>(DI));
+ return;
+ case llvm::DK_MachineOptimizationRemarkAnalysis:
+ // Optimization remarks are always handled completely by this
+ // handler. There is no generic way of emitting them.
+ OptimizationRemarkHandler(cast<MachineOptimizationRemarkAnalysis>(DI));
+ return;
case llvm::DK_OptimizationFailure:
// Optimization failures are always handled completely by this
// handler.
@@ -729,10 +770,6 @@ void CodeGenAction::EndSourceFileAction() {
if (!getCompilerInstance().hasASTConsumer())
return;
- // Take back ownership of link modules we passed to consumer.
- if (!LinkModules.empty())
- BEConsumer->releaseLinkModules();
-
// Steal the module from the consumer.
TheModule = BEConsumer->takeModule();
}
@@ -775,13 +812,12 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
// Load bitcode modules to link with, if we need to.
if (LinkModules.empty())
- for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) {
- const std::string &LinkBCFile = I.second;
-
- auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile);
+ for (const CodeGenOptions::BitcodeFileToLink &F :
+ CI.getCodeGenOpts().LinkBitcodeFiles) {
+ auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
if (!BCBuf) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << BCBuf.getError().message();
+ << F.Filename << BCBuf.getError().message();
LinkModules.clear();
return nullptr;
}
@@ -791,12 +827,13 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
if (!ModuleOrErr) {
handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << EIB.message();
+ << F.Filename << EIB.message();
});
LinkModules.clear();
return nullptr;
}
- addLinkModule(ModuleOrErr.get().release(), I.first);
+ LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
}
CoverageSourceInfo *CoverageInfo = nullptr;
@@ -810,9 +847,20 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules,
- std::move(OS), *VMContext, CoverageInfo));
+ CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
+ std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
BEConsumer = Result.get();
+
+ // Enable generating macro debug info only when debug info is not disabled and
+ // also macro debug info is enabled.
+ if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo &&
+ CI.getCodeGenOpts().MacroDebugInfo) {
+ std::unique_ptr<PPCallbacks> Callbacks =
+ llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(),
+ CI.getPreprocessor());
+ CI.getPreprocessor().addPPCallbacks(std::move(Callbacks));
+ }
+
return std::move(Result);
}
@@ -838,6 +886,62 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM,
Diags->Report(DiagID).AddString("cannot compile inline asm");
}
+std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) {
+ CompilerInstance &CI = getCompilerInstance();
+ SourceManager &SM = CI.getSourceManager();
+
+ // For ThinLTO backend invocations, ensure that the context
+ // merges types based on ODR identifiers. We also need to read
+ // the correct module out of a multi-module bitcode file.
+ if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) {
+ VMContext->enableDebugTypeODRUniquing();
+
+ auto DiagErrors = [&](Error E) -> std::unique_ptr<llvm::Module> {
+ unsigned DiagID =
+ CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ CI.getDiagnostics().Report(DiagID) << EIB.message();
+ });
+ return {};
+ };
+
+ Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef);
+ if (!BMOrErr)
+ return DiagErrors(BMOrErr.takeError());
+
+ Expected<std::unique_ptr<llvm::Module>> MOrErr =
+ BMOrErr->parseModule(*VMContext);
+ if (!MOrErr)
+ return DiagErrors(MOrErr.takeError());
+ return std::move(*MOrErr);
+ }
+
+ llvm::SMDiagnostic Err;
+ if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext))
+ return M;
+
+ // Translate from the diagnostic info to the SourceManager location if
+ // available.
+ // TODO: Unify this with ConvertBackendLocation()
+ SourceLocation Loc;
+ if (Err.getLineNo() > 0) {
+ assert(Err.getColumnNo() >= 0);
+ Loc = SM.translateFileLineCol(SM.getFileEntryForID(SM.getMainFileID()),
+ Err.getLineNo(), Err.getColumnNo() + 1);
+ }
+
+ // Strip off a leading diagnostic code if there is one.
+ StringRef Msg = Err.getMessage();
+ if (Msg.startswith("error: "))
+ Msg = Msg.substr(7);
+
+ unsigned DiagID =
+ CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
+
+ CI.getDiagnostics().Report(Loc, DiagID) << Msg;
+ return {};
+}
+
void CodeGenAction::ExecuteAction() {
// If this is an IR file, we have to treat it specially.
if (getCurrentFileKind() == IK_LLVM_IR) {
@@ -855,35 +959,10 @@ void CodeGenAction::ExecuteAction() {
if (Invalid)
return;
- // For ThinLTO backend invocations, ensure that the context
- // merges types based on ODR identifiers.
- if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty())
- VMContext->enableDebugTypeODRUniquing();
-
- llvm::SMDiagnostic Err;
- TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext);
- if (!TheModule) {
- // Translate from the diagnostic info to the SourceManager location if
- // available.
- // TODO: Unify this with ConvertBackendLocation()
- SourceLocation Loc;
- if (Err.getLineNo() > 0) {
- assert(Err.getColumnNo() >= 0);
- Loc = SM.translateFileLineCol(SM.getFileEntryForID(FID),
- Err.getLineNo(), Err.getColumnNo() + 1);
- }
-
- // Strip off a leading diagnostic code if there is one.
- StringRef Msg = Err.getMessage();
- if (Msg.startswith("error: "))
- Msg = Msg.substr(7);
-
- unsigned DiagID =
- CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
-
- CI.getDiagnostics().Report(Loc, DiagID) << Msg;
+ TheModule = loadModule(*MainFile);
+ if (!TheModule)
return;
- }
+
const TargetOptions &TargetOpts = CI.getTargetOpts();
if (TheModule->getTargetTriple() != TargetOpts.Triple) {
CI.getDiagnostics().Report(SourceLocation(),
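
Condensed from the LinkInModules() hunk above (Dst, Src and Flags stand in for the destination module, the link module and its link flags), the internalizing link path reduces to:

    bool Failed = llvm::Linker::linkModules(
        Dst, std::move(Src), Flags,
        [](llvm::Module &M, const llvm::StringSet<> &GVS) {
          // Internalize every named global the destination does not reference.
          llvm::internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
            return !GV.hasName() || GVS.count(GV.getName()) == 0;
          });
        });
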
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index e142a21b1e744..6e6eb7d7f13c6 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -45,15 +45,15 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
if (CGOpts.DisableLifetimeMarkers)
return false;
- // Asan uses markers for use-after-scope checks.
- if (CGOpts.SanitizeAddressUseAfterScope)
- return true;
-
// Disable lifetime markers in msan builds.
// FIXME: Remove this when msan works with lifetime markers.
if (LangOpts.Sanitize.has(SanitizerKind::Memory))
return false;
+ // Asan uses markers for use-after-scope checks.
+ if (CGOpts.SanitizeAddressUseAfterScope)
+ return true;
+
// For now, only in optimized builds.
return CGOpts.OptimizationLevel != 0;
}
@@ -149,6 +149,8 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
Alignment = CGM.getClassPointerAlignment(RD);
} else {
Alignment = getContext().getTypeAlignInChars(T);
+ if (T.getQualifiers().hasUnaligned())
+ Alignment = CharUnits::One();
}
// Cap to the global maximum type alignment unless the alignment
@@ -200,7 +202,8 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) {
llvm_unreachable("non-canonical or dependent type in IR-generation");
case Type::Auto:
- llvm_unreachable("undeduced auto type in IR-generation");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("undeduced type in IR-generation");
// Various scalar types.
case Type::Builtin:
@@ -607,11 +610,6 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName));
- // Get argument type qualifiers:
- if (ty.isConstQualified())
- typeQuals = "const";
- if (ty.isVolatileQualified())
- typeQuals += typeQuals.empty() ? "volatile" : " volatile";
if (isPipe)
typeQuals = "pipe";
}
@@ -707,6 +705,11 @@ static bool endsWithReturn(const Decl* F) {
return false;
}
+static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) {
+ Fn->addFnAttr("sanitize_thread_no_checking_at_run_time");
+ Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
+}
+
void CodeGenFunction::StartFunction(GlobalDecl GD,
QualType RetTy,
llvm::Function *Fn,
@@ -750,16 +753,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr(llvm::Attribute::SafeStack);
// Ignore TSan memory acesses from within ObjC/ObjC++ dealloc, initialize,
- // .cxx_destruct and all of their calees at run time.
+ // .cxx_destruct, __destroy_helper_block_ and all of their callees at run time.
if (SanOpts.has(SanitizerKind::Thread)) {
if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) {
IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0);
if (OMD->getMethodFamily() == OMF_dealloc ||
OMD->getMethodFamily() == OMF_initialize ||
(OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) {
- Fn->addFnAttr("sanitize_thread_no_checking_at_run_time");
- Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
+ markAsIgnoreThreadCheckingAtRuntime(Fn);
}
+ } else if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ IdentifierInfo *II = FD->getIdentifier();
+ if (II && II->isStr("__destroy_helper_block_"))
+ markAsIgnoreThreadCheckingAtRuntime(Fn);
}
}
@@ -770,10 +776,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr("function-instrument", "xray-always");
if (XRayAttr->neverXRayInstrument())
Fn->addFnAttr("function-instrument", "xray-never");
+ if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) {
+ Fn->addFnAttr("xray-log-args",
+ llvm::utostr(LogArgs->getArgumentCount()));
+ }
} else {
- Fn->addFnAttr(
- "xray-instruction-threshold",
- llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
+ if (!CGM.imbueXRayAttrs(Fn, Loc))
+ Fn->addFnAttr(
+ "xray-instruction-threshold",
+ llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
}
}
@@ -807,6 +818,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
}
}
+ // If we're checking nullability, we need to know whether we can check the
+ // return value. Initialize the flag to 'true' and refine it in EmitParmDecl.
+ if (SanOpts.has(SanitizerKind::NullabilityReturn)) {
+ auto Nullability = FnRetTy->getNullability(getContext());
+ if (Nullability && *Nullability == NullabilityKind::NonNull) {
+ if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) &&
+ CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()))
+ RetValNullabilityPrecondition =
+ llvm::ConstantInt::getTrue(getLLVMContext());
+ }
+ }
+
// If we're in C++ mode and the function name is "main", it is guaranteed
// to be norecurse by the standard (3.6.1.3 "The function main shall not be
// used within a program").
@@ -851,8 +874,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// inlining, we just add an attribute to insert a mcount call in backend.
// The attribute "counting-function" is set to mcount function name which is
// architecture dependent.
- if (CGM.getCodeGenOpts().InstrumentForProfiling)
- Fn->addFnAttr("counting-function", getTarget().getMCountName());
+ if (CGM.getCodeGenOpts().InstrumentForProfiling) {
+ if (CGM.getCodeGenOpts().CallFEntry)
+ Fn->addFnAttr("fentry-call", "true");
+ else
+ Fn->addFnAttr("counting-function", getTarget().getMCountName());
+ }
if (RetTy->isVoidType()) {
// Void type; nothing to return.
@@ -935,6 +962,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// fast register allocator would be happier...
CXXThisValue = CXXABIThisValue;
}
+
+ // Check the 'this' pointer once per function, if it's available.
+ if (CXXThisValue) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::ObjectSize, true);
+ QualType ThisTy = MD->getThisType(getContext());
+ EmitTypeCheck(TCK_Load, Loc, CXXThisValue, ThisTy,
+ getContext().getTypeAlignInChars(ThisTy->getPointeeType()),
+ SkippedChecks);
+ }
}
// If any of the arguments have a variably modified type, make sure to
@@ -1076,8 +1113,13 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
if (FD->hasAttr<NoDebugAttr>())
DebugInfo = nullptr; // disable debug info indefinitely for this function
+ // The function might not have a body if we're generating thunks for a
+ // function declaration.
SourceRange BodyRange;
- if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange();
+ if (Stmt *Body = FD->getBody())
+ BodyRange = Body->getSourceRange();
+ else
+ BodyRange = FD->getLocation();
CurEHLocation = BodyRange.getEnd();
// Use the location of the start of the function to determine where
@@ -1891,6 +1933,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
case Type::Typedef:
case Type::Decltype:
case Type::Auto:
+ case Type::DeducedTemplateSpecialization:
// Stop walking: nothing to do.
return;
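
Two of the CodeGenFunction.cpp changes above are driven by user-visible knobs: the profiling branch now emits "fentry-call"="true" instead of the mcount "counting-function" attribute when CodeGenOpts.CallFEntry is set (presumably the -mfentry flag), and XRayLogArgsAttr is translated into an "xray-log-args" function attribute. A hypothetical use of the latter (attribute spelling assumed, not shown in this diff):

    // With XRay instrumentation enabled, the first argument of this function
    // is recorded in the XRay log ("xray-log-args"="1" in the emitted IR).
    [[clang::xray_always_instrument, clang::xray_log_args(1)]]
    void HandleRequest(unsigned long long RequestId);
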
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 586134023240b..fa72019eb08b2 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -115,6 +115,8 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(MissingReturn, missing_return, 0) \
SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \
SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \
+ SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \
+ SANITIZER_CHECK(NullabilityReturn, nullability_return, 0) \
SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \
SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \
SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \
@@ -212,6 +214,13 @@ public:
/// value. This is invalid iff the function has no return value.
Address ReturnValue;
+ /// Return true if a label was seen in the current scope.
+ bool hasLabelBeenSeenInCurrentScope() const {
+ if (CurLexicalScope)
+ return CurLexicalScope->hasLabels();
+ return !LabelMap.empty();
+ }
+
/// AllocaInsertPoint - This is an instruction in the entry block before which
/// we prefer to insert allocas.
llvm::AssertingVH<llvm::Instruction> AllocaInsertPt;
@@ -298,6 +307,31 @@ public:
~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; }
};
+ /// An abstract representation of regular/ObjC call/message targets.
+ class AbstractCallee {
+ /// The function declaration of the callee.
+ const Decl *CalleeDecl;
+
+ public:
+ AbstractCallee() : CalleeDecl(nullptr) {}
+ AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {}
+ AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {}
+ bool hasFunctionDecl() const {
+ return dyn_cast_or_null<FunctionDecl>(CalleeDecl);
+ }
+ const Decl *getDecl() const { return CalleeDecl; }
+ unsigned getNumParams() const {
+ if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl))
+ return FD->getNumParams();
+ return cast<ObjCMethodDecl>(CalleeDecl)->param_size();
+ }
+ const ParmVarDecl *getParamDecl(unsigned I) const {
+ if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl))
+ return FD->getParamDecl(I);
+ return *(cast<ObjCMethodDecl>(CalleeDecl)->param_begin() + I);
+ }
+ };
+
/// \brief Sanitizers enabled for this function.
SanitizerSet SanOpts;
@@ -548,14 +582,10 @@ public:
CGF.DidCallStackSave = false;
}
- /// \brief Exit this cleanup scope, emitting any accumulated
- /// cleanups.
+ /// \brief Exit this cleanup scope, emitting any accumulated cleanups.
~RunCleanupsScope() {
- if (PerformCleanup) {
- CGF.DidCallStackSave = OldDidCallStackSave;
- CGF.PopCleanupBlocks(CleanupStackDepth,
- LifetimeExtendedCleanupStackSize);
- }
+ if (PerformCleanup)
+ ForceCleanup();
}
/// \brief Determine whether this scope requires any cleanups.
@@ -565,11 +595,15 @@ public:
/// \brief Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
- void ForceCleanup() {
+ /// \param ValuesToReload - A list of values that need to be available at
+ /// the insertion point after cleanup emission. If cleanup emission created
+ /// a shared cleanup block, these value pointers will be rewritten.
+ /// Otherwise, they will not be modified.
+ void ForceCleanup(std::initializer_list<llvm::Value**> ValuesToReload = {}) {
assert(PerformCleanup && "Already forced cleanup");
CGF.DidCallStackSave = OldDidCallStackSave;
- CGF.PopCleanupBlocks(CleanupStackDepth,
- LifetimeExtendedCleanupStackSize);
+ CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize,
+ ValuesToReload);
PerformCleanup = false;
}
};
@@ -620,6 +654,10 @@ public:
rescopeLabels();
}
+ bool hasLabels() const {
+ return !Labels.empty();
+ }
+
void rescopeLabels();
};
@@ -727,13 +765,17 @@ public:
/// \brief Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added.
- void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize);
+ void
+ PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
+ std::initializer_list<llvm::Value **> ValuesToReload = {});
/// \brief Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added, then adds all lifetime-extended cleanups from
/// the given position to the stack.
- void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
- size_t OldLifetimeExtendedStackSize);
+ void
+ PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
+ size_t OldLifetimeExtendedStackSize,
+ std::initializer_list<llvm::Value **> ValuesToReload = {});
void ResolveBranchFixups(llvm::BasicBlock *Target);
@@ -1116,10 +1158,11 @@ private:
uint64_t LoopCount);
public:
- /// Increment the profiler's counter for the given statement.
- void incrementProfileCounter(const Stmt *S) {
+ /// Increment the profiler's counter for the given statement by \p StepV.
+ /// If \p StepV is null, the default increment is 1.
+ void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) {
if (CGM.getCodeGenOpts().hasProfileClangInstr())
- PGO.emitCounterIncrement(Builder, S);
+ PGO.emitCounterIncrement(Builder, S, StepV);
PGO.setCurrentStmt(S);
}
@@ -1334,6 +1377,16 @@ private:
/// information about the layout of the variable.
llvm::DenseMap<const ValueDecl *, BlockByrefInfo> BlockByrefInfos;
+ /// Used by -fsanitize=nullability-return to determine whether the return
+ /// value can be checked.
+ llvm::Value *RetValNullabilityPrecondition = nullptr;
+
+ /// Check if -fsanitize=nullability-return instrumentation is required for
+ /// this function.
+ bool requiresReturnValueNullabilityCheck() const {
+ return RetValNullabilityPrecondition;
+ }
+
llvm::BasicBlock *TerminateLandingPad;
llvm::BasicBlock *TerminateHandler;
llvm::BasicBlock *TrapBB;
@@ -1553,6 +1606,8 @@ public:
SourceLocation Loc = SourceLocation(),
SourceLocation StartLoc = SourceLocation());
+ static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor);
+
void EmitConstructorBody(FunctionArgList &Args);
void EmitDestructorBody(FunctionArgList &Args);
void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
@@ -1710,6 +1765,9 @@ public:
void EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc,
SourceLocation EndLoc);
+ /// Emit a test that checks if the return value \p RV is nonnull.
+ void EmitReturnValueCheck(llvm::Value *RV, SourceLocation EndLoc);
+
/// EmitStartEHSpec - Emit the start of the exception spec.
void EmitStartEHSpec(const Decl *D);
@@ -1928,7 +1986,7 @@ public:
/// pointer to a char.
Address EmitMSVAListRef(const Expr *E);
- /// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will
+ /// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will
/// always be accessible even if no aggregate location is provided.
RValue EmitAnyExprToTemp(const Expr *E);
@@ -2019,6 +2077,9 @@ public:
llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L);
llvm::BasicBlock *GetIndirectGotoBlock();
+ /// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts.
+ static bool IsWrappedCXXThis(const Expr *E);
+
/// EmitNullInitialization - Generate code to set a value of the given type to
/// null, If the type contains data member pointers, they will be initialized
/// to -1 in accordance with the Itanium C++ ABI.
@@ -2230,7 +2291,9 @@ public:
TCK_Upcast,
/// Checking the operand of a cast to a virtual base object. Must be an
/// object within its lifetime.
- TCK_UpcastToVirtualBase
+ TCK_UpcastToVirtualBase,
+ /// Checking the value assigned to a _Nonnull pointer. Must not be null.
+ TCK_NonnullAssign
};
/// \brief Whether any type-checking sanitizers are enabled. If \c false,
@@ -2241,7 +2304,7 @@ public:
/// appropriate size and alignment for an object of type \p Type.
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
QualType Type, CharUnits Alignment = CharUnits::Zero(),
- bool SkipNullCheck = false);
+ SanitizerSet SkippedChecks = SanitizerSet());
/// \brief Emit a check that \p Base points into an array object, which
/// we can access at index \p Index. \p Accessed should be \c false if we
@@ -2401,6 +2464,12 @@ public:
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);
+ void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment,
+ llvm::Value *OffsetValue = nullptr) {
+ Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment,
+ OffsetValue);
+ }
+
//===--------------------------------------------------------------------===//
// Statement Emission
//===--------------------------------------------------------------------===//
@@ -2463,6 +2532,13 @@ public:
void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S);
void EmitCoroutineBody(const CoroutineBodyStmt &S);
+ void EmitCoreturnStmt(const CoreturnStmt &S);
+ RValue EmitCoawaitExpr(const CoawaitExpr &E,
+ AggValueSlot aggSlot = AggValueSlot::ignored(),
+ bool ignoreResult = false);
+ RValue EmitCoyieldExpr(const CoyieldExpr &E,
+ AggValueSlot aggSlot = AggValueSlot::ignored(),
+ bool ignoreResult = false);
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID);
void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
@@ -2627,7 +2703,9 @@ public:
/// the end of the directive.
///
/// \param D Directive that has at least one 'reduction' directives.
- void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D);
+ /// \param ReductionKind The kind of reduction to perform.
+ void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D,
+ const OpenMPDirectiveKind ReductionKind);
/// \brief Emit initial code for linear variables. Creates private copies
/// and initializes them with the values according to OpenMP standard.
///
@@ -2704,13 +2782,16 @@ public:
void EmitOMPTargetTeamsDistributeSimdDirective(
const OMPTargetTeamsDistributeSimdDirective &S);
- /// Emit outlined function for the target directive.
- static std::pair<llvm::Function * /*OutlinedFn*/,
- llvm::Constant * /*OutlinedFnID*/>
- EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM,
- const OMPTargetDirective &S,
- StringRef ParentName,
- bool IsOffloadEntry);
+ /// Emit device code for the target directive.
+ static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
+ StringRef ParentName,
+ const OMPTargetDirective &S);
+ static void
+ EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetParallelDirective &S);
+ static void
+ EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S);
/// \brief Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
@@ -2843,6 +2924,13 @@ public:
/// representation to its value representation.
llvm::Value *EmitFromMemory(llvm::Value *Value, QualType Ty);
+ /// Check if the scalar \p Value is within the valid range for the given
+ /// type \p Ty.
+ ///
+ /// Returns true if a check is needed (even if the range is unknown).
+ bool EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
+ SourceLocation Loc);
+
/// EmitLoadOfScalar - Load a scalar value from an address, taking
/// care to appropriately convert from the memory representation to
/// the LLVM value representation.
@@ -2883,7 +2971,7 @@ public:
/// rvalue, returning the rvalue.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc);
RValue EmitLoadOfExtVectorElementLValue(LValue V);
- RValue EmitLoadOfBitfieldLValue(LValue LV);
+ RValue EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc);
RValue EmitLoadOfGlobalRegLValue(LValue LV);
/// EmitStoreThroughLValue - Store the specified rvalue into the specified
@@ -3092,8 +3180,8 @@ public:
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);
- RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue);
+ RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue);
RValue EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E,
@@ -3149,6 +3237,8 @@ private:
public:
llvm::Value *EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E);
+ llvm::Value *EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args);
+
llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E);
@@ -3396,6 +3486,10 @@ public:
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
+ /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is
+ /// nonnull, if \p LHS is marked _Nonnull.
+ void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc);
+
/// \brief Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.
llvm::Constant *EmitCheckTypeDescriptor(QualType T);
@@ -3429,13 +3523,16 @@ public:
/// "trap-func-name" if specified.
llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID);
+ /// \brief Emit a stub for the cross-DSO CFI check function.
+ void EmitCfiCheckStub();
+
/// \brief Emit a cross-DSO CFI failure handling function.
void EmitCfiCheckFail();
/// \brief Create a check for a function parameter that may potentially be
/// declared as non-null.
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
- const FunctionDecl *FD, unsigned ParmNum);
+ AbstractCallee AC, unsigned ParmNum);
/// EmitCallArg - Emit a single call argument.
void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType);
@@ -3490,14 +3587,18 @@ private:
/// \brief Attempts to statically evaluate the object size of E. If that
/// fails, emits code to figure the size of E out for us. This is
/// pass_object_size aware.
+ ///
+ /// If EmittedE is non-null, this will use that instead of re-emitting E.
llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType);
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE);
/// \brief Emits the size of E, as required by __builtin_object_size. This
/// function is aware of pass_object_size parameters, and will act accordingly
/// if E is a parameter with the pass_object_size attribute.
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType);
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE);
public:
#ifndef NDEBUG
@@ -3533,7 +3634,7 @@ public:
template <typename T>
void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
- const FunctionDecl *CalleeDecl = nullptr,
+ AbstractCallee AC = AbstractCallee(),
unsigned ParamsToSkip = 0,
EvaluationOrder Order = EvaluationOrder::Default) {
SmallVector<QualType, 16> ArgTypes;
@@ -3575,12 +3676,12 @@ public:
for (auto *A : llvm::make_range(Arg, ArgRange.end()))
ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType());
- EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order);
+ EmitCallArgs(Args, ArgTypes, ArgRange, AC, ParamsToSkip, Order);
}
void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
- const FunctionDecl *CalleeDecl = nullptr,
+ AbstractCallee AC = AbstractCallee(),
unsigned ParamsToSkip = 0,
EvaluationOrder Order = EvaluationOrder::Default);
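
For context on the new nullability hooks (RetValNullabilityPrecondition, EmitNullabilityCheck, the NullabilityArg/NullabilityReturn checks), hypothetical user code of the kind they instrument; the -fsanitize=nullability-arg / -fsanitize=nullability-return spellings are assumed rather than taken from this diff:

    // nullability-arg: each call site checks that the _Nonnull argument is
    // actually non-null before the call.
    int first(int *_Nonnull p);

    // nullability-return: the return value is checked against the _Nonnull
    // annotation before it leaves the function; returning nullptr here would
    // trip the check at run time.
    int *_Nonnull find(int *base, int n) {
      return n > 0 ? base : nullptr;
    }
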
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 36005430ae4c3..d48bff9c30a3d 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -24,7 +24,6 @@
#include "CodeGenFunction.h"
#include "CodeGenPGO.h"
#include "CodeGenTBAA.h"
-#include "ConstantBuilder.h"
#include "CoverageMappingGen.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -42,6 +41,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Triple.h"
@@ -406,8 +406,11 @@ void CodeGenModule::Release() {
EmitDeferredUnusedCoverageMappings();
if (CoverageMapping)
CoverageMapping->emit();
- if (CodeGenOpts.SanitizeCfiCrossDso)
+ if (CodeGenOpts.SanitizeCfiCrossDso) {
CodeGenFunction(*this).EmitCfiCheckFail();
+ CodeGenFunction(*this).EmitCfiCheckStub();
+ }
+ emitAtAvailableLinkGuard();
emitLLVMUsed();
if (SanStats)
SanStats->finish();
@@ -416,6 +419,12 @@ void CodeGenModule::Release() {
(Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) {
EmitModuleLinkOptions();
}
+
+ // Record mregparm value now so it is visible through rest of codegen.
+ if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
+ getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
+ CodeGenOpts.NumRegisterParameters);
+
if (CodeGenOpts.DwarfVersion) {
// We actually want the latest version when there are conflicts.
// We can change from Warning to Latest if such mode is supported.
@@ -833,7 +842,7 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
AttributeListType AttributeList;
ConstructAttributeList(F->getName(), Info, D, AttributeList, CallingConv,
false);
- F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList));
+ F->setAttributes(llvm::AttributeList::get(getLLVMContext(), AttributeList));
F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
}
@@ -882,10 +891,10 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);
- F->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(
- F->getContext(),
- llvm::AttributeSet::FunctionIndex, B));
+ F->addAttributes(
+ llvm::AttributeList::FunctionIndex,
+ llvm::AttributeList::get(F->getContext(),
+ llvm::AttributeList::FunctionIndex, B));
return;
}
@@ -951,9 +960,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::MinSize);
}
- F->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(
- F->getContext(), llvm::AttributeSet::FunctionIndex, B));
+ F->addAttributes(llvm::AttributeList::FunctionIndex,
+ llvm::AttributeList::get(
+ F->getContext(), llvm::AttributeList::FunctionIndex, B));
unsigned alignment = D->getMaxAlignment() / Context.getCharWidth();
if (alignment)
@@ -1029,7 +1038,6 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
} else if (ND->hasAttr<DLLExportAttr>()) {
GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
} else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) {
// "extern_weak" is overloaded in LLVM; we probably should have
// separate linkage types for this.
@@ -1107,7 +1115,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
if (FD->isReplaceableGlobalAllocationFunction()) {
// A replaceable global allocation function does not act like a builtin by
// default, only if it is invoked by a new-expression or delete-expression.
- F->addAttribute(llvm::AttributeSet::FunctionIndex,
+ F->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoBuiltin);
// A sane operator new returns a non-aliasing pointer.
@@ -1116,7 +1124,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
auto Kind = FD->getDeclName().getCXXOverloadedOperator();
if (getCodeGenOpts().AssumeSaneOperatorNew &&
(Kind == OO_New || Kind == OO_Array_New))
- F->addAttribute(llvm::AttributeSet::ReturnIndex,
+ F->addAttribute(llvm::AttributeList::ReturnIndex,
llvm::Attribute::NoAlias);
}
@@ -1482,6 +1490,30 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
return false;
}
+bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
+ StringRef Category) const {
+ if (!LangOpts.XRayInstrument)
+ return false;
+ const auto &XRayFilter = getContext().getXRayFilter();
+ using ImbueAttr = XRayFunctionFilter::ImbueAttribute;
+ auto Attr = XRayFunctionFilter::ImbueAttribute::NONE;
+ if (Loc.isValid())
+ Attr = XRayFilter.shouldImbueLocation(Loc, Category);
+ if (Attr == ImbueAttr::NONE)
+ Attr = XRayFilter.shouldImbueFunction(Fn->getName());
+ switch (Attr) {
+ case ImbueAttr::NONE:
+ return false;
+ case ImbueAttr::ALWAYS:
+ Fn->addFnAttr("function-instrument", "xray-always");
+ break;
+ case ImbueAttr::NEVER:
+ Fn->addFnAttr("function-instrument", "xray-never");
+ break;
+ }
+ return true;
+}
+
bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
// Never defer when EmitAllDecls is specified.
if (LangOpts.EmitAllDecls)
@@ -1693,6 +1725,16 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
}
}
+// Check if T is a class type with a destructor that's not dllimport.
+static bool HasNonDllImportDtor(QualType T) {
+ if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs<RecordType>())
+ if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+ if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>())
+ return true;
+
+ return false;
+}
+
namespace {
struct FunctionIsDirectlyRecursive :
public RecursiveASTVisitor<FunctionIsDirectlyRecursive> {
@@ -1726,6 +1768,7 @@ namespace {
}
};
+ // Make sure we're not referencing non-imported vars or functions.
struct DLLImportFunctionVisitor
: public RecursiveASTVisitor<DLLImportFunctionVisitor> {
bool SafeToInline = true;
@@ -1733,12 +1776,25 @@ namespace {
bool shouldVisitImplicitCode() const { return true; }
bool VisitVarDecl(VarDecl *VD) {
- // A thread-local variable cannot be imported.
- SafeToInline = !VD->getTLSKind();
+ if (VD->getTLSKind()) {
+ // A thread-local variable cannot be imported.
+ SafeToInline = false;
+ return SafeToInline;
+ }
+
+ // A variable definition might imply a destructor call.
+ if (VD->isThisDeclarationADefinition())
+ SafeToInline = !HasNonDllImportDtor(VD->getType());
+
+ return SafeToInline;
+ }
+
+ bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
+ if (const auto *D = E->getTemporary()->getDestructor())
+ SafeToInline = D->hasAttr<DLLImportAttr>();
return SafeToInline;
}
- // Make sure we're not referencing non-imported vars or functions.
bool VisitDeclRefExpr(DeclRefExpr *E) {
ValueDecl *VD = E->getDecl();
if (isa<FunctionDecl>(VD))
@@ -1747,14 +1803,28 @@ namespace {
SafeToInline = !V->hasGlobalStorage() || V->hasAttr<DLLImportAttr>();
return SafeToInline;
}
+
bool VisitCXXConstructExpr(CXXConstructExpr *E) {
SafeToInline = E->getConstructor()->hasAttr<DLLImportAttr>();
return SafeToInline;
}
+
+ bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
+ CXXMethodDecl *M = E->getMethodDecl();
+ if (!M) {
+ // Call through a pointer to member function. This is safe to inline.
+ SafeToInline = true;
+ } else {
+ SafeToInline = M->hasAttr<DLLImportAttr>();
+ }
+ return SafeToInline;
+ }
+
bool VisitCXXDeleteExpr(CXXDeleteExpr *E) {
SafeToInline = E->getOperatorDelete()->hasAttr<DLLImportAttr>();
return SafeToInline;
}
+
bool VisitCXXNewExpr(CXXNewExpr *E) {
SafeToInline = E->getOperatorNew()->hasAttr<DLLImportAttr>();
return SafeToInline;
@@ -1783,16 +1853,6 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) {
return Walker.Result;
}
-// Check if T is a class type with a destructor that's not dllimport.
-static bool HasNonDllImportDtor(QualType T) {
- if (const RecordType *RT = dyn_cast<RecordType>(T))
- if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
- if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>())
- return true;
-
- return false;
-}
-
bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage)
return true;
@@ -1828,22 +1888,6 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
return !isTriviallyRecursive(F);
}
-/// If the type for the method's class was generated by
-/// CGDebugInfo::createContextChain(), the cache contains only a
-/// limited DIType without any declarations. Since EmitFunctionStart()
-/// needs to find the canonical declaration for each method, we need
-/// to construct the complete type prior to emitting the method.
-void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) {
- if (!D->isInstance())
- return;
-
- if (CGDebugInfo *DI = getModuleDebugInfo())
- if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) {
- const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext()));
- DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation());
- }
-}
-
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const auto *D = cast<ValueDecl>(GD.getDecl());
@@ -1858,7 +1902,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
return;
if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
- CompleteDIClassType(Method);
// Make sure to emit the definition(s) before we emit the thunks.
// This is necessary for the generation of certain thunks.
if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method))
@@ -1893,13 +1936,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
///
/// If D is non-null, it specifies a decl that correspond to this. This is used
/// to set the attributes on the function when it is first created.
-llvm::Constant *
-CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
- llvm::Type *Ty,
- GlobalDecl GD, bool ForVTable,
- bool DontDefer, bool IsThunk,
- llvm::AttributeSet ExtraAttrs,
- ForDefinition_t IsForDefinition) {
+llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
+ StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable,
+ bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs,
+ ForDefinition_t IsForDefinition) {
const Decl *D = GD.getDecl();
// Lookup the entry, lazily creating it if necessary.
@@ -1989,12 +2029,11 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
assert(F->getName() == MangledName && "name was uniqued!");
if (D)
SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
- if (ExtraAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) {
- llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeSet::FunctionIndex);
- F->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(VMContext,
- llvm::AttributeSet::FunctionIndex,
- B));
+ if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) {
+ llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex);
+ F->addAttributes(llvm::AttributeList::FunctionIndex,
+ llvm::AttributeList::get(
+ VMContext, llvm::AttributeList::FunctionIndex, B));
}
if (!DontDefer) {
@@ -2069,7 +2108,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
StringRef MangledName = getMangledName(GD);
return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
- /*IsThunk=*/false, llvm::AttributeSet(),
+ /*IsThunk=*/false, llvm::AttributeList(),
IsForDefinition);
}
@@ -2115,7 +2154,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) {
/// type and name.
llvm::Constant *
CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
- llvm::AttributeSet ExtraAttrs,
+ llvm::AttributeList ExtraAttrs,
bool Local) {
llvm::Constant *C =
GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
@@ -2143,9 +2182,8 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
/// CreateBuiltinFunction - Create a new builtin function with the specified
/// type and name.
llvm::Constant *
-CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy,
- StringRef Name,
- llvm::AttributeSet ExtraAttrs) {
+CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name,
+ llvm::AttributeList ExtraAttrs) {
llvm::Constant *C =
GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
/*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs);
@@ -2803,7 +2841,7 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
// We are guaranteed to have a strong definition somewhere else,
// so we can use available_externally linkage.
if (Linkage == GVA_AvailableExternally)
- return llvm::Function::AvailableExternallyLinkage;
+ return llvm::GlobalValue::AvailableExternallyLinkage;
// Note that Apple's kernel linker doesn't support symbol
// coalescing, so we need to avoid linkonce and weak linkages there.
@@ -2897,14 +2935,8 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
continue;
// Get the call site's attribute list.
- SmallVector<llvm::AttributeSet, 8> newAttrs;
- llvm::AttributeSet oldAttrs = callSite.getAttributes();
-
- // Collect any return attributes from the call.
- if (oldAttrs.hasAttributes(llvm::AttributeSet::ReturnIndex))
- newAttrs.push_back(
- llvm::AttributeSet::get(newFn->getContext(),
- oldAttrs.getRetAttributes()));
+ SmallVector<llvm::AttributeSet, 8> newArgAttrs;
+ llvm::AttributeList oldAttrs = callSite.getAttributes();
// If the function was passed too few arguments, don't transform.
unsigned newNumArgs = newFn->arg_size();
@@ -2914,27 +2946,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
// If any of the types mismatch, we don't transform.
unsigned argNo = 0;
bool dontTransform = false;
- for (llvm::Function::arg_iterator ai = newFn->arg_begin(),
- ae = newFn->arg_end(); ai != ae; ++ai, ++argNo) {
- if (callSite.getArgument(argNo)->getType() != ai->getType()) {
+ for (llvm::Argument &A : newFn->args()) {
+ if (callSite.getArgument(argNo)->getType() != A.getType()) {
dontTransform = true;
break;
}
// Add any parameter attributes.
- if (oldAttrs.hasAttributes(argNo + 1))
- newAttrs.
- push_back(llvm::
- AttributeSet::get(newFn->getContext(),
- oldAttrs.getParamAttributes(argNo + 1)));
+ newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo));
+ argNo++;
}
if (dontTransform)
continue;
- if (oldAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex))
- newAttrs.push_back(llvm::AttributeSet::get(newFn->getContext(),
- oldAttrs.getFnAttributes()));
-
// Okay, we can transform this. Create the new call instruction and copy
// over the required information.
newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo);
@@ -2958,8 +2982,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
if (!newCall->getType()->isVoidTy())
newCall->takeName(callSite.getInstruction());
- newCall.setAttributes(
- llvm::AttributeSet::get(newFn->getContext(), newAttrs));
+ newCall.setAttributes(llvm::AttributeList::get(
+ newFn->getContext(), oldAttrs.getFnAttributes(),
+ oldAttrs.getRetAttributes(), newArgAttrs));
newCall.setCallingConv(callSite.getCallingConv());
// Finally, remove the old call, replacing any uses with the new one.
@@ -3341,6 +3366,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm_unreachable("unknown file format");
case llvm::Triple::COFF:
case llvm::Triple::ELF:
+ case llvm::Triple::Wasm:
GV->setSection("cfstring");
break;
case llvm::Triple::MachO:
@@ -3790,6 +3816,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
EmitDeclContext(cast<NamespaceDecl>(D));
break;
case Decl::CXXRecord:
+ if (DebugInfo) {
+ if (auto *ES = D->getASTContext().getExternalSource())
+ if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
+ DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D));
+ }
// Emit any static data members, they may be definitions.
for (auto *I : cast<CXXRecordDecl>(D)->decls())
if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I))
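Most of the CodeGenModule.cpp changes above are a mechanical migration from llvm::AttributeSet to the new llvm::AttributeList interface, with per-argument attributes collected into their own vector. A minimal sketch of the call-site pattern used in replaceUsesOfNonProtoConstant; the helper name and the OldCS/NewCall variables are illustrative assumptions, not identifiers from this patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instructions.h"

// Sketch: copy function, return and per-argument attributes from an old call
// site onto a newly created call using the AttributeList API.
static void copyCallAttrs(llvm::CallSite OldCS, llvm::CallInst *NewCall) {
  llvm::AttributeList OldAttrs = OldCS.getAttributes();
  llvm::SmallVector<llvm::AttributeSet, 8> ArgAttrs;
  for (unsigned I = 0, E = OldCS.arg_size(); I != E; ++I)
    ArgAttrs.push_back(OldAttrs.getParamAttributes(I)); // per-parameter sets
  NewCall->setAttributes(llvm::AttributeList::get(
      NewCall->getContext(), OldAttrs.getFnAttributes(),
      OldAttrs.getRetAttributes(), ArgAttrs));
}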
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 36f6785fd1b92..d0b2dd717c8c7 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -28,6 +28,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SanitizerBlacklist.h"
+#include "clang/Basic/XRayLists.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -546,6 +547,10 @@ public:
return *ObjCData;
}
+ // Version checking function, used to implement ObjC's @available:
+ // i32 @__isOSVersionAtLeast(i32, i32, i32)
+ llvm::Constant *IsOSVersionAtLeastFn = nullptr;
+
InstrProfStats &getPGOStats() { return PGOStats; }
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
@@ -906,14 +911,13 @@ public:
/// Create a new runtime function with the specified type and name.
llvm::Constant *
CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name,
- llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(),
+ llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
bool Local = false);
/// Create a new compiler builtin function with the specified type and name.
- llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty,
- StringRef Name,
- llvm::AttributeSet ExtraAttrs =
- llvm::AttributeSet());
+ llvm::Constant *
+ CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name,
+ llvm::AttributeList ExtraAttrs = llvm::AttributeList());
/// Create a new runtime global variable with the specified type and name.
llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,
StringRef Name);
@@ -1022,6 +1026,25 @@ public:
CGCalleeInfo CalleeInfo, AttributeListType &PAL,
unsigned &CallingConv, bool AttrOnCallSite);
+ /// Adds attributes to F according to our CodeGenOptions and LangOptions, as
+ /// though we had emitted it ourselves. We remove any attributes on F that
+ /// conflict with the attributes we add here.
+ ///
+ /// This is useful for adding attrs to bitcode modules that you want to link
+ /// with but don't control, such as CUDA's libdevice. When linking with such
+ /// a bitcode library, you might want to set e.g. its functions'
+ /// "unsafe-fp-math" attribute to match the attr of the functions you're
+ /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
+ /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
+ /// will propagate unsafe-fp-math=false up to every transitive caller of a
+ /// function in the bitcode library!
+ ///
+ /// With the exception of fast-math attrs, this will only make the attributes
+ /// on the function more conservative. But it's unsafe to call this on a
+ /// function which relies on particular fast-math attributes for correctness.
+ /// It's up to you to ensure that this is safe.
+ void AddDefaultFnAttrs(llvm::Function &F);
+
// Fills in the supplied string map with the set of target features for the
// passed in function.
void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
@@ -1103,6 +1126,12 @@ public:
QualType Ty,
StringRef Category = StringRef()) const;
+ /// Imbue XRay attributes to a function, applying the always/never attribute
+ /// lists in the process. Returns true if we did imbue attributes this way,
+ /// false otherwise.
+ bool imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
+ StringRef Category = StringRef()) const;
+
SanitizerMetadata *getSanitizerMetadata() {
return SanitizerMD.get();
}
@@ -1176,7 +1205,7 @@ public:
void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset,
const CXXRecordDecl *RD);
- /// \breif Get the declaration of std::terminate for the platform.
+ /// \brief Get the declaration of std::terminate for the platform.
llvm::Constant *getTerminateFn();
llvm::SanitizerStatReport &getSanStats();
@@ -1190,12 +1219,11 @@ public:
llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT);
private:
- llvm::Constant *
- GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
- bool ForVTable, bool DontDefer = false,
- bool IsThunk = false,
- llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(),
- ForDefinition_t IsForDefinition = NotForDefinition);
+ llvm::Constant *GetOrCreateLLVMFunction(
+ StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
+ bool DontDefer = false, bool IsThunk = false,
+ llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
+ ForDefinition_t IsForDefinition = NotForDefinition);
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::PointerType *PTy,
@@ -1222,7 +1250,6 @@ private:
void EmitDeclContext(const DeclContext *DC);
void EmitLinkageSpec(const LinkageSpecDecl *D);
- void CompleteDIClassType(const CXXMethodDecl* D);
/// \brief Emit the function that initializes C++ thread_local variables.
void EmitCXXThreadLocalInitFunc();
@@ -1266,6 +1293,10 @@ private:
/// Emit any vtables which we deferred and still have a use for.
void EmitDeferredVTables();
+ /// Emit a dummy function that references a CoreFoundation symbol when
+ /// @available is used on Darwin.
+ void emitAtAvailableLinkGuard();
+
/// Emit the llvm.used and llvm.compiler.used metadata.
void emitLLVMUsed();
@@ -1304,6 +1335,12 @@ private:
/// Check whether we can use a "simpler", more core exceptions personality
/// function.
void SimplifyPersonality();
+
+ /// Helper function for ConstructAttributeList and AddDefaultFnAttrs.
+ /// Constructs an AttrList for a function with the given properties.
+ void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs);
};
} // end namespace CodeGen
} // end namespace clang
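The new AddDefaultFnAttrs hook documented above exists so that functions pulled in from an external bitcode library pick up the same option-derived attributes as locally generated code. A hedged sketch of the idea, not the CodeGenModule implementation (the helper name and the pair of attributes chosen here are illustrative assumptions):

#include "llvm/IR/Function.h"

// Sketch: stamp fast-math string attributes onto a function from a linked
// bitcode library (e.g. libdevice) so that a missing "unsafe-fp-math"
// attribute is not read back as "false" and propagated to its callers.
static void applyFPMathAttrs(llvm::Function &F, bool UnsafeFPMath) {
  F.addFnAttr("unsafe-fp-math", UnsafeFPMath ? "true" : "false");
  F.addFnAttr("no-nans-fp-math", UnsafeFPMath ? "true" : "false");
}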
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index c6c3fa41e6281..6acedc033a6ea 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -612,7 +612,7 @@ uint64_t PGOHash::finalize() {
llvm::MD5::MD5Result Result;
MD5.final(Result);
using namespace llvm::support;
- return endian::read<uint64_t, little, unaligned>(Result);
+ return Result.low();
}
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
@@ -626,12 +626,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
// Constructors and destructors may be represented by several functions in IR.
// If so, instrument only base variant, others are implemented by delegation
// to the base one, it would be counted twice otherwise.
- if (CGM.getTarget().getCXXABI().hasConstructorVariants() &&
- ((isa<CXXConstructorDecl>(GD.getDecl()) &&
- GD.getCtorType() != Ctor_Base) ||
- (isa<CXXDestructorDecl>(GD.getDecl()) &&
- GD.getDtorType() != Dtor_Base))) {
+ if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
+ if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
return;
+
+ if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
+ if (GD.getCtorType() != Ctor_Base &&
+ CodeGenFunction::IsConstructorDelegationValid(CCD))
+ return;
}
CGM.ClearUnusedCoverageMapping(D);
setFuncName(Fn);
@@ -737,7 +739,8 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
Fn->setEntryCount(FunctionCount);
}
-void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
+void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
+ llvm::Value *StepV) {
if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
return;
if (!Builder.GetInsertBlock())
@@ -745,11 +748,18 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
unsigned Counter = (*RegionCounterMap)[S];
auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
- Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
- {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FunctionHash),
- Builder.getInt32(NumRegionCounters),
- Builder.getInt32(Counter)});
+
+ llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+ Builder.getInt64(FunctionHash),
+ Builder.getInt32(NumRegionCounters),
+ Builder.getInt32(Counter), StepV};
+ if (!StepV)
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
+ makeArrayRef(Args, 4));
+ else
+ Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
+ makeArrayRef(Args));
}
// This method either inserts a call to the profile run-time during
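As an example of the new StepV parameter above: a caller that only needs the usual counter bump passes a null step, and emitCounterIncrement still emits llvm.instrprof.increment with its four operands (profiled function name, function hash, number of region counters, counter index); a non-null i64 step instead selects llvm.instrprof.increment.step, which takes the same four operands plus the step value to add to the counter.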
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 4f229cde63b03..0026df570bce0 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -105,7 +105,8 @@ private:
void emitCounterRegionMapping(const Decl *D);
public:
- void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S);
+ void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
+ llvm::Value *StepV);
/// Return the region count for the counter at the given index.
uint64_t getRegionCount(const Stmt *S) {
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index adb40c8c0d478..dc24b2040f047 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -472,7 +472,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
- case BuiltinType::OCLNDRange:
case BuiltinType::OCLReserveID:
ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty);
break;
@@ -487,7 +486,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
break;
}
case Type::Auto:
- llvm_unreachable("Unexpected undeduced auto type!");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("Unexpected undeduced type!");
case Type::Complex: {
llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType());
ResultType = llvm::StructType::get(EltTy, EltTy, nullptr);
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 2ce6591e4eb7d..f0b97ebde1c21 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -303,11 +303,14 @@ public:
const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args,
const CXXConstructorDecl *D,
CXXCtorType CtorKind,
- unsigned ExtraArgs);
+ unsigned ExtraPrefixArgs,
+ unsigned ExtraSuffixArgs,
+ bool PassProtoArgs = true);
const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args,
const FunctionProtoType *type,
- RequiredArgs required);
+ RequiredArgs required,
+ unsigned numPrefixArgs);
const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD);
const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT);
diff --git a/lib/CodeGen/ConstantBuilder.h b/lib/CodeGen/ConstantBuilder.h
deleted file mode 100644
index 40b34a9d61c8f..0000000000000
--- a/lib/CodeGen/ConstantBuilder.h
+++ /dev/null
@@ -1,444 +0,0 @@
-//===----- ConstantBuilder.h - Builder for LLVM IR constants ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class provides a convenient interface for building complex
-// global initializers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H
-#define LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Constants.h"
-
-#include "CodeGenModule.h"
-
-#include <vector>
-
-namespace clang {
-namespace CodeGen {
-
-class ConstantStructBuilder;
-class ConstantArrayBuilder;
-
-/// A convenience builder class for complex constant initializers,
-/// especially for anonymous global structures used by various language
-/// runtimes.
-///
-/// The basic usage pattern is expected to be something like:
-/// ConstantInitBuilder builder(CGM);
-/// auto toplevel = builder.beginStruct();
-/// toplevel.addInt(CGM.SizeTy, widgets.size());
-/// auto widgetArray = builder.beginArray();
-/// for (auto &widget : widgets) {
-/// auto widgetDesc = widgetArray.beginStruct();
-/// widgetDesc.addInt(CGM.SizeTy, widget.getPower());
-/// widgetDesc.add(CGM.GetAddrOfConstantString(widget.getName()));
-/// widgetDesc.add(CGM.GetAddrOfGlobal(widget.getInitializerDecl()));
-/// widgetArray.add(widgetDesc.finish());
-/// }
-/// toplevel.add(widgetArray.finish());
-/// auto global = toplevel.finishAndCreateGlobal("WIDGET_LIST", Align,
-/// /*constant*/ true);
-class ConstantInitBuilder {
- struct SelfReference {
- llvm::GlobalVariable *Dummy;
- llvm::SmallVector<llvm::Constant*, 4> Indices;
-
- SelfReference(llvm::GlobalVariable *dummy) : Dummy(dummy) {}
- };
- CodeGenModule &CGM;
- llvm::SmallVector<llvm::Constant*, 16> Buffer;
- std::vector<SelfReference> SelfReferences;
- bool Frozen = false;
-
-public:
- explicit ConstantInitBuilder(CodeGenModule &CGM) : CGM(CGM) {}
-
- ~ConstantInitBuilder() {
- assert(Buffer.empty() && "didn't claim all values out of buffer");
- }
-
- class AggregateBuilderBase {
- protected:
- ConstantInitBuilder &Builder;
- AggregateBuilderBase *Parent;
- size_t Begin;
- bool Finished = false;
- bool Frozen = false;
-
- llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() {
- return Builder.Buffer;
- }
-
- const llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() const {
- return Builder.Buffer;
- }
-
- AggregateBuilderBase(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent)
- : Builder(builder), Parent(parent), Begin(builder.Buffer.size()) {
- if (parent) {
- assert(!parent->Frozen && "parent already has child builder active");
- parent->Frozen = true;
- } else {
- assert(!builder.Frozen && "builder already has child builder active");
- builder.Frozen = true;
- }
- }
-
- ~AggregateBuilderBase() {
- assert(Finished && "didn't finish aggregate builder");
- }
-
- void markFinished() {
- assert(!Frozen && "child builder still active");
- assert(!Finished && "builder already finished");
- Finished = true;
- if (Parent) {
- assert(Parent->Frozen &&
- "parent not frozen while child builder active");
- Parent->Frozen = false;
- } else {
- assert(Builder.Frozen &&
- "builder not frozen while child builder active");
- Builder.Frozen = false;
- }
- }
-
- public:
- // Not copyable.
- AggregateBuilderBase(const AggregateBuilderBase &) = delete;
- AggregateBuilderBase &operator=(const AggregateBuilderBase &) = delete;
-
- // Movable, mostly to allow returning. But we have to write this out
- // properly to satisfy the assert in the destructor.
- AggregateBuilderBase(AggregateBuilderBase &&other)
- : Builder(other.Builder), Parent(other.Parent), Begin(other.Begin),
- Finished(other.Finished), Frozen(other.Frozen) {
- other.Finished = false;
- }
- AggregateBuilderBase &operator=(AggregateBuilderBase &&other) = delete;
-
- /// Abandon this builder completely.
- void abandon() {
- markFinished();
- auto &buffer = Builder.Buffer;
- buffer.erase(buffer.begin() + Begin, buffer.end());
- }
-
- /// Add a new value to this initializer.
- void add(llvm::Constant *value) {
- assert(value && "adding null value to constant initializer");
- assert(!Finished && "cannot add more values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- Builder.Buffer.push_back(value);
- }
-
- /// Add an integer value of type size_t.
- void addSize(CharUnits size) {
- add(Builder.CGM.getSize(size));
- }
-
- /// Add an integer value of a specific type.
- void addInt(llvm::IntegerType *intTy, uint64_t value,
- bool isSigned = false) {
- add(llvm::ConstantInt::get(intTy, value, isSigned));
- }
-
- /// Add a null pointer of a specific type.
- void addNullPointer(llvm::PointerType *ptrTy) {
- add(llvm::ConstantPointerNull::get(ptrTy));
- }
-
- /// Add a bitcast of a value to a specific type.
- void addBitCast(llvm::Constant *value, llvm::Type *type) {
- add(llvm::ConstantExpr::getBitCast(value, type));
- }
-
- /// Add a bunch of new values to this initializer.
- void addAll(ArrayRef<llvm::Constant *> values) {
- assert(!Finished && "cannot add more values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- Builder.Buffer.append(values.begin(), values.end());
- }
-
- /// An opaque class to hold the abstract position of a placeholder.
- class PlaceholderPosition {
- size_t Index;
- friend class AggregateBuilderBase;
- PlaceholderPosition(size_t index) : Index(index) {}
- };
-
- /// Add a placeholder value to the structure. The returned position
- /// can be used to set the value later; it will not be invalidated by
- /// any intermediate operations except (1) filling the same position or
- /// (2) finishing the entire builder.
- ///
- /// This is useful for emitting certain kinds of structure which
- /// contain some sort of summary field, generaly a count, before any
- /// of the data. By emitting a placeholder first, the structure can
- /// be emitted eagerly.
- PlaceholderPosition addPlaceholder() {
- assert(!Finished && "cannot add more values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- Builder.Buffer.push_back(nullptr);
- return Builder.Buffer.size() - 1;
- }
-
- /// Fill a previously-added placeholder.
- void fillPlaceholderWithInt(PlaceholderPosition position,
- llvm::IntegerType *type, uint64_t value,
- bool isSigned = false) {
- fillPlaceholder(position, llvm::ConstantInt::get(type, value, isSigned));
- }
-
- /// Fill a previously-added placeholder.
- void fillPlaceholder(PlaceholderPosition position, llvm::Constant *value) {
- assert(!Finished && "cannot change values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- llvm::Constant *&slot = Builder.Buffer[position.Index];
- assert(slot == nullptr && "placeholder already filled");
- slot = value;
- }
-
- /// Produce an address which will eventually point to the the next
- /// position to be filled. This is computed with an indexed
- /// getelementptr rather than by computing offsets.
- ///
- /// The returned pointer will have type T*, where T is the given
- /// position.
- llvm::Constant *getAddrOfCurrentPosition(llvm::Type *type) {
- // Make a global variable. We will replace this with a GEP to this
- // position after installing the initializer.
- auto dummy =
- new llvm::GlobalVariable(Builder.CGM.getModule(), type, true,
- llvm::GlobalVariable::PrivateLinkage,
- nullptr, "");
- Builder.SelfReferences.emplace_back(dummy);
- auto &entry = Builder.SelfReferences.back();
- (void) getGEPIndicesToCurrentPosition(entry.Indices);
- return dummy;
- }
-
- ArrayRef<llvm::Constant*> getGEPIndicesToCurrentPosition(
- llvm::SmallVectorImpl<llvm::Constant*> &indices) {
- getGEPIndicesTo(indices, Builder.Buffer.size());
- return indices;
- }
-
- ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr);
- ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr);
-
- private:
- void getGEPIndicesTo(llvm::SmallVectorImpl<llvm::Constant*> &indices,
- size_t position) const {
- // Recurse on the parent builder if present.
- if (Parent) {
- Parent->getGEPIndicesTo(indices, Begin);
-
- // Otherwise, add an index to drill into the first level of pointer.
- } else {
- assert(indices.empty());
- indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0));
- }
-
- assert(position >= Begin);
- // We have to use i32 here because struct GEPs demand i32 indices.
- // It's rather unlikely to matter in practice.
- indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty,
- position - Begin));
- }
- };
-
- template <class Impl>
- class AggregateBuilder : public AggregateBuilderBase {
- protected:
- AggregateBuilder(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent)
- : AggregateBuilderBase(builder, parent) {}
-
- Impl &asImpl() { return *static_cast<Impl*>(this); }
-
- public:
- /// Given that this builder was created by beginning an array or struct
- /// component on the given parent builder, finish the array/struct
- /// component and add it to the parent.
- ///
- /// It is an intentional choice that the parent is passed in explicitly
- /// despite it being redundant with information already kept in the
- /// builder. This aids in readability by making it easier to find the
- /// places that add components to a builder, as well as "bookending"
- /// the sub-builder more explicitly.
- void finishAndAddTo(AggregateBuilderBase &parent) {
- assert(Parent == &parent && "adding to non-parent builder");
- parent.add(asImpl().finishImpl());
- }
-
- /// Given that this builder was created by beginning an array or struct
- /// directly on a ConstantInitBuilder, finish the array/struct and
- /// create a global variable with it as the initializer.
- template <class... As>
- llvm::GlobalVariable *finishAndCreateGlobal(As &&...args) {
- assert(!Parent && "finishing non-root builder");
- return Builder.createGlobal(asImpl().finishImpl(),
- std::forward<As>(args)...);
- }
-
- /// Given that this builder was created by beginning an array or struct
- /// directly on a ConstantInitBuilder, finish the array/struct and
- /// set it as the initializer of the given global variable.
- void finishAndSetAsInitializer(llvm::GlobalVariable *global) {
- assert(!Parent && "finishing non-root builder");
- return Builder.setGlobalInitializer(global, asImpl().finishImpl());
- }
- };
-
- ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr);
-
- ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr);
-
-private:
- llvm::GlobalVariable *createGlobal(llvm::Constant *initializer,
- const llvm::Twine &name,
- CharUnits alignment,
- bool constant = false,
- llvm::GlobalValue::LinkageTypes linkage
- = llvm::GlobalValue::InternalLinkage,
- unsigned addressSpace = 0) {
- auto GV = new llvm::GlobalVariable(CGM.getModule(),
- initializer->getType(),
- constant,
- linkage,
- initializer,
- name,
- /*insert before*/ nullptr,
- llvm::GlobalValue::NotThreadLocal,
- addressSpace);
- GV->setAlignment(alignment.getQuantity());
- resolveSelfReferences(GV);
- return GV;
- }
-
- void setGlobalInitializer(llvm::GlobalVariable *GV,
- llvm::Constant *initializer) {
- GV->setInitializer(initializer);
- resolveSelfReferences(GV);
- }
-
- void resolveSelfReferences(llvm::GlobalVariable *GV) {
- for (auto &entry : SelfReferences) {
- llvm::Constant *resolvedReference =
- llvm::ConstantExpr::getInBoundsGetElementPtr(
- GV->getValueType(), GV, entry.Indices);
- entry.Dummy->replaceAllUsesWith(resolvedReference);
- entry.Dummy->eraseFromParent();
- }
- }
-};
-
-/// A helper class of ConstantInitBuilder, used for building constant
-/// array initializers.
-class ConstantArrayBuilder
- : public ConstantInitBuilder::AggregateBuilder<ConstantArrayBuilder> {
- llvm::Type *EltTy;
- friend class ConstantInitBuilder;
- template <class Impl> friend class ConstantInitBuilder::AggregateBuilder;
- ConstantArrayBuilder(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent, llvm::Type *eltTy)
- : AggregateBuilder(builder, parent), EltTy(eltTy) {}
-public:
- size_t size() const {
- assert(!Finished);
- assert(!Frozen);
- assert(Begin <= getBuffer().size());
- return getBuffer().size() - Begin;
- }
-
- bool empty() const {
- return size() == 0;
- }
-
-private:
- /// Form an array constant from the values that have been added to this
- /// builder.
- llvm::Constant *finishImpl() {
- markFinished();
-
- auto &buffer = getBuffer();
- assert((Begin < buffer.size() ||
- (Begin == buffer.size() && EltTy))
- && "didn't add any array elements without element type");
- auto elts = llvm::makeArrayRef(buffer).slice(Begin);
- auto eltTy = EltTy ? EltTy : elts[0]->getType();
- auto type = llvm::ArrayType::get(eltTy, elts.size());
- auto constant = llvm::ConstantArray::get(type, elts);
- buffer.erase(buffer.begin() + Begin, buffer.end());
- return constant;
- }
-};
-
-inline ConstantArrayBuilder
-ConstantInitBuilder::beginArray(llvm::Type *eltTy) {
- return ConstantArrayBuilder(*this, nullptr, eltTy);
-}
-
-inline ConstantArrayBuilder
-ConstantInitBuilder::AggregateBuilderBase::beginArray(llvm::Type *eltTy) {
- return ConstantArrayBuilder(Builder, this, eltTy);
-}
-
-/// A helper class of ConstantInitBuilder, used for building constant
-/// struct initializers.
-class ConstantStructBuilder
- : public ConstantInitBuilder::AggregateBuilder<ConstantStructBuilder> {
- llvm::StructType *Ty;
- friend class ConstantInitBuilder;
- template <class Impl> friend class ConstantInitBuilder::AggregateBuilder;
- ConstantStructBuilder(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent, llvm::StructType *ty)
- : AggregateBuilder(builder, parent), Ty(ty) {}
-
- /// Finish the struct.
- llvm::Constant *finishImpl() {
- markFinished();
-
- auto &buffer = getBuffer();
- assert(Begin < buffer.size() && "didn't add any struct elements?");
- auto elts = llvm::makeArrayRef(buffer).slice(Begin);
-
- llvm::Constant *constant;
- if (Ty) {
- constant = llvm::ConstantStruct::get(Ty, elts);
- } else {
- constant = llvm::ConstantStruct::getAnon(elts, /*packed*/ false);
- }
-
- buffer.erase(buffer.begin() + Begin, buffer.end());
- return constant;
- }
-};
-
-inline ConstantStructBuilder
-ConstantInitBuilder::beginStruct(llvm::StructType *structTy) {
- return ConstantStructBuilder(*this, nullptr, structTy);
-}
-
-inline ConstantStructBuilder
-ConstantInitBuilder::AggregateBuilderBase::beginStruct(
- llvm::StructType *structTy) {
- return ConstantStructBuilder(Builder, this, structTy);
-}
-
-} // end namespace CodeGen
-} // end namespace clang
-
-#endif
diff --git a/lib/CodeGen/ConstantInitBuilder.cpp b/lib/CodeGen/ConstantInitBuilder.cpp
new file mode 100644
index 0000000000000..7f8d809850329
--- /dev/null
+++ b/lib/CodeGen/ConstantInitBuilder.cpp
@@ -0,0 +1,280 @@
+//===--- ConstantInitBuilder.cpp - Global initializer builder -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines out-of-line routines for building initializers for
+// global variables, in particular the kind of globals that are implicitly
+// introduced by various language ABIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "CodeGenModule.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+llvm::Type *ConstantInitFuture::getType() const {
+ assert(Data && "dereferencing null future");
+ if (Data.is<llvm::Constant*>()) {
+ return Data.get<llvm::Constant*>()->getType();
+ } else {
+ return Data.get<ConstantInitBuilderBase*>()->Buffer[0]->getType();
+ }
+}
+
+void ConstantInitFuture::abandon() {
+ assert(Data && "abandoning null future");
+ if (auto builder = Data.dyn_cast<ConstantInitBuilderBase*>()) {
+ builder->abandon(0);
+ }
+ Data = nullptr;
+}
+
+void ConstantInitFuture::installInGlobal(llvm::GlobalVariable *GV) {
+ assert(Data && "installing null future");
+ if (Data.is<llvm::Constant*>()) {
+ GV->setInitializer(Data.get<llvm::Constant*>());
+ } else {
+ auto &builder = *Data.get<ConstantInitBuilderBase*>();
+ assert(builder.Buffer.size() == 1);
+ builder.setGlobalInitializer(GV, builder.Buffer[0]);
+ builder.Buffer.clear();
+ Data = nullptr;
+ }
+}
+
+ConstantInitFuture
+ConstantInitBuilderBase::createFuture(llvm::Constant *initializer) {
+ assert(Buffer.empty() && "buffer not currently empty");
+ Buffer.push_back(initializer);
+ return ConstantInitFuture(this);
+}
+
+// Only used in this file.
+inline ConstantInitFuture::ConstantInitFuture(ConstantInitBuilderBase *builder)
+ : Data(builder) {
+ assert(!builder->Frozen);
+ assert(builder->Buffer.size() == 1);
+ assert(builder->Buffer[0] != nullptr);
+}
+
+llvm::GlobalVariable *
+ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer,
+ const llvm::Twine &name,
+ CharUnits alignment,
+ bool constant,
+ llvm::GlobalValue::LinkageTypes linkage,
+ unsigned addressSpace) {
+ auto GV = new llvm::GlobalVariable(CGM.getModule(),
+ initializer->getType(),
+ constant,
+ linkage,
+ initializer,
+ name,
+ /*insert before*/ nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ addressSpace);
+ GV->setAlignment(alignment.getQuantity());
+ resolveSelfReferences(GV);
+ return GV;
+}
+
+void ConstantInitBuilderBase::setGlobalInitializer(llvm::GlobalVariable *GV,
+ llvm::Constant *initializer){
+ GV->setInitializer(initializer);
+
+ if (!SelfReferences.empty())
+ resolveSelfReferences(GV);
+}
+
+void ConstantInitBuilderBase::resolveSelfReferences(llvm::GlobalVariable *GV) {
+ for (auto &entry : SelfReferences) {
+ llvm::Constant *resolvedReference =
+ llvm::ConstantExpr::getInBoundsGetElementPtr(
+ GV->getValueType(), GV, entry.Indices);
+ auto dummy = entry.Dummy;
+ dummy->replaceAllUsesWith(resolvedReference);
+ dummy->eraseFromParent();
+ }
+ SelfReferences.clear();
+}
+
+void ConstantInitBuilderBase::abandon(size_t newEnd) {
+ // Remove all the entries we've added.
+ Buffer.erase(Buffer.begin() + newEnd, Buffer.end());
+
+ // If we're abandoning all the way to the beginning, destroy
+ // all the self-references, because we might not get another
+ // opportunity.
+ if (newEnd == 0) {
+ for (auto &entry : SelfReferences) {
+ auto dummy = entry.Dummy;
+ dummy->replaceAllUsesWith(llvm::UndefValue::get(dummy->getType()));
+ dummy->eraseFromParent();
+ }
+ SelfReferences.clear();
+ }
+}
+
+void ConstantAggregateBuilderBase::addSize(CharUnits size) {
+ add(Builder.CGM.getSize(size));
+}
+
+llvm::Constant *
+ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType,
+ llvm::Constant *target) {
+ // Compute the address of the relative-address slot.
+ auto base = getAddrOfCurrentPosition(offsetType);
+
+ // Subtract.
+ base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy);
+ target = llvm::ConstantExpr::getPtrToInt(target, Builder.CGM.IntPtrTy);
+ llvm::Constant *offset = llvm::ConstantExpr::getSub(target, base);
+
+ // Truncate to the relative-address type if necessary.
+ if (Builder.CGM.IntPtrTy != offsetType) {
+ offset = llvm::ConstantExpr::getTrunc(offset, offsetType);
+ }
+
+ return offset;
+}
+
+llvm::Constant *
+ConstantAggregateBuilderBase::getAddrOfCurrentPosition(llvm::Type *type) {
+ // Make a global variable. We will replace this with a GEP to this
+ // position after installing the initializer.
+ auto dummy =
+ new llvm::GlobalVariable(Builder.CGM.getModule(), type, true,
+ llvm::GlobalVariable::PrivateLinkage,
+ nullptr, "");
+ Builder.SelfReferences.emplace_back(dummy);
+ auto &entry = Builder.SelfReferences.back();
+ (void) getGEPIndicesToCurrentPosition(entry.Indices);
+ return dummy;
+}
+
+void ConstantAggregateBuilderBase::getGEPIndicesTo(
+ llvm::SmallVectorImpl<llvm::Constant*> &indices,
+ size_t position) const {
+ // Recurse on the parent builder if present.
+ if (Parent) {
+ Parent->getGEPIndicesTo(indices, Begin);
+
+ // Otherwise, add an index to drill into the first level of pointer.
+ } else {
+ assert(indices.empty());
+ indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0));
+ }
+
+ assert(position >= Begin);
+ // We have to use i32 here because struct GEPs demand i32 indices.
+ // It's rather unlikely to matter in practice.
+ indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty,
+ position - Begin));
+}
+
+ConstantAggregateBuilderBase::PlaceholderPosition
+ConstantAggregateBuilderBase::addPlaceholderWithSize(llvm::Type *type) {
+ // Bring the offset up to the last field.
+ CharUnits offset = getNextOffsetFromGlobal();
+
+ // Create the placeholder.
+ auto position = addPlaceholder();
+
+ // Advance the offset past that field.
+ auto &layout = Builder.CGM.getDataLayout();
+ if (!Packed)
+ offset = offset.alignTo(CharUnits::fromQuantity(
+ layout.getABITypeAlignment(type)));
+ offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type));
+
+ CachedOffsetEnd = Builder.Buffer.size();
+ CachedOffsetFromGlobal = offset;
+
+ return position;
+}
+
+CharUnits ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const{
+ size_t cacheEnd = CachedOffsetEnd;
+ assert(cacheEnd <= end);
+
+ // Fast path: if the cache is valid, just use it.
+ if (cacheEnd == end) {
+ return CachedOffsetFromGlobal;
+ }
+
+ // If the cached range ends before the index at which the current
+ // aggregate starts, recurse for the parent.
+ CharUnits offset;
+ if (cacheEnd < Begin) {
+ assert(cacheEnd == 0);
+ assert(Parent && "Begin != 0 for root builder");
+ cacheEnd = Begin;
+ offset = Parent->getOffsetFromGlobalTo(Begin);
+ } else {
+ offset = CachedOffsetFromGlobal;
+ }
+
+ // Perform simple layout on the elements in cacheEnd..<end.
+ if (cacheEnd != end) {
+ auto &layout = Builder.CGM.getDataLayout();
+ do {
+ llvm::Constant *element = Builder.Buffer[cacheEnd];
+ assert(element != nullptr &&
+ "cannot compute offset when a placeholder is present");
+ llvm::Type *elementType = element->getType();
+ if (!Packed)
+ offset = offset.alignTo(CharUnits::fromQuantity(
+ layout.getABITypeAlignment(elementType)));
+ offset += CharUnits::fromQuantity(layout.getTypeStoreSize(elementType));
+ } while (++cacheEnd != end);
+ }
+
+ // Cache and return.
+ CachedOffsetEnd = cacheEnd;
+ CachedOffsetFromGlobal = offset;
+ return offset;
+}
+
+llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy) {
+ markFinished();
+
+ auto &buffer = getBuffer();
+ assert((Begin < buffer.size() ||
+ (Begin == buffer.size() && eltTy))
+ && "didn't add any array elements without element type");
+ auto elts = llvm::makeArrayRef(buffer).slice(Begin);
+ if (!eltTy) eltTy = elts[0]->getType();
+ auto type = llvm::ArrayType::get(eltTy, elts.size());
+ auto constant = llvm::ConstantArray::get(type, elts);
+ buffer.erase(buffer.begin() + Begin, buffer.end());
+ return constant;
+}
+
+llvm::Constant *
+ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) {
+ markFinished();
+
+ auto &buffer = getBuffer();
+ auto elts = llvm::makeArrayRef(buffer).slice(Begin);
+
+ if (ty == nullptr && elts.empty())
+ ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed);
+
+ llvm::Constant *constant;
+ if (ty) {
+ assert(ty->isPacked() == Packed);
+ constant = llvm::ConstantStruct::get(ty, elts);
+ } else {
+ constant = llvm::ConstantStruct::getAnon(elts, Packed);
+ }
+
+ buffer.erase(buffer.begin() + Begin, buffer.end());
+ return constant;
+}
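The builder usage pattern documented in the deleted ConstantBuilder.h header still applies to the relocated clang/CodeGen/ConstantInitBuilder.h interface whose out-of-line parts are defined above. A minimal sketch against that interface; the function name, field layout, and alignment are illustrative assumptions only:

#include "CodeGenModule.h"
#include "clang/AST/CharUnits.h"
#include "clang/CodeGen/ConstantInitBuilder.h"

// Sketch: build an anonymous two-field struct constant and wrap it in a
// global variable, mirroring the usage comment from the old header.
static llvm::GlobalVariable *
emitWidgetDescriptor(clang::CodeGen::CodeGenModule &CGM,
                     llvm::Constant *NameString) {
  clang::CodeGen::ConstantInitBuilder Builder(CGM);
  auto Fields = Builder.beginStruct();
  Fields.addInt(CGM.Int32Ty, 1);  // illustrative version field
  Fields.add(NameString);         // pointer to a string constant
  return Fields.finishAndCreateGlobal("widget.descriptor",
                                      clang::CharUnits::fromQuantity(8),
                                      /*constant=*/true);
}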
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 5bc9e5011aa89..a1023473bdd33 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -961,12 +961,10 @@ struct CounterCoverageMappingBuilder
}
};
-bool isMachO(const CodeGenModule &CGM) {
- return CGM.getTarget().getTriple().isOSBinFormatMachO();
-}
-
-StringRef getCoverageSection(const CodeGenModule &CGM) {
- return llvm::getInstrProfCoverageSectionName(isMachO(CGM));
+std::string getCoverageSection(const CodeGenModule &CGM) {
+ return llvm::getInstrProfSectionName(
+ llvm::IPSK_covmap,
+ CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
}
std::string normalizeFilename(StringRef Filename) {
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index 2435830385583..c7bdeac58a1a4 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -202,7 +202,7 @@ public:
template <std::size_t... Is>
T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) {
// It's important that the restores are emitted in order. The braced init
- // list guarentees that.
+ // list guarantees that.
return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...};
}
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index f7a8dd66c527c..dac2d15fa4061 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -24,8 +24,8 @@
#include "CGVTables.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
#include "TargetInfo.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/Type.h"
#include "clang/AST/StmtCXX.h"
@@ -207,8 +207,9 @@ public:
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
- void buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
- SmallVectorImpl<CanQualType> &ArgTys) override;
+ AddedStructorArgs
+ buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const override {
@@ -225,11 +226,10 @@ public:
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- unsigned addImplicitConstructorArgs(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type, bool ForVirtualBase,
- bool Delegating,
- CallArgList &Args) override;
+ AddedStructorArgs
+ addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
@@ -1134,8 +1134,8 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) {
// Mark the function as nounwind readonly.
llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind,
llvm::Attribute::ReadOnly };
- llvm::AttributeSet Attrs = llvm::AttributeSet::get(
- CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs);
+ llvm::AttributeList Attrs = llvm::AttributeList::get(
+ CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs);
}
@@ -1353,7 +1353,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
}
}
-void
+CGCXXABI::AddedStructorArgs
ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
ASTContext &Context = getContext();
@@ -1362,9 +1362,12 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
// These are Clang types, so we don't need to worry about sret yet.
// Check if we need to add a VTT parameter (which has type void **).
- if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0)
+ if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) {
ArgTys.insert(ArgTys.begin() + 1,
Context.getPointerType(Context.VoidPtrTy));
+ return AddedStructorArgs::prefix(1);
+ }
+ return AddedStructorArgs{};
}
void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
@@ -1429,11 +1432,11 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
}
-unsigned ItaniumCXXABI::addImplicitConstructorArgs(
+CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
if (!NeedsVTTParameter(GlobalDecl(D, Type)))
- return 0;
+ return AddedStructorArgs{};
// Insert the implicit 'vtt' argument as the second argument.
llvm::Value *VTT =
@@ -1441,7 +1444,7 @@ unsigned ItaniumCXXABI::addImplicitConstructorArgs(
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
Args.insert(Args.begin() + 1,
CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
- return 1; // Added one arg.
+ return AddedStructorArgs::prefix(1); // Added one arg.
}
void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
@@ -1907,10 +1910,11 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM,
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy),
GuardPtrTy, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_acquire",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind));
+ return CGM.CreateRuntimeFunction(
+ FTy, "__cxa_guard_acquire",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM,
@@ -1918,10 +1922,11 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM,
// void __cxa_guard_release(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_release",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind));
+ return CGM.CreateRuntimeFunction(
+ FTy, "__cxa_guard_release",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM,
@@ -1929,10 +1934,11 @@ static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM,
// void __cxa_guard_abort(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_abort",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind));
+ return CGM.CreateRuntimeFunction(
+ FTy, "__cxa_guard_abort",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind));
}
namespace {
@@ -2015,10 +2021,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
// The ABI says: "It is suggested that it be emitted in the same COMDAT
// group as the associated data object." In practice, this doesn't work for
- // non-ELF object formats, so only do it for ELF.
+ // non-ELF and non-Wasm object formats, so only do it for ELF and Wasm.
llvm::Comdat *C = var->getComdat();
if (!D.isLocalVarDecl() && C &&
- CGM.getTarget().getTriple().isOSBinFormatELF()) {
+ (CGM.getTarget().getTriple().isOSBinFormatELF() ||
+ CGM.getTarget().getTriple().isOSBinFormatWasm())) {
guard->setComdat(C);
// An inline variable's guard function is run from the per-TU
// initialization function, not via a dedicated global ctor function, so
@@ -2161,7 +2168,9 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
// Create a variable that binds the atexit to this shared object.
llvm::Constant *handle =
- CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
+ CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
+ auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
llvm::Value *args[] = {
llvm::ConstantExpr::getBitCast(dtor, dtorTy),
@@ -2634,7 +2643,6 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
- case BuiltinType::OCLNDRange:
case BuiltinType::OCLReserveID:
return false;
@@ -2814,7 +2822,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
llvm_unreachable("References shouldn't get here");
case Type::Auto:
- llvm_unreachable("Undeduced auto type shouldn't get here");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe types shouldn't get here");
@@ -3044,7 +3053,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
llvm_unreachable("References shouldn't get here");
case Type::Auto:
- llvm_unreachable("Undeduced auto type shouldn't get here");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe type shouldn't get here");
@@ -3534,8 +3544,9 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM,
return StructorCodegen::RAUW;
if (llvm::GlobalValue::isWeakForLinker(Linkage)) {
- // Only ELF supports COMDATs with arbitrary names (C5/D5).
- if (CGM.getTarget().getTriple().isOSBinFormatELF())
+ // Only ELF and wasm support COMDATs with arbitrary names (C5/D5).
+ if (CGM.getTarget().getTriple().isOSBinFormatELF() ||
+ CGM.getTarget().getTriple().isOSBinFormatWasm())
return StructorCodegen::COMDAT;
return StructorCodegen::Emit;
}
@@ -3919,9 +3930,8 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
llvm::FunctionType *fnTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
- llvm::Constant *fnRef =
- CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate",
- llvm::AttributeSet(), /*Local=*/true);
+ llvm::Constant *fnRef = CGM.CreateRuntimeFunction(
+ fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
if (fn && fn->empty()) {
diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp
new file mode 100644
index 0000000000000..acea5c1143cf8
--- /dev/null
+++ b/lib/CodeGen/MacroPPCallbacks.cpp
@@ -0,0 +1,207 @@
+//===--- MacroPPCallbacks.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the macro preprocessor's callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MacroPPCallbacks.h"
+#include "CGDebugInfo.h"
+#include "clang/CodeGen/ModuleBuilder.h"
+#include "clang/Parse/Parser.h"
+
+using namespace clang;
+
+void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II,
+ const MacroInfo &MI,
+ Preprocessor &PP, raw_ostream &Name,
+ raw_ostream &Value) {
+ Name << II.getName();
+
+ if (MI.isFunctionLike()) {
+ Name << '(';
+ if (!MI.arg_empty()) {
+ MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
+ for (; AI + 1 != E; ++AI) {
+ Name << (*AI)->getName();
+ Name << ',';
+ }
+
+ // Last argument.
+ if ((*AI)->getName() == "__VA_ARGS__")
+ Name << "...";
+ else
+ Name << (*AI)->getName();
+ }
+
+ if (MI.isGNUVarargs())
+ // #define foo(x...)
+ Name << "...";
+
+ Name << ')';
+ }
+
+ SmallString<128> SpellingBuffer;
+ bool First = true;
+ for (const auto &T : MI.tokens()) {
+ if (!First && T.hasLeadingSpace())
+ Value << ' ';
+
+ Value << PP.getSpelling(T, SpellingBuffer);
+ First = false;
+ }
+}
+
+MacroPPCallbacks::MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP)
+ : Gen(Gen), PP(PP), Status(NoScope) {}
+
+// This is the expected flow of enter/exit compiler and user files:
+// - Main File Enter
+// - <built-in> file enter
+// {Compiler macro definitions} - (Line=0, no scope)
+// - (Optional) <command line> file enter
+// {Command line macro definitions} - (Line=0, no scope)
+// - (Optional) <command line> file exit
+// {Command line file includes} - (Line=0, Main file scope)
+// {macro definitions and file includes} - (Line!=0, Parent scope)
+// - <built-in> file exit
+// {User code macro definitions and file includes} - (Line!=0, Parent scope)
+
+llvm::DIMacroFile *MacroPPCallbacks::getCurrentScope() {
+ if (Status == MainFileScope || Status == CommandLineIncludeScope)
+ return Scopes.back();
+ return nullptr;
+}
+
+SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) {
+ if (Status == MainFileScope || EnteredCommandLineIncludeFiles)
+ return Loc;
+
+ // While parsing skipped files, location of macros is invalid.
+ // Invalid location represents line zero.
+ return SourceLocation();
+}
+
+static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) {
+ StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
+ return Filename.equals("<built-in>");
+}
+
+static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) {
+ StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
+ return Filename.equals("<command line>");
+}
+
+void MacroPPCallbacks::updateStatusToNextScope() {
+ switch (Status) {
+ case NoScope:
+ Status = InitializedScope;
+ break;
+ case InitializedScope:
+ Status = BuiltinScope;
+ break;
+ case BuiltinScope:
+ Status = CommandLineIncludeScope;
+ break;
+ case CommandLineIncludeScope:
+ Status = MainFileScope;
+ break;
+ case MainFileScope:
+ llvm_unreachable("There is no next scope, already in the final scope");
+ }
+}
+
+void MacroPPCallbacks::FileEntered(SourceLocation Loc) {
+ SourceLocation LineLoc = getCorrectLocation(LastHashLoc);
+ switch (Status) {
+ case NoScope:
+ updateStatusToNextScope();
+ break;
+ case InitializedScope:
+ updateStatusToNextScope();
+ return;
+ case BuiltinScope:
+ if (isCommandLineFile(PP.getSourceManager(), Loc))
+ return;
+ updateStatusToNextScope();
+ LLVM_FALLTHROUGH;
+ case CommandLineIncludeScope:
+ EnteredCommandLineIncludeFiles++;
+ break;
+ case MainFileScope:
+ break;
+ }
+
+ Scopes.push_back(Gen->getCGDebugInfo()->CreateTempMacroFile(getCurrentScope(),
+ LineLoc, Loc));
+}
+
+void MacroPPCallbacks::FileExited(SourceLocation Loc) {
+ switch (Status) {
+ default:
+ llvm_unreachable("Do not expect to exit a file from current scope");
+ case BuiltinScope:
+ if (!isBuiltinFile(PP.getSourceManager(), Loc))
+ // Skip next scope and change status to MainFileScope.
+ Status = MainFileScope;
+ return;
+ case CommandLineIncludeScope:
+ if (!EnteredCommandLineIncludeFiles) {
+ updateStatusToNextScope();
+ return;
+ }
+ EnteredCommandLineIncludeFiles--;
+ break;
+ case MainFileScope:
+ break;
+ }
+
+ Scopes.pop_back();
+}
+
+void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType,
+ FileID PrevFID) {
+ // Only care about enter file or exit file changes.
+ if (Reason == EnterFile)
+ FileEntered(Loc);
+ else if (Reason == ExitFile)
+ FileExited(Loc);
+}
+
+void MacroPPCallbacks::InclusionDirective(
+ SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
+ bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File,
+ StringRef SearchPath, StringRef RelativePath, const Module *Imported) {
+
+ // Record the line location of the current included file.
+ LastHashLoc = HashLoc;
+}
+
+void MacroPPCallbacks::MacroDefined(const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ IdentifierInfo *Id = MacroNameTok.getIdentifierInfo();
+ SourceLocation location = getCorrectLocation(MacroNameTok.getLocation());
+ std::string NameBuffer, ValueBuffer;
+ llvm::raw_string_ostream Name(NameBuffer);
+ llvm::raw_string_ostream Value(ValueBuffer);
+ writeMacroDefinition(*Id, *MD->getMacroInfo(), PP, Name, Value);
+ Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(),
+ llvm::dwarf::DW_MACINFO_define, location,
+ Name.str(), Value.str());
+}
+
+void MacroPPCallbacks::MacroUndefined(const Token &MacroNameTok,
+ const MacroDefinition &MD) {
+ IdentifierInfo *Id = MacroNameTok.getIdentifierInfo();
+ SourceLocation location = getCorrectLocation(MacroNameTok.getLocation());
+ Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(),
+ llvm::dwarf::DW_MACINFO_undef, location,
+ Id->getName(), "");
+}
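To illustrate writeMacroDefinition above: for a definition like #define ADD(a, b) a + b, the Name stream receives "ADD(a,b)" and the Value stream receives "a + b"; a C99 variadic parameter prints as "...", so #define LOG(fmt, ...) yields "LOG(fmt,...)", while a GNU named-varargs definition such as #define TRACE(args...) keeps the parameter name and appends "...", yielding "TRACE(args...)".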
diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h
new file mode 100644
index 0000000000000..06217f9c5883c
--- /dev/null
+++ b/lib/CodeGen/MacroPPCallbacks.h
@@ -0,0 +1,117 @@
+//===--- MacroPPCallbacks.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the implementation for the macro preprocessor callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PPCallbacks.h"
+
+namespace llvm {
+class DIMacroFile;
+class DIMacroNode;
+}
+namespace clang {
+class Preprocessor;
+class MacroInfo;
+class CodeGenerator;
+
+class MacroPPCallbacks : public PPCallbacks {
+  /// A pointer to the code generator, where the debug info generator can be
+  /// found.
+ CodeGenerator *Gen;
+
+ /// Preprocessor.
+ Preprocessor &PP;
+
+  /// Location of the most recently included file, used for its line number.
+ SourceLocation LastHashLoc;
+
+  /// Number of command line included files that have been entered but not
+  /// yet exited.
+ int EnteredCommandLineIncludeFiles = 0;
+
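+  /// File scope status values; updateStatusToNextScope() advances the status
+  /// one step at a time, in the order the enumerators are listed.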
+ enum FileScopeStatus {
+ NoScope = 0, // Scope is not initialized yet.
+ InitializedScope, // Main file scope is initialized but not set yet.
+ BuiltinScope, // <built-in> and <command line> file scopes.
+  CommandLineIncludeScope, // Scope of a file included from <command line>.
+ MainFileScope // Main file scope.
+ };
+ FileScopeStatus Status;
+
+  /// Stack of all entered files that have not been exited yet, in inclusion
+  /// order.
+ llvm::SmallVector<llvm::DIMacroFile *, 4> Scopes;
+
+ /// Get current DIMacroFile scope.
+ /// \return current DIMacroFile scope or nullptr if there is no such scope.
+ llvm::DIMacroFile *getCurrentScope();
+
+ /// Get current line location or invalid location.
+ /// \param Loc current line location.
+ /// \return current line location \p `Loc`, or invalid location if it's in a
+ /// skipped file scope.
+ SourceLocation getCorrectLocation(SourceLocation Loc);
+
+ /// Use the passed preprocessor to write the macro name and value from the
+ /// given macro info and identifier info into the given \p `Name` and \p
+ /// `Value` output streams.
+ ///
+ /// \param II Identifier info, used to get the Macro name.
+  /// \param MI Macro info, used to get the Macro arguments and values.
+  /// \param PP Preprocessor.
+  /// \param [out] Name Placeholder for the returned macro name and arguments.
+  /// \param [out] Value Placeholder for the returned macro value.
+ static void writeMacroDefinition(const IdentifierInfo &II,
+ const MacroInfo &MI, Preprocessor &PP,
+ raw_ostream &Name, raw_ostream &Value);
+
+ /// Update current file scope status to next file scope.
+ void updateStatusToNextScope();
+
+ /// Handle the case when entering a file.
+ ///
+ /// \param Loc Indicates the new location.
+ void FileEntered(SourceLocation Loc);
+
+ /// Handle the case when exiting a file.
+ ///
+ /// \param Loc Indicates the new location.
+ void FileExited(SourceLocation Loc);
+
+public:
+ MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP);
+
+ /// Callback invoked whenever a source file is entered or exited.
+ ///
+ /// \param Loc Indicates the new location.
+ /// \param PrevFID the file that was exited if \p Reason is ExitFile.
+ void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType,
+ FileID PrevFID = FileID()) override;
+
+  /// Callback invoked whenever an inclusion directive (\#include, \#import,
+  /// etc.) is processed.
+ void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+ StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange, const FileEntry *File,
+ StringRef SearchPath, StringRef RelativePath,
+ const Module *Imported) override;
+
+ /// Hook called whenever a macro definition is seen.
+ void MacroDefined(const Token &MacroNameTok,
+ const MacroDirective *MD) override;
+
+ /// Hook called whenever a macro \#undef is seen.
+ ///
+ /// MD is released immediately following this callback.
+ void MacroUndefined(const Token &MacroNameTok,
+ const MacroDefinition &MD) override;
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_CODEGEN_MACROPPCALLBACKS_H
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 38df455011e3a..4cacf494e6941 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -19,8 +19,8 @@
#include "CGVTables.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
-#include "ConstantBuilder.h"
#include "TargetInfo.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
@@ -206,8 +206,9 @@ public:
// lacks a definition for the destructor, non-base destructors must always
// delegate to or alias the base destructor.
- void buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
- SmallVectorImpl<CanQualType> &ArgTys) override;
+ AddedStructorArgs
+ buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
/// Non-base dtors should be emitted as delegating thunks in this ABI.
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
@@ -248,11 +249,10 @@ public:
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- unsigned addImplicitConstructorArgs(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type, bool ForVirtualBase,
- bool Delegating,
- CallArgList &Args) override;
+ AddedStructorArgs
+ addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
@@ -1261,17 +1261,19 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
}
}
-void
+CGCXXABI::AddedStructorArgs
MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
+ AddedStructorArgs Added;
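+  // Count how many implicit parameters are added before (Prefix) and after
+  // (Suffix) the explicit ones, so callers can keep the call arguments in
+  // sync with the signature.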
// TODO: 'for base' flag
if (T == StructorType::Deleting) {
// The scalar deleting destructor takes an implicit int parameter.
ArgTys.push_back(getContext().IntTy);
+ ++Added.Suffix;
}
auto *CD = dyn_cast<CXXConstructorDecl>(MD);
if (!CD)
- return;
+ return Added;
// All parameters are already in place except is_most_derived, which goes
// after 'this' if it's variadic and last if it's not.
@@ -1279,11 +1281,16 @@ MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
const CXXRecordDecl *Class = CD->getParent();
const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>();
if (Class->getNumVBases()) {
- if (FPT->isVariadic())
+ if (FPT->isVariadic()) {
ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy);
- else
+ ++Added.Prefix;
+ } else {
ArgTys.push_back(getContext().IntTy);
+ ++Added.Suffix;
+ }
}
+
+ return Added;
}
void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
@@ -1493,14 +1500,14 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
}
}
-unsigned MicrosoftCXXABI::addImplicitConstructorArgs(
+CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
assert(Type == Ctor_Complete || Type == Ctor_Base);
// Check if we need a 'most_derived' parameter.
if (!D->getParent()->getNumVBases())
- return 0;
+ return AddedStructorArgs{};
// Add the 'most_derived' argument second if we are variadic or last if not.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
@@ -1511,13 +1518,13 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs(
MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
}
RValue RV = RValue::get(MostDerivedArg);
- if (FPT->isVariadic())
+ if (FPT->isVariadic()) {
Args.insert(Args.begin() + 1,
CallArg(RV, getContext().IntTy, /*needscopy=*/false));
- else
- Args.add(RV, getContext().IntTy);
-
- return 1; // Added one arg.
+ return AddedStructorArgs::prefix(1);
+ }
+ Args.add(RV, getContext().IntTy);
+ return AddedStructorArgs::suffix(1);
}
void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
@@ -1554,7 +1561,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable) {
- if (!CGM.getCodeGenOpts().PrepareForLTO)
+ if (!CGM.getCodeGenOpts().LTOUnit)
return;
// The location of the first virtual function pointer in the virtual table,
@@ -2203,9 +2210,8 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get(
CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false);
- llvm::Constant *TLRegDtor =
- CGF.CGM.CreateRuntimeFunction(TLRegDtorTy, "__tlregdtor",
- llvm::AttributeSet(), /*Local=*/true);
+ llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction(
+ TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true);
if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor))
TLRegDtorFn->setDoesNotThrow();
@@ -2301,9 +2307,9 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_header",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind),
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind),
/*Local=*/true);
}
@@ -2313,9 +2319,9 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_footer",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind),
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind),
/*Local=*/true);
}
@@ -2325,9 +2331,9 @@ static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) {
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_abort",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind),
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind),
/*Local=*/true);
}
@@ -3713,7 +3719,7 @@ CatchTypeInfo
MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
QualType CatchHandlerType) {
// TypeDescriptors for exceptions never have qualified pointer types,
- // qualifiers are stored seperately in order to support qualification
+ // qualifiers are stored separately in order to support qualification
// conversions.
bool IsConst, IsVolatile, IsUnaligned;
Type =
@@ -3918,16 +3924,16 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
// Insert any ABI-specific implicit constructor arguments.
- unsigned ExtraArgs = addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
- /*ForVirtualBase=*/false,
- /*Delegating=*/false, Args);
-
+ AddedStructorArgs ExtraArgs =
+ addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
+ /*ForVirtualBase=*/false,
+ /*Delegating=*/false, Args);
// Call the destructor with our arguments.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
CGCallee Callee = CGCallee::forDirect(CalleePtr, CD);
const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
- Args, CD, Ctor_Complete, ExtraArgs);
+ Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix);
CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args);
Cleanups.ForceCleanup();
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index f925c2549175c..89090c8b6a1b5 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -92,6 +92,10 @@ namespace {
return M.get();
}
+ CGDebugInfo *getCGDebugInfo() {
+ return Builder->getModuleDebugInfo();
+ }
+
llvm::Module *ReleaseModule() {
return M.release();
}
@@ -299,6 +303,10 @@ llvm::Module *CodeGenerator::ReleaseModule() {
return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule();
}
+CGDebugInfo *CodeGenerator::getCGDebugInfo() {
+ return static_cast<CodeGeneratorImpl*>(this)->getCGDebugInfo();
+}
+
const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) {
return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name);
}
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 754f9968b67f2..37ecc05aa1eee 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -171,7 +171,8 @@ public:
// Prepare CGDebugInfo to emit debug info for a clang module.
auto *DI = Builder->getModuleDebugInfo();
StringRef ModuleName = llvm::sys::path::filename(MainFileName);
- DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL});
+ DI->setPCHDescriptor({ModuleName, "", OutputFileName,
+ ASTFileSignature{{{~0U, ~0U, ~0U, ~0U, ~1U}}}});
DI->setModuleMap(MMap);
}
@@ -241,7 +242,11 @@ public:
// PCH files don't have a signature field in the control block,
// but LLVM detects DWO CUs by looking for a non-zero DWO id.
- uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL;
+ // We use the lower 64 bits for debug info.
+ uint64_t Signature =
+ Buffer->Signature
+ ? (uint64_t)Buffer->Signature[1] << 32 | Buffer->Signature[0]
+ : ~1ULL;
Builder->getModuleDebugInfo()->setDwoId(Signature);
// Finalize the Builder.
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index d2fc3888ef292..94c3880ea26ec 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1197,6 +1197,39 @@ static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
return Size == 32 || Size == 64;
}
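+// Add the sizes of RD's fields, in bits, to Size. Returns false if any field
+// is a bit-field or is not a 32- or 64-bit basic type, i.e. if the record
+// cannot safely be expanded into separate scalar arguments.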
+static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
+ uint64_t &Size) {
+ for (const auto *FD : RD->fields()) {
+ // Scalar arguments on the stack get 4 byte alignment on x86. If the
+ // argument is smaller than 32-bits, expanding the struct will create
+ // alignment padding.
+ if (!is32Or64BitBasicType(FD->getType(), Context))
+ return false;
+
+ // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
+ // how to expand them yet, and the predicate for telling if a bitfield still
+ // counts as "basic" is more complicated than what we were doing previously.
+ if (FD->isBitField())
+ return false;
+
+ Size += Context.getTypeSize(FD->getType());
+ }
+ return true;
+}
+
+static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
+ uint64_t &Size) {
+  // Recursively add the sizes of all bases, then the fields of RD itself.
+ for (const CXXBaseSpecifier &Base : RD->bases()) {
+ if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
+ Size))
+ return false;
+ }
+ if (!addFieldSizes(Context, RD, Size))
+ return false;
+ return true;
+}
+
/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
@@ -1207,8 +1240,9 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
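+  // Sum the sizes of the fields (and, for C++ records, the bases) in bits;
+  // expansion is only done when there is no alignment padding (see the check
+  // at the end of this function).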
+ uint64_t Size = 0;
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- if (!IsWin32StructABI ) {
+ if (!IsWin32StructABI) {
// On non-Windows, we have to conservatively match our old bitcode
// prototypes in order to be ABI-compatible at the bitcode level.
if (!CXXRD->isCLike())
@@ -1217,30 +1251,12 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
// Don't do this for dynamic classes.
if (CXXRD->isDynamicClass())
return false;
- // Don't do this if there are any non-empty bases.
- for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
- if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true))
- return false;
- }
}
- }
-
- uint64_t Size = 0;
-
- for (const auto *FD : RD->fields()) {
- // Scalar arguments on the stack get 4 byte alignment on x86. If the
- // argument is smaller than 32-bits, expanding the struct will create
- // alignment padding.
- if (!is32Or64BitBasicType(FD->getType(), getContext()))
+ if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
return false;
-
- // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
- // how to expand them yet, and the predicate for telling if a bitfield still
- // counts as "basic" is more complicated than what we were doing previously.
- if (FD->isBitField())
+ } else {
+ if (!addFieldSizes(getContext(), RD, Size))
return false;
-
- Size += getContext().getTypeSize(FD->getType());
}
// We can do this if there was no alignment padding.
@@ -1885,10 +1901,10 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
// Now add the 'alignstack' attribute with a value of 16.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(16);
- Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- B));
+ Fn->addAttributes(
+ llvm::AttributeList::FunctionIndex,
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex, B));
}
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -4819,7 +4835,7 @@ public:
: TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
- return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
+ return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
@@ -4901,7 +4917,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
unsigned Alignment = getContext().getTypeAlign(Ty);
- Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
@@ -4951,7 +4967,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
unsigned Alignment = getContext().getTypeAlign(RetTy);
- Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
@@ -5433,10 +5449,10 @@ public:
// the backend to perform a realignment as part of the function prologue.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(8);
- Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- B));
+ Fn->addAttributes(
+ llvm::AttributeList::FunctionIndex,
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex, B));
}
};
@@ -6884,6 +6900,31 @@ MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
}
//===----------------------------------------------------------------------===//
+// AVR ABI Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ AVRTargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override {
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD) return;
+ auto *Fn = cast<llvm::Function>(GV);
+
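+    // Forward the AVR-specific interrupt and signal attributes as LLVM
+    // function attributes so the AVR backend can identify interrupt service
+    // routines.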
+ if (FD->getAttr<AVRInterruptAttr>())
+ Fn->addFnAttr("interrupt");
+
+ if (FD->getAttr<AVRSignalAttr>())
+ Fn->addFnAttr("signal");
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
// Currently subclassed only to implement custom OpenCL C function attribute
// handling.
@@ -7261,9 +7302,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
llvm::Function *F = cast<llvm::Function>(GV);
- if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
- unsigned Min = Attr->getMin();
- unsigned Max = Attr->getMax();
+ const auto *ReqdWGS = M.getLangOpts().OpenCL ?
+ FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+ const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
+ if (ReqdWGS || FlatWGS) {
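+    // Prefer an explicit amdgpu_flat_work_group_size; otherwise derive both
+    // bounds from the product of the reqd_work_group_size dimensions.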
+ unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
+ unsigned Max = FlatWGS ? FlatWGS->getMax() : 0;
+ if (ReqdWGS && Min == 0 && Max == 0)
+ Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
if (Min != 0) {
assert(Min <= Max && "Min must be less than or equal Max");
@@ -8386,6 +8432,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::mips64el:
return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
+ case llvm::Triple::avr:
+ return SetCGInfo(new AVRTargetCodeGenInfo(Types));
+
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be: {
AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;