author    Dimitry Andric <dim@FreeBSD.org>  2018-07-28 11:06:01 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2018-07-28 11:06:01 +0000
commit    486754660bb926339aefcf012a3f848592babb8b (patch)
tree      ecdbc446c9876f4f120f701c243373cd3cb43db3 /lib/CodeGen
parent    55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/ABIInfo.h | 15
-rw-r--r-- lib/CodeGen/BackendUtil.cpp | 282
-rw-r--r-- lib/CodeGen/CGAtomic.cpp | 67
-rw-r--r-- lib/CodeGen/CGBlocks.cpp | 493
-rw-r--r-- lib/CodeGen/CGBlocks.h | 9
-rw-r--r-- lib/CodeGen/CGBuilder.h | 37
-rw-r--r-- lib/CodeGen/CGBuiltin.cpp | 3055
-rw-r--r-- lib/CodeGen/CGCUDANV.cpp | 394
-rw-r--r-- lib/CodeGen/CGCXX.cpp | 29
-rw-r--r-- lib/CodeGen/CGCXXABI.cpp | 14
-rw-r--r-- lib/CodeGen/CGCXXABI.h | 29
-rw-r--r-- lib/CodeGen/CGCall.cpp | 510
-rw-r--r-- lib/CodeGen/CGCall.h | 114
-rw-r--r-- lib/CodeGen/CGClass.cpp | 111
-rw-r--r-- lib/CodeGen/CGCleanup.cpp | 48
-rw-r--r-- lib/CodeGen/CGCleanup.h | 9
-rw-r--r-- lib/CodeGen/CGCoroutine.cpp | 73
-rw-r--r-- lib/CodeGen/CGDebugInfo.cpp | 508
-rw-r--r-- lib/CodeGen/CGDebugInfo.h | 69
-rw-r--r-- lib/CodeGen/CGDecl.cpp | 568
-rw-r--r-- lib/CodeGen/CGDeclCXX.cpp | 25
-rw-r--r-- lib/CodeGen/CGException.cpp | 299
-rw-r--r-- lib/CodeGen/CGExpr.cpp | 201
-rw-r--r-- lib/CodeGen/CGExprAgg.cpp | 454
-rw-r--r-- lib/CodeGen/CGExprCXX.cpp | 80
-rw-r--r-- lib/CodeGen/CGExprComplex.cpp | 13
-rw-r--r-- lib/CodeGen/CGExprConstant.cpp | 174
-rw-r--r-- lib/CodeGen/CGExprScalar.cpp | 108
-rw-r--r-- lib/CodeGen/CGGPUBuiltin.cpp | 13
-rw-r--r-- lib/CodeGen/CGLoopInfo.h | 70
-rw-r--r-- lib/CodeGen/CGNonTrivialStruct.cpp | 885
-rw-r--r-- lib/CodeGen/CGObjC.cpp | 68
-rw-r--r-- lib/CodeGen/CGObjCGNU.cpp | 1808
-rw-r--r-- lib/CodeGen/CGObjCMac.cpp | 47
-rw-r--r-- lib/CodeGen/CGOpenCLRuntime.cpp | 81
-rw-r--r-- lib/CodeGen/CGOpenCLRuntime.h | 33
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.cpp | 4158
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.h | 998
-rw-r--r-- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 2300
-rw-r--r-- lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 165
-rw-r--r-- lib/CodeGen/CGRecordLayout.h | 18
-rw-r--r-- lib/CodeGen/CGRecordLayoutBuilder.cpp | 83
-rw-r--r-- lib/CodeGen/CGStmt.cpp | 51
-rw-r--r-- lib/CodeGen/CGStmtOpenMP.cpp | 1676
-rw-r--r-- lib/CodeGen/CGVTT.cpp | 2
-rw-r--r-- lib/CodeGen/CGVTables.cpp | 283
-rw-r--r-- lib/CodeGen/CGVTables.h | 10
-rw-r--r-- lib/CodeGen/CGValue.h | 44
-rw-r--r-- lib/CodeGen/CMakeLists.txt | 7
-rw-r--r-- lib/CodeGen/CodeGenAction.cpp | 54
-rw-r--r-- lib/CodeGen/CodeGenFunction.cpp | 255
-rw-r--r-- lib/CodeGen/CodeGenFunction.h | 928
-rw-r--r-- lib/CodeGen/CodeGenModule.cpp | 1020
-rw-r--r-- lib/CodeGen/CodeGenModule.h | 155
-rw-r--r-- lib/CodeGen/CodeGenPGO.cpp | 8
-rw-r--r-- lib/CodeGen/CodeGenTBAA.cpp | 31
-rw-r--r-- lib/CodeGen/CodeGenTBAA.h | 9
-rw-r--r-- lib/CodeGen/CodeGenTypeCache.h | 2
-rw-r--r-- lib/CodeGen/CodeGenTypes.cpp | 27
-rw-r--r-- lib/CodeGen/CodeGenTypes.h | 9
-rw-r--r-- lib/CodeGen/ConstantEmitter.h | 2
-rw-r--r-- lib/CodeGen/CoverageMappingGen.cpp | 153
-rw-r--r-- lib/CodeGen/CoverageMappingGen.h | 16
-rw-r--r-- lib/CodeGen/ItaniumCXXABI.cpp | 423
-rw-r--r-- lib/CodeGen/MacroPPCallbacks.cpp | 3
-rw-r--r-- lib/CodeGen/MacroPPCallbacks.h | 3
-rw-r--r-- lib/CodeGen/MicrosoftCXXABI.cpp | 264
-rw-r--r-- lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 12
-rw-r--r-- lib/CodeGen/SanitizerMetadata.cpp | 9
-rw-r--r-- lib/CodeGen/SwiftCallingConv.cpp | 44
-rw-r--r-- lib/CodeGen/TargetInfo.cpp | 615
-rw-r--r-- lib/CodeGen/TargetInfo.h | 12
-rw-r--r-- lib/CodeGen/VarBypassDetector.cpp | 2
73 files changed, 17740 insertions, 6904 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 575506da84d4..feed3833f24a 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -53,12 +53,9 @@ namespace swiftcall {
CodeGen::CodeGenTypes &CGT;
protected:
llvm::CallingConv::ID RuntimeCC;
- llvm::CallingConv::ID BuiltinCC;
public:
ABIInfo(CodeGen::CodeGenTypes &cgt)
- : CGT(cgt),
- RuntimeCC(llvm::CallingConv::C),
- BuiltinCC(llvm::CallingConv::C) {}
+ : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {}
virtual ~ABIInfo();
@@ -77,11 +74,6 @@ namespace swiftcall {
return RuntimeCC;
}
- /// Return the calling convention to use for compiler builtins
- llvm::CallingConv::ID getBuiltinCC() const {
- return BuiltinCC;
- }
-
virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0;
/// EmitVAArg - Emit the target dependent code to load a value of
@@ -108,8 +100,6 @@ namespace swiftcall {
virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const;
- virtual bool shouldSignExtUnsignedType(QualType Ty) const;
-
bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
uint64_t &Members) const;
@@ -137,8 +127,7 @@ namespace swiftcall {
bool supportsSwift() const final override { return true; }
- virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> types,
+ virtual bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> types,
bool asReturnValue) const = 0;
virtual bool isLegalVectorTypeForSwift(CharUnits totalSize,
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index e2349da5f0a4..415bd9626220 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -26,6 +26,7 @@
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -44,17 +45,19 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
@@ -101,7 +104,18 @@ class EmitAssemblyHelper {
///
/// \return True on success.
bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action,
- raw_pwrite_stream &OS);
+ raw_pwrite_stream &OS, raw_pwrite_stream *DwoOS);
+
+ std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) {
+ std::error_code EC;
+ auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC,
+ llvm::sys::fs::F_None);
+ if (EC) {
+ Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message();
+ F.reset();
+ }
+ return F;
+ }
public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags,
@@ -231,10 +245,9 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
PM.add(createAddressSanitizerFunctionPass(
- /*CompileKernel*/ true,
- /*Recover*/ true, /*UseAfterScope*/ false));
- PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true,
- /*Recover*/true));
+ /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false));
+ PM.add(createAddressSanitizerModulePass(
+ /*CompileKernel*/ true, /*Recover*/ true));
}
static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -243,7 +256,13 @@ static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
static_cast<const PassManagerBuilderWrapper &>(Builder);
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
- PM.add(createHWAddressSanitizerPass(Recover));
+ PM.add(createHWAddressSanitizerPass(/*CompileKernel*/ false, Recover));
+}
+
+static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createHWAddressSanitizerPass(
+ /*CompileKernel*/ true, /*Recover*/ true));
}
static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
@@ -361,21 +380,6 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) {
return static_cast<llvm::CodeModel::Model>(CodeModel);
}
-static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) {
- // Keep this synced with the equivalent code in
- // lib/Frontend/CompilerInvocation.cpp
- llvm::Optional<llvm::Reloc::Model> RM;
- RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
- .Case("static", llvm::Reloc::Static)
- .Case("pic", llvm::Reloc::PIC_)
- .Case("ropi", llvm::Reloc::ROPI)
- .Case("rwpi", llvm::Reloc::RWPI)
- .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
- .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
- assert(RM.hasValue() && "invalid PIC model!");
- return *RM;
-}
-
static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) {
if (Action == Backend_EmitObj)
return TargetMachine::CGFT_ObjectFile;
@@ -447,7 +451,10 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.DataSections = CodeGenOpts.DataSections;
Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
+ Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS;
Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
+ Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
+ Options.EmitAddrsig = CodeGenOpts.Addrsig;
if (CodeGenOpts.EnableSplitDwarf)
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
@@ -470,6 +477,23 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.MCOptions.IASSearchPaths.push_back(
Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
}
+static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
+ if (CodeGenOpts.DisableGCov)
+ return None;
+ if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes)
+ return None;
+ // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
+ // LLVM's -default-gcov-version flag is set to something invalid.
+ GCOVOptions Options;
+ Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
+ Options.EmitData = CodeGenOpts.EmitGcovArcs;
+ llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version));
+ Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
+ Options.NoRedZone = CodeGenOpts.DisableRedZone;
+ Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
+ Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
+ return Options;
+}
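
The new getGCOVOptions() helper above centralizes the CodeGenOptions-to-GCOVOptions translation so that both the legacy and the new pass manager can consume it (see the two call sites further down). One small modernization inside it: llvm::copy replaces the old memcpy of the 4-byte coverage version tag. A minimal sketch of that utility (editor's illustration, not part of the commit; the version bytes are placeholders):

    #include "llvm/ADT/STLExtras.h"

    char Src[4] = {'4', '0', '2', '*'}; // placeholder version tag
    char Dst[4];
    // llvm::copy is a range-based wrapper over std::copy: no hand-written
    // size constant to keep in sync with the array length.
    llvm::copy(Src, std::begin(Dst));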
void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
legacy::FunctionPassManager &FPM) {
@@ -501,7 +525,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.Inliner = createFunctionInliningPass(
CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize,
(!CodeGenOpts.SampleProfileFile.empty() &&
- CodeGenOpts.EmitSummaryIndex));
+ CodeGenOpts.PrepareForThinLTO));
}
PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
@@ -511,7 +535,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
- PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex;
+ PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO;
PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO;
PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
@@ -535,6 +559,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addObjCARCOptPass);
}
+ if (LangOpts.CoroutinesTS)
+ addCoroutinePassesToExtensionPoints(PMBuilder);
+
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) {
PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate,
addBoundsCheckingPass);
@@ -572,6 +599,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addHWAddressSanitizerPasses);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) {
+ PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addKernelHWAddressSanitizerPasses);
+ PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addKernelHWAddressSanitizerPasses);
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addMemorySanitizerPass);
@@ -593,9 +627,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addDataFlowSanitizerPass);
}
- if (LangOpts.CoroutinesTS)
- addCoroutinePassesToExtensionPoints(PMBuilder);
-
if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addEfficiencySanitizerPass);
@@ -612,20 +643,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (!CodeGenOpts.RewriteMapFiles.empty())
addSymbolRewriterPass(CodeGenOpts, &MPM);
- if (!CodeGenOpts.DisableGCov &&
- (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
- // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
- // LLVM's -default-gcov-version flag is set to something invalid.
- GCOVOptions Options;
- Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
- Options.EmitData = CodeGenOpts.EmitGcovArcs;
- memcpy(Options.Version, CodeGenOpts.CoverageVersion, 4);
- Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
- Options.NoRedZone = CodeGenOpts.DisableRedZone;
- Options.FunctionNamesInData =
- !CodeGenOpts.CoverageNoFunctionNamesInData;
- Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
- MPM.add(createGCOVProfilerPass(Options));
+ if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) {
+ MPM.add(createGCOVProfilerPass(*Options));
if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo)
MPM.add(createStripSymbolsPass(true));
}
@@ -664,8 +683,6 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
BackendArgs.push_back("-limit-float-precision");
BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
}
- for (const std::string &BackendOption : CodeGenOpts.BackendOptions)
- BackendArgs.push_back(BackendOption.c_str());
BackendArgs.push_back(nullptr);
llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
BackendArgs.data());
@@ -685,7 +702,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
- llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);
+ llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
llvm::TargetOptions Options;
@@ -696,7 +713,8 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
BackendAction Action,
- raw_pwrite_stream &OS) {
+ raw_pwrite_stream &OS,
+ raw_pwrite_stream *DwoOS) {
// Add LibraryInfo.
llvm::Triple TargetTriple(TheModule->getTargetTriple());
std::unique_ptr<TargetLibraryInfoImpl> TLII(
@@ -713,7 +731,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
if (CodeGenOpts.OptimizationLevel > 0)
CodeGenPasses.add(createObjCARCContractPass());
- if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT,
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, DwoOS, CGFT,
/*DisableVerify=*/!CodeGenOpts.VerifyModule)) {
Diags.Report(diag::err_fe_unable_to_interface_with_target);
return false;
@@ -724,7 +742,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
- TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
+ TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr);
setCommandLineOpts(CodeGenOpts);
@@ -752,31 +770,35 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- std::unique_ptr<raw_fd_ostream> ThinLinkOS;
+ std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
switch (Action) {
case Backend_EmitNothing:
break;
case Backend_EmitBC:
- if (CodeGenOpts.EmitSummaryIndex) {
+ if (CodeGenOpts.PrepareForThinLTO) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
- std::error_code EC;
- ThinLinkOS.reset(new llvm::raw_fd_ostream(
- CodeGenOpts.ThinLinkBitcodeFile, EC,
- llvm::sys::fs::F_None));
- if (EC) {
- Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile
- << EC.message();
+ ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
+ if (!ThinLinkOS)
return;
- }
}
+ PerModulePasses.add(createWriteThinLTOBitcodePass(
+ *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
+ } else {
+ // Emit a module summary by default for Regular LTO except for ld64
+ // targets
+ bool EmitLTOSummary =
+ (CodeGenOpts.PrepareForLTO &&
+ llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
+ llvm::Triple::Apple);
+ if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+
PerModulePasses.add(
- createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get()));
+ createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
+ EmitLTOSummary));
}
- else
- PerModulePasses.add(
- createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists));
break;
case Backend_EmitLL:
@@ -785,7 +807,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
default:
- if (!AddEmitPasses(CodeGenPasses, Action, *OS))
+ if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
+ if (!DwoOS)
+ return;
+ }
+ if (!AddEmitPasses(CodeGenPasses, Action, *OS,
+ DwoOS ? &DwoOS->os() : nullptr))
return;
}
@@ -814,6 +842,11 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
PrettyStackTraceString CrashInfo("Code generation");
CodeGenPasses.run(*TheModule);
}
+
+ if (ThinLinkOS)
+ ThinLinkOS->keep();
+ if (DwoOS)
+ DwoOS->keep();
}
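
A note on the switch to llvm::ToolOutputFile above: unlike a raw raw_fd_ostream, ToolOutputFile deletes its file in the destructor unless keep() is called, which is why ThinLinkOS and DwoOS are kept only after CodeGenPasses.run() has finished, and why every early return above leaves no stale output behind. A minimal sketch of that contract (editor's illustration, not part of the commit; the file name is a placeholder and F_None is the LLVM 7-era open flag):

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/ToolOutputFile.h"

    void writeOutput() {
      std::error_code EC;
      llvm::ToolOutputFile Out("example.dwo", EC, llvm::sys::fs::F_None);
      if (EC)
        return;               // open failed; nothing on disk to clean up
      Out.os() << "payload";  // ...passes write through Out.os()...
      Out.keep();             // skip this and the destructor deletes the file
    }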
static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
@@ -827,7 +860,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
case 2:
switch (Opts.OptimizeSize) {
default:
- llvm_unreachable("Invalide optimization level for size!");
+ llvm_unreachable("Invalid optimization level for size!");
case 0:
return PassBuilder::O2;
@@ -854,7 +887,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
/// `EmitAssembly` at some point in the future when the default switches.
void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) {
- TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
+ TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr);
setCommandLineOpts(CodeGenOpts);
// The new pass manager always makes a target machine available to passes
@@ -913,10 +946,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
ModulePassManager MPM(CodeGenOpts.DebugPassManager);
if (!CodeGenOpts.DisableLLVMPasses) {
- bool IsThinLTO = CodeGenOpts.EmitSummaryIndex;
+ bool IsThinLTO = CodeGenOpts.PrepareForThinLTO;
bool IsLTO = CodeGenOpts.PrepareForLTO;
if (CodeGenOpts.OptimizationLevel == 0) {
+ if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
+ MPM.addPass(GCOVProfilerPass(*Options));
+
// Build a minimal pipeline based on the semantics required by Clang,
// which is just that always inlining occurs.
MPM.addPass(AlwaysInlinerPass());
@@ -925,8 +961,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
- // Lastly, add a semantically necessary pass for ThinLTO.
- if (IsThinLTO)
+ // Lastly, add a semantically necessary pass for LTO.
+ if (IsLTO || IsThinLTO)
MPM.addPass(NameAnonGlobalPass());
} else {
// Map our optimization levels into one of the distinct levels used to
@@ -940,6 +976,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
[](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
FPM.addPass(BoundsCheckingPass());
});
+ if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
+ PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) {
+ MPM.addPass(GCOVProfilerPass(*Options));
+ });
if (IsThinLTO) {
MPM = PB.buildThinLTOPreLinkDefaultPipeline(
@@ -948,6 +988,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
} else if (IsLTO) {
MPM = PB.buildLTOPreLinkDefaultPipeline(Level,
CodeGenOpts.DebugPassManager);
+ MPM.addPass(NameAnonGlobalPass());
} else {
MPM = PB.buildPerModuleDefaultPipeline(Level,
CodeGenOpts.DebugPassManager);
@@ -959,7 +1000,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// create that pass manager here and use it as needed below.
legacy::PassManager CodeGenPasses;
bool NeedCodeGen = false;
- Optional<raw_fd_ostream> ThinLinkOS;
+ std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
// Append any output we need to the pass manager.
switch (Action) {
@@ -967,23 +1008,26 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
break;
case Backend_EmitBC:
- if (CodeGenOpts.EmitSummaryIndex) {
+ if (CodeGenOpts.PrepareForThinLTO) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
- std::error_code EC;
- ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC,
- llvm::sys::fs::F_None);
- if (EC) {
- Diags.Report(diag::err_fe_unable_to_open_output)
- << CodeGenOpts.ThinLinkBitcodeFile << EC.message();
+ ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
+ if (!ThinLinkOS)
return;
- }
}
- MPM.addPass(
- ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr));
+ MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
+ : nullptr));
} else {
+ // Emit a module summary by default for Regular LTO except for ld64
+ // targets
+ bool EmitLTOSummary =
+ (CodeGenOpts.PrepareForLTO &&
+ llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
+ llvm::Triple::Apple);
+ if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+
MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- CodeGenOpts.EmitSummaryIndex,
- CodeGenOpts.EmitSummaryIndex));
+ EmitLTOSummary));
}
break;
@@ -997,7 +1041,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
NeedCodeGen = true;
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- if (!AddEmitPasses(CodeGenPasses, Action, *OS))
+ if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
+ if (!DwoOS)
+ return;
+ }
+ if (!AddEmitPasses(CodeGenPasses, Action, *OS,
+ DwoOS ? &DwoOS->os() : nullptr))
// FIXME: Should we handle this error differently?
return;
break;
@@ -1017,6 +1067,11 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PrettyStackTraceString CrashInfo("Code generation");
CodeGenPasses.run(*TheModule);
}
+
+ if (ThinLinkOS)
+ ThinLinkOS->keep();
+ if (DwoOS)
+ DwoOS->keep();
}
Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
@@ -1026,16 +1081,22 @@ Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
// The bitcode file may contain multiple modules, we want the one that is
// marked as being the ThinLTO module.
- for (BitcodeModule &BM : *BMsOrErr) {
- Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
- if (LTOInfo && LTOInfo->IsThinLTO)
- return BM;
- }
+ if (const BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr))
+ return *Bm;
return make_error<StringError>("Could not find module summary",
inconvertibleErrorCode());
}
+BitcodeModule *clang::FindThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
+ for (BitcodeModule &BM : BMs) {
+ Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+ if (LTOInfo && LTOInfo->IsThinLTO)
+ return &BM;
+ }
+ return nullptr;
+}
+
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
const HeaderSearchOptions &HeaderOpts,
const CodeGenOptions &CGOpts,
@@ -1067,9 +1128,8 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
// e.g. record required linkage changes.
if (Summary->modulePath() == M->getModuleIdentifier())
continue;
- // Doesn't matter what value we plug in to the map, just needs an entry
- // to provoke importing by thinBackend.
- ImportList[Summary->modulePath()][GUID] = 1;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
}
std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports;
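
The FindThinLTOModule(MutableArrayRef<BitcodeModule>) overload added above lets distributed-backend callers pick the ThinLTO member out of a multi-module bitcode file without going through the MemoryBufferRef wrapper. A hedged usage sketch (editor's illustration; assumes llvm::getBitcodeModuleList from llvm/Bitcode/BitcodeReader.h and an already-available MemoryBufferRef MBRef):

    llvm::Expected<std::vector<llvm::BitcodeModule>> BMsOrErr =
        llvm::getBitcodeModuleList(MBRef);
    if (BMsOrErr) {
      if (llvm::BitcodeModule *BM = clang::FindThinLTOModule(*BMsOrErr)) {
        // BM is the module whose BitcodeLTOInfo has IsThinLTO set; parse it
        // (e.g. BM->parseModule(Ctx)) before handing it to the backend.
      }
    }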
@@ -1100,15 +1160,27 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
return llvm::make_unique<lto::NativeObjectStream>(std::move(OS));
};
lto::Config Conf;
+ if (CGOpts.SaveTempsFilePrefix != "") {
+ if (Error E = Conf.addSaveTemps(CGOpts.SaveTempsFilePrefix + ".",
+ /* UseInputModulePath */ false)) {
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ errs() << "Error setting up ThinLTO save-temps: " << EIB.message()
+ << '\n';
+ });
+ }
+ }
Conf.CPU = TOpts.CPU;
Conf.CodeModel = getCodeModel(CGOpts);
Conf.MAttrs = TOpts.Features;
- Conf.RelocModel = getRelocModel(CGOpts);
+ Conf.RelocModel = CGOpts.RelocationModel;
Conf.CGOptLevel = getCGOptLevel(CGOpts);
initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
Conf.DebugPassManager = CGOpts.DebugPassManager;
+ Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness;
+ Conf.RemarksFilename = CGOpts.OptRecordFile;
+ Conf.DwoPath = CGOpts.SplitDwarfFile;
switch (Action) {
case Backend_EmitNothing:
Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
@@ -1123,7 +1195,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
break;
case Backend_EmitBC:
Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
- WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists);
+ WriteBitcodeToFile(*M, *OS, CGOpts.EmitLLVMUseLists);
return false;
};
break;
@@ -1132,7 +1204,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
break;
}
if (Error E = thinBackend(
- Conf, 0, AddStream, *M, *CombinedIndex, ImportList,
+ Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
@@ -1148,6 +1220,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
const llvm::DataLayout &TDesc, Module *M,
BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
+ std::unique_ptr<llvm::Module> EmptyModule;
if (!CGOpts.ThinLTOIndexFile.empty()) {
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into runThinLTOBackend, which will run the
@@ -1165,11 +1238,22 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// A null CombinedIndex means we should skip ThinLTO compilation
// (LLVM will optionally ignore empty index files, returning null instead
// of an error).
- bool DoThinLTOBackend = CombinedIndex != nullptr;
- if (DoThinLTOBackend) {
- runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
- LOpts, std::move(OS), CGOpts.SampleProfileFile, Action);
- return;
+ if (CombinedIndex) {
+ if (!CombinedIndex->skipModuleByDistributedBackend()) {
+ runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
+ LOpts, std::move(OS), CGOpts.SampleProfileFile,
+ Action);
+ return;
+ }
+ // Distributed indexing detected that nothing from the module is needed
+ // for the final linking. So we can skip the compilation. We still need to
+ // output an empty object file to make sure that a linker does not fail
+ // trying to read it. Also for some features, like CFI, we must skip
+ // the compilation as CombinedIndex does not contain all required
+ // information.
+ EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext());
+ EmptyModule->setTargetTriple(M->getTargetTriple());
+ M = EmptyModule.get();
}
}
@@ -1228,7 +1312,7 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
// Save llvm.compiler.used and remove it.
SmallVector<Constant*, 2> UsedArray;
- SmallSet<GlobalValue*, 4> UsedGlobals;
+ SmallPtrSet<GlobalValue*, 4> UsedGlobals;
Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0);
GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true);
for (auto *GV : UsedGlobals) {
@@ -1253,7 +1337,7 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
// If the input is LLVM Assembly, bitcode is produced by serializing
// the module. Use-lists order need to be perserved in this case.
llvm::raw_string_ostream OS(Data);
- llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+ llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true);
ModuleData =
ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
} else
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 6862fd811186..b34bcdc1fc38 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -18,6 +18,7 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
@@ -186,7 +187,7 @@ namespace {
RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
SourceLocation loc, bool AsValue) const;
- /// \brief Converts a rvalue to integer value.
+ /// Converts a rvalue to integer value.
llvm::Value *convertRValueToInt(RValue RVal) const;
RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
@@ -207,13 +208,13 @@ namespace {
LVal.getBaseInfo(), LVal.getTBAAInfo());
}
- /// \brief Emits atomic load.
+ /// Emits atomic load.
/// \returns Loaded value.
RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
bool AsValue, llvm::AtomicOrdering AO,
bool IsVolatile);
- /// \brief Emits atomic compare-and-exchange sequence.
+ /// Emits atomic compare-and-exchange sequence.
/// \param Expected Expected value.
/// \param Desired Desired value.
/// \param Success Atomic ordering for success operation.
@@ -229,13 +230,13 @@ namespace {
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false);
- /// \brief Emits atomic update.
+ /// Emits atomic update.
/// \param AO Atomic ordering.
/// \param UpdateOp Update operation for the current lvalue.
void EmitAtomicUpdate(llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
- /// \brief Emits atomic update.
+ /// Emits atomic update.
/// \param AO Atomic ordering.
void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
bool IsVolatile);
@@ -243,25 +244,25 @@ namespace {
/// Materialize an atomic r-value in atomic-layout memory.
Address materializeRValue(RValue rvalue) const;
- /// \brief Creates temp alloca for intermediate operations on atomic value.
+ /// Creates temp alloca for intermediate operations on atomic value.
Address CreateTempAlloca() const;
private:
bool requiresMemSetZero(llvm::Type *type) const;
- /// \brief Emits atomic load as a libcall.
+ /// Emits atomic load as a libcall.
void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
llvm::AtomicOrdering AO, bool IsVolatile);
- /// \brief Emits atomic load as LLVM instruction.
+ /// Emits atomic load as LLVM instruction.
llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
- /// \brief Emits atomic compare-and-exchange op as a libcall.
+ /// Emits atomic compare-and-exchange op as a libcall.
llvm::Value *EmitAtomicCompareExchangeLibcall(
llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
llvm::AtomicOrdering Success =
llvm::AtomicOrdering::SequentiallyConsistent,
llvm::AtomicOrdering Failure =
llvm::AtomicOrdering::SequentiallyConsistent);
- /// \brief Emits atomic compare-and-exchange op as LLVM instruction.
+ /// Emits atomic compare-and-exchange op as LLVM instruction.
std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
llvm::AtomicOrdering Success =
@@ -269,19 +270,19 @@ namespace {
llvm::AtomicOrdering Failure =
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false);
- /// \brief Emit atomic update as libcalls.
+ /// Emit atomic update as libcalls.
void
EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
- /// \brief Emit atomic update as LLVM instructions.
+ /// Emit atomic update as LLVM instructions.
void EmitAtomicUpdateOp(llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
- /// \brief Emit atomic update as libcalls.
+ /// Emit atomic update as libcalls.
void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal,
bool IsVolatile);
- /// \brief Emit atomic update as LLVM instructions.
+ /// Emit atomic update as LLVM instructions.
void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRal,
bool IsVolatile);
};
@@ -590,11 +591,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_min:
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
: llvm::AtomicRMWInst::UMin;
break;
case AtomicExpr::AO__opencl_atomic_fetch_max:
+ case AtomicExpr::AO__atomic_fetch_max:
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
: llvm::AtomicRMWInst::UMax;
break;
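
The AO__atomic_fetch_min/AO__atomic_fetch_max cases added above lower the GCC-style builtins to atomicrmw min/umin or max/umax, with signedness taken from the value type. A short source-level sketch of what now reaches this switch (editor's illustration; __atomic_fetch_min and __atomic_fetch_max are Clang extensions):

    int shared = 42;
    // Returns the old value (42) and stores min(42, 7); a signed int
    // selects 'atomicrmw min' in the switch above.
    int old = __atomic_fetch_min(&shared, 7, __ATOMIC_SEQ_CST);

    unsigned ushared = 1;
    // Unsigned operands select 'atomicrmw umax'.
    unsigned uold = __atomic_fetch_max(&ushared, 5u, __ATOMIC_SEQ_CST);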
@@ -751,6 +754,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
Address Dest = Address::invalid();
Address Ptr = EmitPointerWithAlignment(E->getPtr());
+ if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
+ E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
+ LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
+ EmitAtomicInit(E->getVal1(), lvalue);
+ return RValue::get(nullptr);
+ }
+
CharUnits sizeChars, alignChars;
std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
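
Hoisting the __c11_atomic_init/__opencl_atomic_init handling above the size and alignment computation matters because atomic initialization is an ordinary store, not an atomic operation: it returns early and can never take the libcall path or trigger the misalignment warning added in the next hunk. A one-line sketch of the builtin involved (editor's illustration):

    _Atomic int counter;
    // Lowered by EmitAtomicInit to a plain store; no libcall, no warning.
    __c11_atomic_init(&counter, 0);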
@@ -758,12 +768,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
getContext().toBits(sizeChars) > MaxInlineWidthInBits);
- if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
- E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
- LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
- EmitAtomicInit(E->getVal1(), lvalue);
- return RValue::get(nullptr);
- }
+ if (UseLibcall)
+ CGM.getDiags().Report(E->getLocStart(), diag::warn_atomic_op_misaligned);
llvm::Value *Order = EmitScalarExpr(E->getOrder());
llvm::Value *Scope =
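
With the hunk above, an atomic operation whose operand is under-aligned (or wider than MaxInlineWidthInBits) takes the __atomic_* libcall path and is now diagnosed with warn_atomic_op_misaligned. A minimal way to hit that path (editor's illustration; uses the packed-struct attribute and C11 _Atomic as Clang accepts them):

    struct __attribute__((packed)) Packed {
      char tag;
      _Atomic int counter; // only 1-byte aligned inside the packed struct
    };

    int load(struct Packed *p) {
      // A 4-byte atomic at 1-byte alignment: emitted as a __atomic_load
      // libcall, now with a warn_atomic_op_misaligned diagnostic.
      return __c11_atomic_load(&p->counter, __ATOMIC_SEQ_CST);
    }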
@@ -855,6 +861,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_or_fetch:
case AtomicExpr::AO__atomic_xor_fetch:
case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_max:
Val1 = EmitValToTemp(*this, E->getVal1());
break;
}
@@ -909,6 +917,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_or_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
case AtomicExpr::AO__atomic_xor_fetch:
+ case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_max:
// For these, only library calls for certain sizes exist.
UseOptimizedLibcall = true;
break;
@@ -1091,6 +1101,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), sizeChars);
break;
+ case AtomicExpr::AO__atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_min"
@@ -1098,6 +1109,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
LoweredMemTy, E->getExprLoc(), sizeChars);
break;
+ case AtomicExpr::AO__atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_max"
@@ -1160,7 +1172,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
if (UseOptimizedLibcall && Res.getScalarVal()) {
llvm::Value *ResVal = Res.getScalarVal();
if (PostOp) {
- llvm::Value *LoadVal1 = Args[1].RV.getScalarVal();
+ llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
}
if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
@@ -1508,11 +1520,13 @@ void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
// which means that the caller is responsible for having zeroed
// any padding. Just do an aggregate copy of that type.
if (rvalue.isAggregate()) {
- CGF.EmitAggregateCopy(getAtomicAddress(),
- rvalue.getAggregateAddress(),
- getAtomicType(),
- (rvalue.isVolatileQualified()
- || LVal.isVolatileQualified()));
+ LValue Dest = CGF.MakeAddrLValue(getAtomicAddress(), getAtomicType());
+ LValue Src = CGF.MakeAddrLValue(rvalue.getAggregateAddress(),
+ getAtomicType());
+ bool IsVolatile = rvalue.isVolatileQualified() ||
+ LVal.isVolatileQualified();
+ CGF.EmitAggregateCopy(Dest, Src, getAtomicType(),
+ AggValueSlot::DoesNotOverlap, IsVolatile);
return;
}
@@ -2007,6 +2021,7 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap,
Zeroed ? AggValueSlot::IsZeroed :
AggValueSlot::IsNotZeroed);
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 5f73d4cf7913..617856a7b43e 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -66,7 +66,7 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
/// buildBlockDescriptor - Build the block descriptor meta-data for a block.
/// buildBlockDescriptor is accessed from 5th field of the Block_literal
/// meta-data and contains stationary information about the block literal.
-/// Its definition will have 4 (or optinally 6) words.
+/// Its definition will have 4 (or optionally 6) words.
/// \code
/// struct Block_descriptor {
/// unsigned long reserved;
@@ -104,7 +104,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
elements.addInt(ulong, blockInfo.BlockSize.getQuantity());
// Optional copy/dispose helpers.
- if (blockInfo.NeedsCopyDispose) {
+ if (blockInfo.needsCopyDisposeHelpers()) {
// copy_func_helper_decl
elements.add(buildCopyHelper(CGM, blockInfo));
@@ -159,6 +159,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
/// These are the flags (with corresponding bit number) that the
/// compiler is actually supposed to know about.
+ /// 23. BLOCK_IS_NOESCAPE - indicates that the block is non-escaping
/// 25. BLOCK_HAS_COPY_DISPOSE - indicates that the block
/// descriptor provides copy and dispose helper functions
/// 26. BLOCK_HAS_CXX_OBJ - indicates that there's a captured
@@ -307,25 +308,12 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
assert(elementTypes.empty());
if (CGM.getLangOpts().OpenCL) {
- // The header is basically 'struct { int; int; generic void *;
+ // The header is basically 'struct { int; int;
// custom_fields; }'. Assert that struct is packed.
- auto GenericAS =
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
- auto GenPtrAlign =
- CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
- auto GenPtrSize =
- CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
- assert(CGM.getIntSize() <= GenPtrSize);
- assert(CGM.getIntAlign() <= GenPtrAlign);
- assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
elementTypes.push_back(CGM.IntTy); /* total size */
elementTypes.push_back(CGM.IntTy); /* align */
- elementTypes.push_back(
- CGM.getOpenCLRuntime()
- .getGenericVoidPointerType()); /* invoke function */
- unsigned Offset =
- 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
- unsigned BlockAlign = GenPtrAlign.getQuantity();
+ unsigned Offset = 2 * CGM.getIntSize().getQuantity();
+ unsigned BlockAlign = CGM.getIntAlign().getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -343,7 +331,7 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
info.BlockSize = CharUnits::fromQuantity(Offset);
} else {
// The header is basically 'struct { void *; int; int; void *; void *; }'.
- // Assert that that struct is packed.
+ // Assert that the struct is packed.
assert(CGM.getIntSize() <= CGM.getPointerSize());
assert(CGM.getIntAlign() <= CGM.getPointerAlign());
assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
@@ -477,6 +465,14 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
info.NeedsCopyDispose = true;
info.HasCXXObject = true;
+ // So do C structs that require non-trivial copy construction or
+ // destruction.
+ } else if (variable->getType().isNonTrivialToPrimitiveCopy() ==
+ QualType::PCK_Struct ||
+ variable->getType().isDestructedType() ==
+ QualType::DK_nontrivial_c_struct) {
+ info.NeedsCopyDispose = true;
+
// And so do types with destructors.
} else if (CGM.getLangOpts().CPlusPlus) {
if (const CXXRecordDecl *record =
@@ -705,11 +701,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
/// kind of cleanup object is a BlockDecl*.
void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) {
assert(E->getNumObjects() != 0);
- ArrayRef<ExprWithCleanups::CleanupObject> cleanups = E->getObjects();
- for (ArrayRef<ExprWithCleanups::CleanupObject>::iterator
- i = cleanups.begin(), e = cleanups.end(); i != e; ++i) {
- enterBlockScope(*this, *i);
- }
+ for (const ExprWithCleanups::CleanupObject &C : E->getObjects())
+ enterBlockScope(*this, C);
}
/// Find the layout for the given block in a linked list and remove it.
@@ -740,27 +733,19 @@ void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) {
}
/// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
- llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
// If the block has no captures, we won't have a pre-computed
// layout for it.
if (!blockExpr->getBlockDecl()->hasCaptures()) {
// The block literal is emitted as a global variable, and the block invoke
// function has to be extracted from its initializer.
if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
- if (InvokeF) {
- auto *GV = cast<llvm::GlobalVariable>(
- cast<llvm::Constant>(Block)->stripPointerCasts());
- auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
- *InvokeF = cast<llvm::Function>(
- BlockInit->getAggregateElement(2)->stripPointerCasts());
- }
return Block;
}
CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
computeBlockInfo(CGM, this, blockInfo);
blockInfo.BlockExpression = blockExpr;
- return EmitBlockLiteral(blockInfo, InvokeF);
+ return EmitBlockLiteral(blockInfo);
}
// Find the block info for this block and take ownership of it.
@@ -769,28 +754,17 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
blockExpr->getBlockDecl()));
blockInfo->BlockExpression = blockExpr;
- return EmitBlockLiteral(*blockInfo, InvokeF);
+ return EmitBlockLiteral(*blockInfo);
}
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
- llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
- auto GenVoidPtrTy =
- IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
- LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
- auto GenVoidPtrSize = CharUnits::fromQuantity(
- CGM.getTarget().getPointerWidth(
- CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
- 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
CodeGenFunction BlockCGF{CGM, true};
BlockCGF.SanOpts = SanOpts;
auto *InvokeFn = BlockCGF.GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
- if (InvokeF)
- *InvokeF = InvokeFn;
- auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
@@ -805,8 +779,13 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
llvm::Constant *descriptor;
BlockFlags flags;
if (!IsOpenCL) {
- isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(),
- VoidPtrTy);
+ // If the block is non-escaping, set field 'isa 'to NSConcreteGlobalBlock
+ // and set the BLOCK_IS_GLOBAL bit of field 'flags'. Copying a non-escaping
+ // block just returns the original block and releasing it is a no-op.
+ llvm::Constant *blockISA = blockInfo.getBlockDecl()->doesNotEscape()
+ ? CGM.getNSConcreteGlobalBlock()
+ : CGM.getNSConcreteStackBlock();
+ isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy);
// Build the block descriptor.
descriptor = buildBlockDescriptor(CGM, blockInfo);
@@ -815,12 +794,14 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
flags = BLOCK_HAS_SIGNATURE;
if (blockInfo.HasCapturedVariableLayout)
flags |= BLOCK_HAS_EXTENDED_LAYOUT;
- if (blockInfo.NeedsCopyDispose)
+ if (blockInfo.needsCopyDisposeHelpers())
flags |= BLOCK_HAS_COPY_DISPOSE;
if (blockInfo.HasCXXObject)
flags |= BLOCK_HAS_CXX_OBJ;
if (blockInfo.UsesStret)
flags |= BLOCK_USE_STRET;
+ if (blockInfo.getBlockDecl()->doesNotEscape())
+ flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL;
}
auto projectField =
@@ -859,11 +840,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
getIntSize(), "block.align");
}
- addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
- if (!IsOpenCL)
+ if (!IsOpenCL) {
+ addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
+ getPointerSize(), "block.invoke");
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
- else if (auto *Helper =
- CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ } else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
addHeaderField(
I.first,
@@ -913,7 +895,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
const CGBlockInfo::Capture &enclosingCapture =
BlockInfo->getCapture(variable);
- // This is a [[type]]*, except that a byref entry wil just be an i8**.
+ // This is a [[type]]*, except that a byref entry will just be an i8**.
src = Builder.CreateStructGEP(LoadBlockStruct(),
enclosingCapture.getIndex(),
enclosingCapture.getOffset(),
@@ -955,7 +937,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
AggValueSlot::forAddr(blockField, Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
EmitAggExpr(copyExpr, Slot);
} else {
EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr);
@@ -1024,6 +1007,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
llvm::Value *result = Builder.CreatePointerCast(
blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
+ if (IsOpenCL) {
+ CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+ result);
+ }
+
return result;
}
@@ -1061,38 +1049,23 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
}
llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
+ assert(!getLangOpts().OpenCL && "OpenCL does not need this");
+
if (GenericBlockLiteralType)
return GenericBlockLiteralType;
llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
- if (getLangOpts().OpenCL) {
- // struct __opencl_block_literal_generic {
- // int __size;
- // int __align;
- // __generic void *__invoke;
- // /* custom fields */
- // };
- SmallVector<llvm::Type *, 8> StructFields(
- {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
- if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
- for (auto I : Helper->getCustomFieldTypes())
- StructFields.push_back(I);
- }
- GenericBlockLiteralType = llvm::StructType::create(
- StructFields, "struct.__opencl_block_literal_generic");
- } else {
- // struct __block_literal_generic {
- // void *__isa;
- // int __flags;
- // int __reserved;
- // void (*__invoke)(void *);
- // struct __block_descriptor *__descriptor;
- // };
- GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
- IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
- }
+ // struct __block_literal_generic {
+ // void *__isa;
+ // int __flags;
+ // int __reserved;
+ // void (*__invoke)(void *);
+ // struct __block_descriptor *__descriptor;
+ // };
+ GenericBlockLiteralType =
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
return GenericBlockLiteralType;
}
@@ -1103,27 +1076,21 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
E->getCallee()->getType()->getAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
+ llvm::Value *FuncPtr;
- // Get a pointer to the generic block literal.
- // For OpenCL we generate generic AS void ptr to be able to reuse the same
- // block definition for blocks with captures generated as private AS local
- // variables and without captures generated as global AS program scope
- // variables.
- unsigned AddrSpace = 0;
- if (getLangOpts().OpenCL)
- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
-
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
+ if (!CGM.getLangOpts().OpenCL) {
+ // Get a pointer to the generic block literal.
+ llvm::Type *BlockLiteralTy =
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+ // Bitcast the callee to a block literal.
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
- // Get the function pointer from the literal.
- llvm::Value *FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
- CGM.getLangOpts().OpenCL ? 2 : 3);
+ // Get the function pointer from the literal.
+ FuncPtr =
+ Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
+ }
// Add the block literal.
CallArgList Args;
@@ -1146,7 +1113,11 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
// Load the function.
- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ llvm::Value *Func;
+ if (CGM.getLangOpts().OpenCL)
+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+ else
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
@@ -1255,14 +1226,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Reserved
fields.addInt(CGM.IntTy, 0);
+
+ // Function
+ fields.add(blockFn);
} else {
fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
}
- // Function
- fields.add(blockFn);
-
if (!IsOpenCL) {
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
@@ -1287,6 +1258,10 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
llvm::Constant *Result =
llvm::ConstantExpr::getPointerCast(literal, RequiredType);
CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+ if (CGM.getContext().getLangOpts().OpenCL)
+ CGM.getOpenCLRuntime().recordBlockInfo(
+ blockInfo.BlockExpression,
+ cast<llvm::Function>(blockFn->stripPointerCasts()), Result);
return Result;
}
@@ -1479,8 +1454,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
if (capture.isConstant()) {
auto addr = LocalDeclMap.find(variable)->second;
- DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(),
- Builder);
+ (void)DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(),
+ Builder);
continue;
}
@@ -1513,6 +1488,7 @@ enum class BlockCaptureEntityKind {
CXXRecord, // Copy or destroy
ARCWeak,
ARCStrong,
+ NonTrivialCStruct,
BlockObject, // Assign or release
None
};
@@ -1548,39 +1524,46 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
Flags |= BLOCK_FIELD_IS_WEAK;
return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
}
- if (!T->isObjCRetainableType())
- // For all other types, the memcpy is fine.
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
Flags = BLOCK_FIELD_IS_OBJECT;
bool isBlockPointer = T->isBlockPointerType();
if (isBlockPointer)
Flags = BLOCK_FIELD_IS_BLOCK;
- // Special rules for ARC captures:
- Qualifiers QS = T.getQualifiers();
-
- // We need to register __weak direct captures with the runtime.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+ switch (T.isNonTrivialToPrimitiveCopy()) {
+ case QualType::PCK_Struct:
+ return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct,
+ BlockFieldFlags());
+ case QualType::PCK_ARCWeak:
+ // We need to register __weak direct captures with the runtime.
return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
-
- // We need to retain the copied value for __strong direct captures.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) {
- // If it's a block pointer, we have to copy the block and
- // assign that to the destination pointer, so we might as
- // well use _Block_object_assign. Otherwise we can avoid that.
+ case QualType::PCK_ARCStrong:
+ // We need to retain the copied value for __strong direct captures.
+ // If it's a block pointer, we have to copy the block and assign that to
+ // the destination pointer, so we might as well use _Block_object_assign.
+ // Otherwise we can avoid that.
return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong
: BlockCaptureEntityKind::BlockObject,
Flags);
- }
+ case QualType::PCK_Trivial:
+ case QualType::PCK_VolatileTrivial: {
+ if (!T->isObjCRetainableType())
+ // For all other types, the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
- // Non-ARC captures of retainable pointers are strong and
- // therefore require a call to _Block_object_assign.
- if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
- return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+ // Special rules for ARC captures:
+ Qualifiers QS = T.getQualifiers();
- // Otherwise the memcpy is fine.
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
+ // Non-ARC captures of retainable pointers are strong and
+ // therefore require a call to _Block_object_assign.
+ if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+ // Otherwise the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ }
+ }
+ llvm_unreachable("after exhaustive PrimitiveCopyKind switch");
}
/// Find the set of block captures that need to be explicitly copied or destroyed.
@@ -1602,6 +1585,64 @@ static void findBlockCapturedManagedEntities(
}
}
+namespace {
+/// Release a __block variable.
+struct CallBlockRelease final : EHScopeStack::Cleanup {
+ Address Addr;
+ BlockFieldFlags FieldFlags;
+ bool LoadBlockVarAddr;
+
+ CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue)
+ : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {}
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ llvm::Value *BlockVarAddr;
+ if (LoadBlockVarAddr) {
+ BlockVarAddr = CGF.Builder.CreateLoad(Addr);
+ BlockVarAddr = CGF.Builder.CreateBitCast(BlockVarAddr, CGF.VoidPtrTy);
+ } else {
+ BlockVarAddr = Addr.getPointer();
+ }
+
+ CGF.BuildBlockRelease(BlockVarAddr, FieldFlags);
+ }
+};
+} // end anonymous namespace
+
+static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
+ Address Field, QualType CaptureType,
+ BlockFieldFlags Flags, bool EHOnly,
+ CodeGenFunction &CGF) {
+ switch (CaptureKind) {
+ case BlockCaptureEntityKind::CXXRecord:
+ case BlockCaptureEntityKind::ARCWeak:
+ case BlockCaptureEntityKind::NonTrivialCStruct:
+ case BlockCaptureEntityKind::ARCStrong: {
+ if (CaptureType.isDestructedType() &&
+ (!EHOnly || CGF.needsEHCleanup(CaptureType.isDestructedType()))) {
+ CodeGenFunction::Destroyer *Destroyer =
+ CaptureKind == BlockCaptureEntityKind::ARCStrong
+ ? CodeGenFunction::destroyARCStrongImprecise
+ : CGF.getDestroyer(CaptureType.isDestructedType());
+ CleanupKind Kind =
+ EHOnly ? EHCleanup
+ : CGF.getCleanupKind(CaptureType.isDestructedType());
+ CGF.pushDestroy(Kind, Field, CaptureType, Destroyer, Kind & EHCleanup);
+ }
+ break;
+ }
+ case BlockCaptureEntityKind::BlockObject: {
+ if (!EHOnly || CGF.getLangOpts().Exceptions) {
+ CleanupKind Kind = EHOnly ? EHCleanup : NormalAndEHCleanup;
+ CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true);
+ }
+ break;
+ }
+ case BlockCaptureEntityKind::None:
+ llvm_unreachable("unexpected BlockCaptureEntityKind");
+ }
+}
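
In summary, a sketch of the two modes this helper serves (derived from its call sites):

    // EHOnly == true  : called from the copy helper; pushes an EH-only
    //                   cleanup so captures copied so far are destroyed if
    //                   a later capture's copy throws.
    // EHOnly == false : called from the destroy helper; pushes a normal
    //                   cleanup that always runs.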
+
/// Generate the copy-helper function for a block closure object:
/// static void block_copy_helper(block_t *dst, block_t *src);
/// The runtime will have previously initialized 'dst' by doing a
@@ -1644,7 +1685,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
false,
false);
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
@@ -1665,6 +1706,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
for (const auto &CopiedCapture : CopiedCaptures) {
const BlockDecl::Capture &CI = CopiedCapture.CI;
const CGBlockInfo::Capture &capture = CopiedCapture.Capture;
+ QualType captureType = CI.getVariable()->getType();
BlockFieldFlags flags = CopiedCapture.Flags;
unsigned index = capture.getIndex();
@@ -1677,6 +1719,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
} else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
EmitARCCopyWeak(dstField, srcField);
+ // If this is a C struct that requires non-trivial copy construction, emit a
+ // call to its copy constructor.
+ } else if (CopiedCapture.Kind ==
+ BlockCaptureEntityKind::NonTrivialCStruct) {
+ QualType varType = CI.getVariable()->getType();
+ callCStructCopyConstructor(MakeAddrLValue(dstField, varType),
+ MakeAddrLValue(srcField, varType));
} else {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
@@ -1695,9 +1744,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
} else {
EmitARCRetainNonBlock(srcValue);
- // We don't need this anymore, so kill it. It's not quite
- // worth the annoyance to avoid creating it in the first place.
- cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
+ // Unless EH cleanup is required, we don't need this anymore, so kill
+ // it. It's not quite worth the annoyance to avoid creating it in the
+ // first place.
+ if (!needsEHCleanup(captureType.isDestructedType()))
+ cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
}
} else {
assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject);
@@ -1725,6 +1776,11 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
}
}
}
+
+ // Ensure that we destroy the copied object if an exception is thrown later
+ // in the helper function.
+ pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags,
+ /*EHOnly*/ true, *this);
}
FinishFunction();
@@ -1732,50 +1788,51 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
}
+static BlockFieldFlags
+getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI,
+ QualType T) {
+ BlockFieldFlags Flags = BLOCK_FIELD_IS_OBJECT;
+ if (T->isBlockPointerType())
+ Flags = BLOCK_FIELD_IS_BLOCK;
+ return Flags;
+}
+
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
- BlockFieldFlags Flags;
if (CI.isByRef()) {
- Flags = BLOCK_FIELD_IS_BYREF;
+ BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
}
- if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) {
- if (Record->hasTrivialDestructor())
- return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ switch (T.isDestructedType()) {
+ case QualType::DK_cxx_destructor:
return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
- }
-
- // Other types don't need to be destroy explicitly.
- if (!T->isObjCRetainableType())
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
-
- Flags = BLOCK_FIELD_IS_OBJECT;
- if (T->isBlockPointerType())
- Flags = BLOCK_FIELD_IS_BLOCK;
-
- // Special rules for ARC captures.
- Qualifiers QS = T.getQualifiers();
-
- // Use objc_storeStrong for __strong direct captures; the
- // dynamic tools really like it when we do this.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Strong)
- return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags);
-
- // Support __weak direct captures.
- if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
- return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
-
- // Non-ARC captures are strong, and we need to use
- // _Block_object_dispose.
- if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount)
- return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
-
- // Otherwise, we have nothing to do.
- return std::make_pair(BlockCaptureEntityKind::None, Flags);
+ case QualType::DK_objc_strong_lifetime:
+ // Use objc_storeStrong for __strong direct captures; the
+ // dynamic tools really like it when we do this.
+ return std::make_pair(BlockCaptureEntityKind::ARCStrong,
+ getBlockFieldFlagsForObjCObjectPointer(CI, T));
+ case QualType::DK_objc_weak_lifetime:
+ // Support __weak direct captures.
+ return std::make_pair(BlockCaptureEntityKind::ARCWeak,
+ getBlockFieldFlagsForObjCObjectPointer(CI, T));
+ case QualType::DK_nontrivial_c_struct:
+ return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct,
+ BlockFieldFlags());
+ case QualType::DK_none: {
+ // Non-ARC captures are strong, and we need to use _Block_object_dispose.
+ if (T->isObjCRetainableType() && !T.getQualifiers().hasObjCLifetime() &&
+ !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject,
+ getBlockFieldFlagsForObjCObjectPointer(CI, T));
+ // Otherwise, we have nothing to do.
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ }
+ }
+ llvm_unreachable("after exhaustive DestructionKind switch");
}
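
For reference, a few capture types and the pairs the switch above yields for them (a sketch assuming ARC; the IS_* flags come from getBlockFieldFlagsForObjCObjectPointer):

    // __strong id                    -> (ARCStrong,         IS_OBJECT)
    // __strong dispatch_block_t      -> (ARCStrong,         IS_BLOCK)
    // __weak id                      -> (ARCWeak,           IS_OBJECT)
    // struct with a __strong member  -> (NonTrivialCStruct, no flags)
    // class with non-trivial ~dtor   -> (CXXRecord,         no flags)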
/// Generate the destroy-helper function for a block closure object:
@@ -1814,7 +1871,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
nullptr, SC_Static,
false, false);
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
@@ -1839,29 +1896,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
Address srcField =
Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset());
- // If the captured record has a destructor then call it.
- if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) {
- const auto *Dtor =
- CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor();
- PushDestructorCleanup(Dtor, srcField);
-
- // If this is a __weak capture, emit the release directly.
- } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
- EmitARCDestroyWeak(srcField);
-
- // Destroy strong objects with a call if requested.
- } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
- EmitARCDestroyStrong(srcField, ARCImpreciseLifetime);
-
- // Otherwise we call _Block_object_dispose. It wouldn't be too
- // hard to just emit this as a cleanup if we wanted to make sure
- // that things were done in reverse.
- } else {
- assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject);
- llvm::Value *value = Builder.CreateLoad(srcField);
- value = Builder.CreateBitCast(value, VoidPtrTy);
- BuildBlockRelease(value, flags);
- }
+ pushCaptureCleanup(DestroyedCapture.Kind, srcField,
+ CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this);
}
cleanups.ForceCleanup();
@@ -2020,6 +2056,36 @@ public:
id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
}
};
+
+/// Emits the copy/dispose helpers for a __block variable that is a non-trivial
+/// C struct.
+class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers {
+ QualType VarType;
+
+public:
+ NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type)
+ : BlockByrefHelpers(alignment), VarType(type) {}
+
+ void emitCopy(CodeGenFunction &CGF, Address destField,
+ Address srcField) override {
+ CGF.callCStructMoveConstructor(CGF.MakeAddrLValue(destField, VarType),
+ CGF.MakeAddrLValue(srcField, VarType));
+ }
+
+ bool needsDispose() const override {
+ return VarType.isDestructedType();
+ }
+
+ void emitDispose(CodeGenFunction &CGF, Address field) override {
+ EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin();
+ CGF.pushDestroy(VarType.isDestructedType(), field, VarType);
+ CGF.PopCleanupBlocks(cleanupDepth);
+ }
+
+ void profileImpl(llvm::FoldingSetNodeID &id) const override {
+ id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
+ }
+};
} // end anonymous namespace
static llvm::Constant *
@@ -2059,7 +2125,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
SC_Static,
false, false);
- CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
CGF.StartFunction(FD, R, Fn, FI, args);
@@ -2133,7 +2199,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
SC_Static,
false, false);
- CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
CGF.StartFunction(FD, R, Fn, FI, args);
@@ -2205,6 +2271,13 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr));
}
+ // If type is a non-trivial C struct type that is non-trivial to
+ // destructively move or destroy, build the copy and dispose helpers.
+ if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct ||
+ type.isDestructedType() == QualType::DK_nontrivial_c_struct)
+ return ::buildByrefHelpers(
+ CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type));
+
// Otherwise, if we don't have a retainable type, there's nothing to do;
// note that the runtime does extra copies.
if (!type->isObjCRetainableType()) return nullptr;
@@ -2503,30 +2576,10 @@ void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) {
EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors?
}
-namespace {
- /// Release a __block variable.
- struct CallBlockRelease final : EHScopeStack::Cleanup {
- llvm::Value *Addr;
- CallBlockRelease(llvm::Value *Addr) : Addr(Addr) {}
-
- void Emit(CodeGenFunction &CGF, Flags flags) override {
- // Should we be passing FIELD_IS_WEAK here?
- CGF.BuildBlockRelease(Addr, BLOCK_FIELD_IS_BYREF);
- }
- };
-} // end anonymous namespace
-
-/// Enter a cleanup to destroy a __block variable. Note that this
-/// cleanup should be a no-op if the variable hasn't left the stack
-/// yet; if a cleanup is required for the variable itself, that needs
-/// to be done externally.
-void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) {
- // We don't enter this cleanup if we're in pure-GC mode.
- if (CGM.getLangOpts().getGC() == LangOptions::GCOnly)
- return;
-
- EHStack.pushCleanup<CallBlockRelease>(NormalAndEHCleanup,
- emission.Addr.getPointer());
+void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr,
+ BlockFieldFlags Flags,
+ bool LoadBlockVarAddr) {
+ EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr);
}
/// Adjust the declaration of something from the blocks API.
@@ -2559,11 +2612,11 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM,
}
}
- if (!CGM.getLangOpts().BlocksRuntimeOptional)
- return;
-
- if (GV->isDeclaration() && GV->hasExternalLinkage())
+ if (CGM.getLangOpts().BlocksRuntimeOptional && GV->isDeclaration() &&
+ GV->hasExternalLinkage())
GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+
+ CGM.setDSOLocal(GV);
}
llvm::Constant *CodeGenModule::getBlockObjectDispose() {
diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h
index 80e255f75417..5a8e960ffcc1 100644
--- a/lib/CodeGen/CGBlocks.h
+++ b/lib/CodeGen/CGBlocks.h
@@ -54,6 +54,7 @@ enum BlockByrefFlags {
};
enum BlockLiteralFlags {
+ BLOCK_IS_NOESCAPE = (1 << 23),
BLOCK_HAS_COPY_DISPOSE = (1 << 25),
BLOCK_HAS_CXX_OBJ = (1 << 26),
BLOCK_IS_GLOBAL = (1 << 28),
@@ -214,7 +215,8 @@ public:
/// no non-constant captures.
bool CanBeGlobal : 1;
- /// True if the block needs a custom copy or dispose function.
+ /// True if the block has captures that would necessitate custom copy or
+ /// dispose helper functions if the block were escaping.
bool NeedsCopyDispose : 1;
/// HasCXXObject - True if the block's custom copy/dispose functions
@@ -276,6 +278,11 @@ public:
}
CGBlockInfo(const BlockDecl *blockDecl, StringRef Name);
+
+ // Indicates whether the block needs a custom copy or dispose function.
+ bool needsCopyDisposeHelpers() const {
+ return NeedsCopyDispose && !Block->doesNotEscape();
+ }
};
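
A short example of the new distinction, assuming ARC (take() and use() are illustrative):

    void take(__attribute__((noescape)) void (^blk)(void));
    void use(id obj);

    void caller(id obj) {
      // The __strong capture of 'obj' sets NeedsCopyDispose, but
      // needsCopyDisposeHelpers() is false because the block is noescape,
      // so no copy/dispose helpers are emitted for it.
      take(^{ use(obj); });
    }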
} // end namespace CodeGen
diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h
index 61fe4aac3afa..d2e5eb256d3b 100644
--- a/lib/CodeGen/CGBuilder.h
+++ b/lib/CodeGen/CGBuilder.h
@@ -20,7 +20,7 @@ namespace CodeGen {
class CodeGenFunction;
-/// \brief This is an IRBuilder insertion helper that forwards to
+/// This is an IRBuilder insertion helper that forwards to
/// CodeGenFunction::InsertHelper, which adds necessary metadata to
/// instructions.
class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter {
@@ -29,7 +29,7 @@ public:
explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {}
protected:
- /// \brief This forwards to CodeGenFunction::InsertHelper.
+ /// This forwards to CodeGenFunction::InsertHelper.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock *BB,
llvm::BasicBlock::iterator InsertPt) const;
@@ -244,6 +244,21 @@ public:
Addr.getAlignment().alignmentAtOffset(Offset));
}
+ using CGBuilderBaseTy::CreateConstInBoundsGEP2_32;
+ Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0,
+ unsigned Idx1, const llvm::DataLayout &DL,
+ const llvm::Twine &Name = "") {
+ auto *GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32(
+ Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name));
+ llvm::APInt Offset(
+ DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0,
+ /*IsSigned=*/true);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ llvm_unreachable("offset of GEP with constants is always computable");
+ return Address(GEP, Addr.getAlignment().alignmentAtOffset(
+ CharUnits::fromQuantity(Offset.getSExtValue())));
+ }
+
llvm::Value *CreateConstInBoundsByteGEP(llvm::Value *Ptr, CharUnits Offset,
const llvm::Twine &Name = "") {
assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty);
@@ -258,23 +273,23 @@ public:
using CGBuilderBaseTy::CreateMemCpy;
llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size,
bool IsVolatile = false) {
- auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
- return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size,
- Align.getQuantity(), IsVolatile);
+ return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+ Src.getPointer(), Src.getAlignment().getQuantity(),
+ Size, IsVolatile);
}
llvm::CallInst *CreateMemCpy(Address Dest, Address Src, uint64_t Size,
bool IsVolatile = false) {
- auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
- return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size,
- Align.getQuantity(), IsVolatile);
+ return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+ Src.getPointer(), Src.getAlignment().getQuantity(),
+ Size, IsVolatile);
}
using CGBuilderBaseTy::CreateMemMove;
llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size,
bool IsVolatile = false) {
- auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
- return CreateMemMove(Dest.getPointer(), Src.getPointer(), Size,
- Align.getQuantity(), IsVolatile);
+ return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+ Src.getPointer(), Src.getAlignment().getQuantity(),
+ Size, IsVolatile);
}
using CGBuilderBaseTy::CreateMemSet;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index ba54f8342f1b..0892e84a044c 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -14,6 +14,7 @@
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
+#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
@@ -188,7 +189,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
return RValue::get(Result);
}
-/// @brief Utility to insert an atomic cmpxchg instruction.
+/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E Builtin call expression to convert to cmpxchg.
@@ -319,7 +320,7 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}
-/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
+/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
@@ -384,7 +385,7 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
}
// The encompassing type must have a width greater than or equal to the width
- // of the specified types. Aditionally, if the encompassing type is signed,
+ // of the specified types. Additionally, if the encompassing type is signed,
// its width must be strictly greater than the width of any unsigned types
// given.
unsigned Width = 0;
@@ -478,13 +479,261 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
// LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
Value *Min = Builder.getInt1((Type & 2) != 0);
- // For GCC compatability, __builtin_object_size treat NULL as unknown size.
+ // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
Value *NullIsUnknown = Builder.getTrue();
return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}
-// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
-// handle them here.
+namespace {
+/// A struct to generically describe a bit test intrinsic.
+struct BitTest {
+ enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
+ enum InterlockingKind : uint8_t {
+ Unlocked,
+ Sequential,
+ Acquire,
+ Release,
+ NoFence
+ };
+
+ ActionKind Action;
+ InterlockingKind Interlocking;
+ bool Is64Bit;
+
+ static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
+};
+} // namespace
+
+BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ // Main portable variants.
+ case Builtin::BI_bittest:
+ return {TestOnly, Unlocked, false};
+ case Builtin::BI_bittestandcomplement:
+ return {Complement, Unlocked, false};
+ case Builtin::BI_bittestandreset:
+ return {Reset, Unlocked, false};
+ case Builtin::BI_bittestandset:
+ return {Set, Unlocked, false};
+ case Builtin::BI_interlockedbittestandreset:
+ return {Reset, Sequential, false};
+ case Builtin::BI_interlockedbittestandset:
+ return {Set, Sequential, false};
+
+ // X86-specific 64-bit variants.
+ case Builtin::BI_bittest64:
+ return {TestOnly, Unlocked, true};
+ case Builtin::BI_bittestandcomplement64:
+ return {Complement, Unlocked, true};
+ case Builtin::BI_bittestandreset64:
+ return {Reset, Unlocked, true};
+ case Builtin::BI_bittestandset64:
+ return {Set, Unlocked, true};
+ case Builtin::BI_interlockedbittestandreset64:
+ return {Reset, Sequential, true};
+ case Builtin::BI_interlockedbittestandset64:
+ return {Set, Sequential, true};
+
+ // ARM/AArch64-specific ordering variants.
+ case Builtin::BI_interlockedbittestandset_acq:
+ return {Set, Acquire, false};
+ case Builtin::BI_interlockedbittestandset_rel:
+ return {Set, Release, false};
+ case Builtin::BI_interlockedbittestandset_nf:
+ return {Set, NoFence, false};
+ case Builtin::BI_interlockedbittestandreset_acq:
+ return {Reset, Acquire, false};
+ case Builtin::BI_interlockedbittestandreset_rel:
+ return {Reset, Release, false};
+ case Builtin::BI_interlockedbittestandreset_nf:
+ return {Reset, NoFence, false};
+ }
+ llvm_unreachable("expected only bittest intrinsics");
+}
+
+static char bitActionToX86BTCode(BitTest::ActionKind A) {
+ switch (A) {
+ case BitTest::TestOnly: return '\0';
+ case BitTest::Complement: return 'c';
+ case BitTest::Reset: return 'r';
+ case BitTest::Set: return 's';
+ }
+ llvm_unreachable("invalid action");
+}
+
+static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
+ BitTest BT,
+ const CallExpr *E, Value *BitBase,
+ Value *BitPos) {
+ char Action = bitActionToX86BTCode(BT.Action);
+ char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
+
+ // Build the assembly.
+ SmallString<64> Asm;
+ raw_svector_ostream AsmOS(Asm);
+ if (BT.Interlocking != BitTest::Unlocked)
+ AsmOS << "lock ";
+ AsmOS << "bt";
+ if (Action)
+ AsmOS << Action;
+ AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
+
+ // Build the constraints. FIXME: We should support immediates when possible.
+ std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
+ llvm::IntegerType *IntType = llvm::IntegerType::get(
+ CGF.getLLVMContext(),
+ CGF.getContext().getTypeSize(E->getArg(1)->getType()));
+ llvm::Type *IntPtrType = IntType->getPointerTo();
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
+
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+ return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
+}
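
For illustration, the strings this assembles for two representative builtins (derived from the code above):

    _interlockedbittestandset64:  "lock btsq $2, ($1)\n\tsetc ${0:b}"
    _bittest:                     "btl $2, ($1)\n\tsetc ${0:b}"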
+
+static llvm::AtomicOrdering
+getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
+ switch (I) {
+ case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
+ case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
+ case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
+ case BitTest::Release: return llvm::AtomicOrdering::Release;
+ case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
+ }
+ llvm_unreachable("invalid interlocking");
+}
+
+/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
+/// bits and a bit position and read and optionally modify the bit at that
+/// position. The position index can be arbitrarily large, i.e. it can be larger
+/// than 31 or 63, so we need an indexed load in the general case.
+static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
+ unsigned BuiltinID,
+ const CallExpr *E) {
+ Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+ Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+ BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
+
+ // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
+ // indexing operation internally. Use them if possible.
+ llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
+ if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+ return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
+
+ // Otherwise, use generic code to load one byte and test the bit. Use all but
+ // the bottom three bits as the array index, and the bottom three bits to form
+ // a mask.
+ // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
+ Value *ByteIndex = CGF.Builder.CreateAShr(
+ BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
+ Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
+ Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
+ ByteIndex, "bittest.byteaddr"),
+ CharUnits::One());
+ Value *PosLow =
+ CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
+ llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
+
+ // The updating instructions will need a mask.
+ Value *Mask = nullptr;
+ if (BT.Action != BitTest::TestOnly) {
+ Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
+ "bittest.mask");
+ }
+
+ // Check the action and ordering of the interlocked intrinsics.
+ llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
+
+ Value *OldByte = nullptr;
+ if (Ordering != llvm::AtomicOrdering::NotAtomic) {
+ // Emit a combined atomicrmw load/store operation for the interlocked
+ // intrinsics.
+ llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
+ if (BT.Action == BitTest::Reset) {
+ Mask = CGF.Builder.CreateNot(Mask);
+ RMWOp = llvm::AtomicRMWInst::And;
+ }
+ OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
+ Ordering);
+ } else {
+ // Emit a plain load for the non-interlocked intrinsics.
+ OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
+ Value *NewByte = nullptr;
+ switch (BT.Action) {
+ case BitTest::TestOnly:
+ // Don't store anything.
+ break;
+ case BitTest::Complement:
+ NewByte = CGF.Builder.CreateXor(OldByte, Mask);
+ break;
+ case BitTest::Reset:
+ NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
+ break;
+ case BitTest::Set:
+ NewByte = CGF.Builder.CreateOr(OldByte, Mask);
+ break;
+ }
+ if (NewByte)
+ CGF.Builder.CreateStore(NewByte, ByteAddr);
+ }
+
+ // However we loaded the old byte, either by plain load or atomicrmw, shift
+ // the bit into the low position and mask it to 0 or 1.
+ Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
+ return CGF.Builder.CreateAnd(
+ ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
+}
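
A minimal C model of the generic path, shown for the plain _bittestandset form (the name is illustrative):

    static unsigned char bittestandset(unsigned char *base, long pos) {
      unsigned char *byte = base + (pos >> 3);          // indexed byte load
      unsigned char mask = (unsigned char)(1 << (pos & 0x7));
      unsigned char old = *byte;
      *byte = old | mask;              // Set action; Reset uses & ~mask and
                                       // Complement uses ^ mask instead
      return (old >> (pos & 0x7)) & 1; // previous value of the bit
    }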
+
+namespace {
+enum class MSVCSetJmpKind {
+ _setjmpex,
+ _setjmp3,
+ _setjmp
+};
+} // namespace
+
+/// MSVC handles setjmp a bit differently on different platforms. On every
+/// architecture except 32-bit x86, the frame address is passed. On x86, extra
+/// parameters can be passed as variadic arguments, but we always pass none.
+static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
+ const CallExpr *E) {
+ llvm::Value *Arg1 = nullptr;
+ llvm::Type *Arg1Ty = nullptr;
+ StringRef Name;
+ bool IsVarArg = false;
+ if (SJKind == MSVCSetJmpKind::_setjmp3) {
+ Name = "_setjmp3";
+ Arg1Ty = CGF.Int32Ty;
+ Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
+ IsVarArg = true;
+ } else {
+ Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
+ Arg1Ty = CGF.Int8PtrTy;
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ }
+
+ // Mark the call site and declaration with ReturnsTwice.
+ llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
+ llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+ CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::ReturnsTwice);
+ llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
+ ReturnsTwiceAttr, /*Local=*/true);
+
+ llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
+ CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
+ llvm::Value *Args[] = {Buf, Arg1};
+ llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
+ CS.setAttributes(ReturnsTwiceAttr);
+ return RValue::get(CS.getInstruction());
+}
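
The calls this emits, summarized (the frame address comes from the llvm.frameaddress intrinsic):

    _setjmp3:   _setjmp3(buf, 0)             // x86; variadic, zero extra args
    _setjmp:    _setjmp(buf, frameaddress)
    _setjmpex:  _setjmpex(buf, frameaddress)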
+
+// Many MSVC builtins are available on x64, ARM, and AArch64; to avoid
+// repeating code, we handle them here.
enum class CodeGenFunction::MSVCIntrin {
_BitScanForward,
_BitScanReverse,
@@ -496,7 +745,6 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
- _interlockedbittestandset,
__fastfail,
};
@@ -564,22 +812,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
- case MSVCIntrin::_interlockedbittestandset: {
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
- llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Or, Addr,
- Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
- llvm::AtomicOrdering::SequentiallyConsistent);
- // Shift the relevant bit to the least significant position, truncate to
- // the result type, and test the low bit.
- llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
- llvm::Value *Truncated =
- Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
- return Builder.CreateAnd(Truncated,
- ConstantInt::get(Truncated->getType(), 1));
- }
-
case MSVCIntrin::_InterlockedDecrement: {
llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
@@ -915,7 +1147,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
}
- Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
+ // Negate the product if it would be negative in infinite precision.
+ Result = CGF.Builder.CreateSelect(
+ IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
+
+ Result = CGF.Builder.CreateTrunc(Result, ResTy);
}
assert(Overflow && Result && "Missing overflow or result");
@@ -926,6 +1162,96 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
return RValue::get(Overflow);
}
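
A worked example of the negation added above, for an int8_t result of (-3) * 5:

    // UnsignedResult = 3 * 5 = 15             (product of the magnitudes)
    // IsNegative     = true                   (exactly one operand negative)
    // Result         = select(true, -15, 15) = -15; trunc to i8 gives 0xF1
    // The old code truncated UnsignedResult directly and returned +15.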
+static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
+ Value *&RecordPtr, CharUnits Align, Value *Func,
+ int Lvl) {
+ const auto *RT = RType->getAs<RecordType>();
+ ASTContext &Context = CGF.getContext();
+ RecordDecl *RD = RT->getDecl()->getDefinition();
+ ASTContext &Ctx = RD->getASTContext();
+ const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD);
+ std::string Pad = std::string(Lvl * 4, ' ');
+
+ Value *GString =
+ CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
+ Value *Res = CGF.Builder.CreateCall(Func, {GString});
+
+ static llvm::DenseMap<QualType, const char *> Types;
+ if (Types.empty()) {
+ Types[Context.CharTy] = "%c";
+ Types[Context.BoolTy] = "%d";
+ Types[Context.SignedCharTy] = "%hhd";
+ Types[Context.UnsignedCharTy] = "%hhu";
+ Types[Context.IntTy] = "%d";
+ Types[Context.UnsignedIntTy] = "%u";
+ Types[Context.LongTy] = "%ld";
+ Types[Context.UnsignedLongTy] = "%lu";
+ Types[Context.LongLongTy] = "%lld";
+ Types[Context.UnsignedLongLongTy] = "%llu";
+ Types[Context.ShortTy] = "%hd";
+ Types[Context.UnsignedShortTy] = "%hu";
+ Types[Context.VoidPtrTy] = "%p";
+ Types[Context.FloatTy] = "%f";
+ Types[Context.DoubleTy] = "%f";
+ Types[Context.LongDoubleTy] = "%Lf";
+ Types[Context.getPointerType(Context.CharTy)] = "%s";
+ Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
+ }
+
+ for (const auto *FD : RD->fields()) {
+ uint64_t Off = RL.getFieldOffset(FD->getFieldIndex());
+ Off = Ctx.toCharUnitsFromBits(Off).getQuantity();
+
+ Value *FieldPtr = RecordPtr;
+ if (RD->isUnion())
+ FieldPtr = CGF.Builder.CreatePointerCast(
+ FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
+ else
+ FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
+ FD->getFieldIndex());
+
+ GString = CGF.Builder.CreateGlobalStringPtr(
+ llvm::Twine(Pad)
+ .concat(FD->getType().getAsString())
+ .concat(llvm::Twine(' '))
+ .concat(FD->getNameAsString())
+ .concat(" : ")
+ .str());
+ Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
+ Res = CGF.Builder.CreateAdd(Res, TmpRes);
+
+ QualType CanonicalType =
+ FD->getType().getUnqualifiedType().getCanonicalType();
+
+ // If the field is itself a record type, dump it recursively.
+ if (CanonicalType->isRecordType()) {
+ Value *TmpRes =
+ dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
+ Res = CGF.Builder.CreateAdd(TmpRes, Res);
+ continue;
+ }
+
+ // Pick the best format specifier for the current field, defaulting to %p.
+ llvm::Twine Format = Types.find(CanonicalType) == Types.end()
+ ? Types[Context.VoidPtrTy]
+ : Types[CanonicalType];
+
+ Address FieldAddress = Address(FieldPtr, Align);
+ FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
+
+ // FIXME: Need to handle bitfields here.
+ GString = CGF.Builder.CreateGlobalStringPtr(
+ Format.concat(llvm::Twine('\n')).str());
+ TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
+ Res = CGF.Builder.CreateAdd(Res, TmpRes);
+ }
+
+ GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
+ Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
+ Res = CGF.Builder.CreateAdd(Res, TmpRes);
+ return Res;
+}
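
A short usage sketch for the builtin this lowers (the callback is printf-like; the struct is illustrative):

    #include <stdio.h>

    struct Pair { int I; float F; };

    void demo(void) {
      struct Pair P = {1, 2.0f};
      // Emits one callback call per piece of output and sums the results:
      // "struct Pair {", then a "<type> <name> : <value>" line per field,
      // then "}".
      __builtin_dump_struct(&P, &printf);
    }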
+
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -962,6 +1288,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_copysign:
case Builtin::BI__builtin_copysignf:
case Builtin::BI__builtin_copysignl:
+ case Builtin::BI__builtin_copysignf128:
return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
case Builtin::BIcos:
@@ -994,6 +1321,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl:
+ case Builtin::BI__builtin_fabsf128:
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
case Builtin::BIfloor:
@@ -1154,16 +1482,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_abs:
case Builtin::BI__builtin_labs:
case Builtin::BI__builtin_llabs: {
+ // X < 0 ? -X : X
+ // The negation has 'nsw' because abs of INT_MIN is undefined.
Value *ArgValue = EmitScalarExpr(E->getArg(0));
-
- Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
- Value *CmpResult =
- Builder.CreateICmpSGE(ArgValue,
- llvm::Constant::getNullValue(ArgValue->getType()),
- "abscond");
- Value *Result =
- Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
-
+ Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
+ Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
+ Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
+ Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
return RValue::get(Result);
}
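
The scalar equivalent of the new sequence (a sketch; the nsw negation is what makes __builtin_abs(INT_MIN) undefined, matching C):

    static int abs_lowering(int x) {
      int neg = -x;           // sub nsw: UB for INT_MIN
      return x < 0 ? neg : x; // icmp slt + select
    }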
case Builtin::BI__builtin_conj:
@@ -1190,6 +1515,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(ComplexVal.first);
}
+ case Builtin::BI__builtin_dump_struct: {
+ Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
+ CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
+
+ const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
+ QualType Arg0Type = Arg0->getType()->getPointeeType();
+
+ Value *RecordPtr = EmitScalarExpr(Arg0);
+ Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0);
+ return RValue::get(Res);
+ }
+
case Builtin::BI__builtin_cimag:
case Builtin::BI__builtin_cimagf:
case Builtin::BI__builtin_cimagl:
@@ -1300,20 +1637,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Type *ArgType = Val->getType();
Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
- Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
- Value *ArgZero = llvm::Constant::getNullValue(ArgType);
-
+ unsigned ArgWidth = ArgType->getIntegerBitWidth();
Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
- Shift = Builder.CreateAnd(Shift, Mask);
- Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
-
- Value *RightShifted = Builder.CreateLShr(Val, Shift);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
- Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
- Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
- Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
+ Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask);
+ Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
+ Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
+ Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
+ Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
return RValue::get(Result);
}
case Builtin::BI_rotl8:
@@ -1326,20 +1657,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Type *ArgType = Val->getType();
Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
- Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
- Value *ArgZero = llvm::Constant::getNullValue(ArgType);
-
+ unsigned ArgWidth = ArgType->getIntegerBitWidth();
Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
- Shift = Builder.CreateAnd(Shift, Mask);
- Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
-
- Value *LeftShifted = Builder.CreateShl(Val, Shift);
- Value *RightShifted = Builder.CreateLShr(Val, RightShift);
- Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
- Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
- Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
+ Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask);
+ Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
+ Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
+ Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
+ Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
return RValue::get(Result);
}
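
Both rotate cases now lower to the standard branch-free pattern; a 32-bit sketch (names illustrative):

    static unsigned rotr32(unsigned x, unsigned s) {
      return (x >> (s & 31)) | (x << (-s & 31));
    }
    static unsigned rotl32(unsigned x, unsigned s) {
      return (x << (s & 31)) | (x >> (-s & 31));
    }
    // For s == 0 both shift amounts mask to 0, so the result is x | x == x
    // with no out-of-range shift, which is why the old icmp/select guard
    // could be dropped.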
case Builtin::BI__builtin_unpredictable: {
@@ -1735,6 +2060,63 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
return RValue::get(Dest.getPointer());
}
+ case Builtin::BI__builtin_wmemcmp: {
+ // The MSVC runtime library does not provide a definition of wmemcmp, so we
+ // need an inline implementation.
+ if (!getTarget().getTriple().isOSMSVCRT())
+ break;
+
+ llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
+
+ Value *Dst = EmitScalarExpr(E->getArg(0));
+ Value *Src = EmitScalarExpr(E->getArg(1));
+ Value *Size = EmitScalarExpr(E->getArg(2));
+
+ BasicBlock *Entry = Builder.GetInsertBlock();
+ BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
+ BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
+ BasicBlock *Next = createBasicBlock("wmemcmp.next");
+ BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
+ Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
+ Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
+
+ EmitBlock(CmpGT);
+ PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
+ DstPhi->addIncoming(Dst, Entry);
+ PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
+ SrcPhi->addIncoming(Src, Entry);
+ PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
+ SizePhi->addIncoming(Size, Entry);
+ CharUnits WCharAlign =
+ getContext().getTypeAlignInChars(getContext().WCharTy);
+ Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
+ Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
+ Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
+ Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
+
+ EmitBlock(CmpLT);
+ Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
+ Builder.CreateCondBr(DstLtSrc, Exit, Next);
+
+ EmitBlock(Next);
+ Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
+ Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
+ Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
+ Value *NextSizeEq0 =
+ Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
+ Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
+ DstPhi->addIncoming(NextDst, Next);
+ SrcPhi->addIncoming(NextSrc, Next);
+ SizePhi->addIncoming(NextSize, Next);
+
+ EmitBlock(Exit);
+ PHINode *Ret = Builder.CreatePHI(IntTy, 4);
+ Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
+ Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
+ Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
+ Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
+ return RValue::get(Ret);
+ }
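
The control flow above amounts to this C loop (wchar_t is unsigned 16-bit on MSVC targets, hence the unsigned compares; the name is illustrative):

    #include <stddef.h>
    #include <wchar.h>

    static int wmemcmp_sketch(const wchar_t *d, const wchar_t *s, size_t n) {
      for (; n != 0; --n, ++d, ++s) {  // "wmemcmp.next"
        if (*d > *s) return 1;         // "wmemcmp.gt"
        if (*d < *s) return -1;        // "wmemcmp.lt"
      }
      return 0;                        // "wmemcmp.exit"
    }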
case Builtin::BI__builtin_dwarf_cfa: {
// The offset in bytes from the first argument to the CFA.
//
@@ -2033,7 +2415,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__sync_synchronize: {
// We assume this is supposed to correspond to a C++0x-style
// sequentially-consistent fence (i.e. this is only usable for
- // synchonization, not device I/O or anything like that). This intrinsic
+ // synchronization, not device I/O or anything like that). This intrinsic
// is really badly designed in the sense that in theory, there isn't
// any way to safely use it... but in practice, it mostly works
// to use it with non-atomic loads and stores to get acquire/release
@@ -2548,11 +2930,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_addressof:
return RValue::get(EmitLValue(E->getArg(0)).getPointer());
case Builtin::BI__builtin_operator_new:
- return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
- E->getArg(0), false);
+ return EmitBuiltinNewDeleteCall(
+ E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
case Builtin::BI__builtin_operator_delete:
- return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
- E->getArg(0), true);
+ return EmitBuiltinNewDeleteCall(
+ E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
+
case Builtin::BI__noop:
// __noop always evaluates to an integer literal zero.
return RValue::get(ConstantInt::get(IntTy, 0));
@@ -2639,9 +3022,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedXor16:
case Builtin::BI_InterlockedXor:
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
+
+ case Builtin::BI_bittest64:
+ case Builtin::BI_bittest:
+ case Builtin::BI_bittestandcomplement64:
+ case Builtin::BI_bittestandcomplement:
+ case Builtin::BI_bittestandreset64:
+ case Builtin::BI_bittestandreset:
+ case Builtin::BI_bittestandset64:
+ case Builtin::BI_bittestandset:
+ case Builtin::BI_interlockedbittestandreset:
+ case Builtin::BI_interlockedbittestandreset64:
+ case Builtin::BI_interlockedbittestandset64:
case Builtin::BI_interlockedbittestandset:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
+ case Builtin::BI_interlockedbittestandset_acq:
+ case Builtin::BI_interlockedbittestandset_rel:
+ case Builtin::BI_interlockedbittestandset_nf:
+ case Builtin::BI_interlockedbittestandreset_acq:
+ case Builtin::BI_interlockedbittestandreset_rel:
+ case Builtin::BI_interlockedbittestandreset_nf:
+ return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
case Builtin::BI__exception_code:
case Builtin::BI_exception_code:
@@ -2652,59 +3052,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__abnormal_termination:
case Builtin::BI_abnormal_termination:
return RValue::get(EmitSEHAbnormalTermination());
- case Builtin::BI_setjmpex: {
- if (getTarget().getTriple().isOSMSVCRT()) {
- llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
- llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
- getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
- llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
- "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *Buf = Builder.CreateBitOrPointerCast(
- EmitScalarExpr(E->getArg(0)), Int8PtrTy);
- llvm::Value *FrameAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
- ConstantInt::get(Int32Ty, 0));
- llvm::Value *Args[] = {Buf, FrameAddr};
- llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
- CS.setAttributes(ReturnsTwiceAttr);
- return RValue::get(CS.getInstruction());
- }
+ case Builtin::BI_setjmpex:
+ if (getTarget().getTriple().isOSMSVCRT())
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
break;
- }
- case Builtin::BI_setjmp: {
+ case Builtin::BI_setjmp:
if (getTarget().getTriple().isOSMSVCRT()) {
- llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
- getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
- llvm::Value *Buf = Builder.CreateBitOrPointerCast(
- EmitScalarExpr(E->getArg(0)), Int8PtrTy);
- llvm::CallSite CS;
- if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
- llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
- llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
- "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *Count = ConstantInt::get(IntTy, 0);
- llvm::Value *Args[] = {Buf, Count};
- CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
- } else {
- llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
- llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
- "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *FrameAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
- ConstantInt::get(Int32Ty, 0));
- llvm::Value *Args[] = {Buf, FrameAddr};
- CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
- }
- CS.setAttributes(ReturnsTwiceAttr);
- return RValue::get(CS.getInstruction());
+ if (getTarget().getTriple().getArch() == llvm::Triple::x86)
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
+ else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
+ return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
}
break;
- }
case Builtin::BI__GetExceptionInfo: {
if (llvm::GlobalVariable *GV =
@@ -2732,6 +3092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
case Builtin::BI__builtin_coro_frame:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
+ case Builtin::BI__builtin_coro_noop:
+ return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
case Builtin::BI__builtin_coro_free:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
case Builtin::BI__builtin_coro_destroy:
@@ -2882,11 +3244,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
case Builtin::BIget_pipe_num_packets:
case Builtin::BIget_pipe_max_packets: {
- const char *Name;
+ const char *BaseName;
+ const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
if (BuiltinID == Builtin::BIget_pipe_num_packets)
- Name = "__get_pipe_num_packets";
+ BaseName = "__get_pipe_num_packets";
else
- Name = "__get_pipe_max_packets";
+ BaseName = "__get_pipe_max_packets";
+ auto Name = std::string(BaseName) +
+ std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
// Building the generic function prototype.
Value *Arg0 = EmitScalarExpr(E->getArg(0));
@@ -2992,10 +3357,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return Ptr;
};
- // Could have events and/or vaargs.
+ // Could have events and/or varargs.
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
- Name = "__enqueue_kernel_vaargs";
+ Name = "__enqueue_kernel_varargs";
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
llvm::Value *Kernel =
@@ -3063,7 +3428,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Pass the number of variadics to the runtime function too.
Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
ArgTys.push_back(Int32Ty);
- Name = "__enqueue_kernel_events_vaargs";
+ Name = "__enqueue_kernel_events_varargs";
auto *PtrToSizeArray = CreateArrayForSizeVar(7);
Args.push_back(PtrToSizeArray);
@@ -3104,7 +3469,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
false),
- "__get_kernel_preferred_work_group_multiple_impl"),
+ "__get_kernel_preferred_work_group_size_multiple_impl"),
{Kernel, Arg}));
}
case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
@@ -3175,6 +3540,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__xray_customevent: {
if (!ShouldXRayInstrumentFunction())
return RValue::getIgnored();
+
+ if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::Custom))
+ return RValue::getIgnored();
+
if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
return RValue::getIgnored();
@@ -3198,6 +3568,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
}
+ case Builtin::BI__xray_typedevent: {
+ // TODO: There should be a way to always emit events even if the current
+ // function is not instrumented. Losing events in a stream can cripple
+ // a trace.
+ if (!ShouldXRayInstrumentFunction())
+ return RValue::getIgnored();
+
+ if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::Typed))
+ return RValue::getIgnored();
+
+ if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
+ if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
+ return RValue::getIgnored();
+
+ Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
+ auto FTy = F->getFunctionType();
+ auto Arg0 = EmitScalarExpr(E->getArg(0));
+ auto PTy0 = FTy->getParamType(0);
+ if (PTy0 != Arg0->getType())
+ Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
+ auto Arg1 = E->getArg(1);
+ auto Arg1Val = EmitScalarExpr(Arg1);
+ auto Arg1Ty = Arg1->getType();
+ auto PTy1 = FTy->getParamType(1);
+ if (PTy1 != Arg1Val->getType()) {
+ if (Arg1Ty->isArrayType())
+ Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
+ else
+ Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
+ }
+ auto Arg2 = EmitScalarExpr(E->getArg(2));
+ auto PTy2 = FTy->getParamType(2);
+ if (PTy2 != Arg2->getType())
+ Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
+ return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
+ }
+
case Builtin::BI__builtin_ms_va_start:
case Builtin::BI__builtin_ms_va_end:
return RValue::get(
@@ -3246,6 +3654,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// can move this up to the beginning of the function.
checkTargetFeatures(E, FD);
+ if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
+ LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
+
// See if we have a target specific intrinsic.
const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
@@ -3253,7 +3664,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
if (!Prefix.empty()) {
IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
- // NOTE we dont need to perform a compatibility flag check here since the
+ // NOTE we don't need to perform a compatibility flag check here since the
// intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
// MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
if (IntrinsicID == Intrinsic::not_intrinsic)
@@ -3378,7 +3789,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
NeonTypeFlags TypeFlags,
- llvm::Triple::ArchType Arch,
+ bool HasLegalHalfType=true,
bool V1Ty=false) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
@@ -3389,9 +3800,7 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
case NeonTypeFlags::Poly16:
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Float16:
- // FIXME: Only AArch64 backend can so far properly handle half types.
- // Remove else part once ARM backend support for half is complete.
- if (Arch == llvm::Triple::aarch64)
+ if (HasLegalHalfType)
return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
else
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
@@ -3454,7 +3863,7 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
return ConstantInt::get(Ty, neg ? -SV : SV);
}
-// \brief Right-shift a vector by a constant.
+// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
llvm::Type *Ty, bool usgn,
const char *name) {
@@ -3557,13 +3966,24 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_v),
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
@@ -3583,6 +4003,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
@@ -3627,6 +4048,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP0(vcvtq_f16_v),
NEONMAP0(vcvtq_f32_v),
NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
@@ -3642,6 +4064,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vcvtq_u16_v),
NEONMAP0(vcvtq_u32_v),
NEONMAP0(vcvtq_u64_v),
+ NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
+ NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
@@ -3652,18 +4076,30 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
NEONMAP0(vld1_dup_v),
NEONMAP1(vld1_v, arm_neon_vld1, 0),
+ NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
+ NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
+ NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
NEONMAP0(vld1q_dup_v),
NEONMAP1(vld1q_v, arm_neon_vld1, 0),
+ NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
+ NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
+ NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
+ NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
NEONMAP1(vld2_v, arm_neon_vld2, 0),
+ NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
NEONMAP1(vld2q_v, arm_neon_vld2, 0),
+ NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
NEONMAP1(vld3_v, arm_neon_vld3, 0),
+ NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
NEONMAP1(vld3q_v, arm_neon_vld3, 0),
+ NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
NEONMAP1(vld4_v, arm_neon_vld4, 0),
+ NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
NEONMAP1(vld4q_v, arm_neon_vld4, 0),
NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
@@ -3722,6 +4158,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
@@ -3755,7 +4193,13 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vshrn_n_v),
NEONMAP0(vshrq_n_v),
NEONMAP1(vst1_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
+ NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
+ NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
NEONMAP1(vst1q_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
+ NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
+ NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
NEONMAP1(vst2_v, arm_neon_vst2, 0),
NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
@@ -3795,8 +4239,18 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
@@ -3826,6 +4280,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
+ NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
+ NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
@@ -3834,6 +4290,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
@@ -3874,6 +4336,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
@@ -3897,6 +4361,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vshr_n_v),
NEONMAP0(vshrn_n_v),
NEONMAP0(vshrq_n_v),
+ NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
+ NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
NEONMAP0(vsubhn_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
@@ -4095,6 +4565,37 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
+  // FP16 scalar intrinsics go here.
+ NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
#undef NEONMAP0
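For context, the NEONMAP0/1/2 entries above are generated by preprocessor
macros defined just before these tables in CGBuiltin.cpp. A sketch of their
approximate shape (field order and names are illustrative, not verbatim from
this revision):

struct NeonIntrinsicInfo {   // stand-in for the real table-entry type
  const char *NameHint;      // name attached to the emitted call
  unsigned BuiltinID;        // NEON::BI__builtin_neon_* identifier
  unsigned LLVMIntrinsic;    // primary LLVM intrinsic, 0 if none
  unsigned AltLLVMIntrinsic; // unsigned/alternate variant, 0 if none
  unsigned TypeModifier;     // flags such as Add1ArgType, UnsignedAlts
};

#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0 }
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_##NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_##NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, TypeModifier }

A NEONMAP0 entry thus marks a builtin with no direct intrinsic mapping; it is
handled by explicit code in the emitters, as the new vceqz/vrndi cases below
show.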
@@ -4244,8 +4745,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
+ const bool HasLegalHalfType = getTarget().hasLegalHalfType();
- llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
+ llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -4310,6 +4812,26 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
return EmitNeonCall(F, Ops, NameHint);
}
+ case NEON::BI__builtin_neon_vceqz_v:
+ case NEON::BI__builtin_neon_vceqzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
+ ICmpInst::ICMP_EQ, "vceqz");
+ case NEON::BI__builtin_neon_vcgez_v:
+ case NEON::BI__builtin_neon_vcgezq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
+ ICmpInst::ICMP_SGE, "vcgez");
+ case NEON::BI__builtin_neon_vclez_v:
+ case NEON::BI__builtin_neon_vclezq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
+ ICmpInst::ICMP_SLE, "vclez");
+ case NEON::BI__builtin_neon_vcgtz_v:
+ case NEON::BI__builtin_neon_vcgtzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
+ ICmpInst::ICMP_SGT, "vcgtz");
+ case NEON::BI__builtin_neon_vcltz_v:
+ case NEON::BI__builtin_neon_vcltzq_v:
+ return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
+ ICmpInst::ICMP_SLT, "vcltz");
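At the source level these zero-compare cases back the ACLE
vceqz/vcgez/vclez/vcgtz/vcltz intrinsic families. A minimal usage sketch on an
Armv8 target (standard arm_neon.h, not part of this patch):

#include <arm_neon.h>

// Each lane becomes all-ones when the predicate holds, all-zeros otherwise.
uint32x2_t lanes_equal_zero(int32x2_t v)    { return vceqz_s32(v); }
uint32x2_t lanes_at_least_zero(int32x2_t v) { return vcgez_s32(v); }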
case NEON::BI__builtin_neon_vclz_v:
case NEON::BI__builtin_neon_vclzq_v:
// We generate target-independent intrinsic, which needs a second argument
@@ -4319,13 +4841,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_f32_v:
case NEON::BI__builtin_neon_vcvtq_f32_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
+ HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f16_v:
case NEON::BI__builtin_neon_vcvtq_f16_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
+ HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_n_f16_v:
@@ -4374,6 +4898,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
+ case NEON::BI__builtin_neon_vcvta_u16_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
case NEON::BI__builtin_neon_vcvta_u64_v:
case NEON::BI__builtin_neon_vcvtaq_s16_v:
@@ -4448,12 +4973,33 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops.push_back(getAlignmentValue32(PtrOp0));
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
}
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
+ llvm::Type *Tys[2] = { VTy, PTy };
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
+ }
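The vld1_xN builtins correspond to the ACLE multi-vector contiguous loads,
which read N consecutive vectors without the lane de-interleaving that
vld2/vld3/vld4 perform. A hedged usage sketch (assuming a target where these
intrinsics are available in arm_neon.h):

#include <arm_neon.h>

// Loads 32 consecutive bytes as two 16-byte vectors, in memory order.
uint8x16x2_t load_two_vectors(const uint8_t *p) {
  return vld1q_u8_x2(p);
}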
case NEON::BI__builtin_neon_vld2_v:
case NEON::BI__builtin_neon_vld2q_v:
case NEON::BI__builtin_neon_vld3_v:
case NEON::BI__builtin_neon_vld3q_v:
case NEON::BI__builtin_neon_vld4_v:
- case NEON::BI__builtin_neon_vld4q_v: {
+ case NEON::BI__builtin_neon_vld4q_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v: {
llvm::Type *Tys[] = {Ty, Int8PtrTy};
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
Value *Align = getAlignmentValue32(PtrOp1);
@@ -4552,7 +5098,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vrsqrteq_v:
Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
-
+ case NEON::BI__builtin_neon_vrndi_v:
+ case NEON::BI__builtin_neon_vrndiq_v:
+ Int = Intrinsic::nearbyint;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
case NEON::BI__builtin_neon_vrshr_n_v:
case NEON::BI__builtin_neon_vrshrq_n_v:
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
@@ -4603,6 +5152,23 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops.push_back(getAlignmentValue32(PtrOp0));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
}
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v: {
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
+  // in AArch64 it comes last. We may want to standardize on one or the other.
+ if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+ llvm::Type *Tys[2] = { VTy, PTy };
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
+ }
+ llvm::Type *Tys[2] = { PTy, VTy };
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
+ }
case NEON::BI__builtin_neon_vsubhn_v: {
llvm::VectorType *SrcTy =
llvm::VectorType::getExtendedElementVectorType(VTy);
@@ -4685,6 +5251,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
return SV;
}
+ case NEON::BI__builtin_neon_vdot_v:
+ case NEON::BI__builtin_neon_vdotq_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
+ }
}
assert(Int && "Expected valid intrinsic number");
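The new vdot mapping just above implements the Armv8.2-A dot-product
extension: each 32-bit accumulator lane receives the sum of four 8-bit
products. A usage sketch (requires a dot-product-capable target, e.g.
-march=armv8.2-a+dotprod):

#include <arm_neon.h>

// acc[i] += a[4i]*b[4i] + a[4i+1]*b[4i+1] + a[4i+2]*b[4i+2] + a[4i+3]*b[4i+3]
uint32x2_t dot_accumulate(uint32x2_t acc, uint8x8_t a, uint8x8_t b) {
  return vdot_u32(acc, a, b);
}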
@@ -4893,6 +5467,34 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
return true;
}
+Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+ CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
+ llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+ LoadSize.getQuantity() * 8);
+ Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
+ llvm::LoadInst *Load =
+ Builder.CreateAlignedLoad(Ptr, LoadSize);
+ Load->setVolatile(true);
+ return Load;
+}
+
+Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Value = EmitScalarExpr(E->getArg(1));
+ QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+ CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
+ llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+ StoreSize.getQuantity() * 8);
+ Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
+ llvm::StoreInst *Store =
+ Builder.CreateAlignedStore(Value, Ptr,
+ StoreSize);
+ Store->setVolatile(true);
+ return Store;
+}
+
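The factored-out helpers implement the MSVC __iso_volatile_* semantics: a
volatile load or store of the exact access width, with no implied memory
barrier. A usage sketch (MSVC-compatible mode assumed):

#include <intrin.h>

__int32 read_flag(const volatile __int32 *p) {
  return __iso_volatile_load32(p);  // volatile load, no fence
}
void write_flag(volatile __int32 *p, __int32 v) {
  __iso_volatile_store32(p, v);     // volatile store, no fence
}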
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
llvm::Triple::ArchType Arch) {
@@ -5135,35 +5737,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case ARM::BI__iso_volatile_load8:
case ARM::BI__iso_volatile_load16:
case ARM::BI__iso_volatile_load32:
- case ARM::BI__iso_volatile_load64: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
- LoadSize.getQuantity() * 8);
- Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::LoadInst *Load =
- Builder.CreateAlignedLoad(Ptr, LoadSize);
- Load->setVolatile(true);
- return Load;
- }
+ case ARM::BI__iso_volatile_load64:
+ return EmitISOVolatileLoad(E);
case ARM::BI__iso_volatile_store8:
case ARM::BI__iso_volatile_store16:
case ARM::BI__iso_volatile_store32:
- case ARM::BI__iso_volatile_store64: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Value = EmitScalarExpr(E->getArg(1));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
- StoreSize.getQuantity() * 8);
- Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::StoreInst *Store =
- Builder.CreateAlignedStore(Value, Ptr,
- StoreSize);
- Store->setVolatile(true);
- return Store;
- }
+ case ARM::BI__iso_volatile_store64:
+ return EmitISOVolatileStore(E);
}
if (BuiltinID == ARM::BI__builtin_arm_clrex) {
@@ -5308,8 +5888,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld4_lane_v:
case NEON::BI__builtin_neon_vld4q_lane_v:
case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
@@ -5345,6 +5928,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vgetq_lane_f32:
return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
+ case NEON::BI__builtin_neon_vrndns_f32: {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ llvm::Type *Tys[] = {Arg->getType()};
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
+    return Builder.CreateCall(F, {Arg}, "vrndn");
+  }
+
case NEON::BI__builtin_neon_vset_lane_i8:
case NEON::BI__builtin_neon_vset_lane_i16:
case NEON::BI__builtin_neon_vset_lane_i32:
@@ -5434,7 +6023,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
bool usgn = Type.isUnsigned();
bool rightShift = false;
- llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
+ llvm::VectorType *VTy = GetNeonType(this, Type,
+ getTarget().hasLegalHalfType());
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -5479,68 +6069,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Value *Ld = Builder.CreateLoad(PtrOp0);
return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
}
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld4_dup_v: {
- // Handle 64-bit elements as a special-case. There is no "dup" needed.
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_dup_v:
- Int = Intrinsic::arm_neon_vld2;
- break;
- case NEON::BI__builtin_neon_vld3_dup_v:
- Int = Intrinsic::arm_neon_vld3;
- break;
- case NEON::BI__builtin_neon_vld4_dup_v:
- Int = Intrinsic::arm_neon_vld4;
- break;
- default: llvm_unreachable("unknown vld_dup intrinsic?");
- }
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(Int, Tys);
- llvm::Value *Align = getAlignmentValue32(PtrOp1);
- Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_dup_v:
- Int = Intrinsic::arm_neon_vld2lane;
- break;
- case NEON::BI__builtin_neon_vld3_dup_v:
- Int = Intrinsic::arm_neon_vld3lane;
- break;
- case NEON::BI__builtin_neon_vld4_dup_v:
- Int = Intrinsic::arm_neon_vld4lane;
- break;
- default: llvm_unreachable("unknown vld_dup intrinsic?");
- }
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(Int, Tys);
- llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
-
- SmallVector<Value*, 6> Args;
- Args.push_back(Ops[1]);
- Args.append(STy->getNumElements(), UndefValue::get(Ty));
-
- llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
- Args.push_back(CI);
- Args.push_back(getAlignmentValue32(PtrOp1));
-
- Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
- // splat lane 0 to all elts in each vector of the result.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Value *Val = Builder.CreateExtractValue(Ops[1], i);
- Value *Elt = Builder.CreateBitCast(Val, Ty);
- Elt = EmitNeonSplat(Elt, CI);
- Elt = Builder.CreateBitCast(Elt, Val->getType());
- Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
- }
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
case NEON::BI__builtin_neon_vqrshrn_n_v:
Int =
usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
@@ -5680,7 +6208,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(Result.getZExtValue());
- llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch);
+ llvm::VectorType *Ty = GetNeonType(&CGF, Type);
if (!Ty)
return nullptr;
@@ -5799,18 +6327,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
HintID = 0;
break;
case AArch64::BI__builtin_arm_yield:
+ case AArch64::BI__yield:
HintID = 1;
break;
case AArch64::BI__builtin_arm_wfe:
+ case AArch64::BI__wfe:
HintID = 2;
break;
case AArch64::BI__builtin_arm_wfi:
+ case AArch64::BI__wfi:
HintID = 3;
break;
case AArch64::BI__builtin_arm_sev:
+ case AArch64::BI__sev:
HintID = 4;
break;
case AArch64::BI__builtin_arm_sevl:
+ case AArch64::BI__sevl:
HintID = 5;
break;
}
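The added __yield/__wfe/__wfi/__sev/__sevl aliases expose the AArch64 hint
instructions under their MSVC names. A hedged sketch of the typical use, a
spin-wait loop (MSVC-compatible mode assumed):

#include <intrin.h>

void spin_until_set(volatile int *flag) {
  while (*flag == 0)
    __yield();  // emits the YIELD hint so the core can back off
}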
@@ -6077,6 +6610,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// Handle non-overloaded intrinsics first.
switch (BuiltinID) {
default: break;
+ case NEON::BI__builtin_neon_vabsh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
case NEON::BI__builtin_neon_vldrq_p128: {
llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
@@ -6119,6 +6655,153 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateUIToFP(Ops[0], FTy);
return Builder.CreateSIToFP(Ops[0], FTy);
}
+ case NEON::BI__builtin_neon_vcvth_f16_u16:
+ case NEON::BI__builtin_neon_vcvth_f16_u32:
+ case NEON::BI__builtin_neon_vcvth_f16_u64:
+ usgn = true;
+    LLVM_FALLTHROUGH;
+ case NEON::BI__builtin_neon_vcvth_f16_s16:
+ case NEON::BI__builtin_neon_vcvth_f16_s32:
+ case NEON::BI__builtin_neon_vcvth_f16_s64: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ llvm::Type *FTy = HalfTy;
+ llvm::Type *InTy;
+ if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
+ InTy = Int64Ty;
+ else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
+ InTy = Int32Ty;
+ else
+ InTy = Int16Ty;
+ Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
+ if (usgn)
+ return Builder.CreateUIToFP(Ops[0], FTy);
+ return Builder.CreateSIToFP(Ops[0], FTy);
+ }
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
+ usgn = true;
+    LLVM_FALLTHROUGH;
+ case NEON::BI__builtin_neon_vcvth_s16_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Int16Ty);
+ return Builder.CreateFPToSI(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_u32_f16:
+ usgn = true;
+    LLVM_FALLTHROUGH;
+ case NEON::BI__builtin_neon_vcvth_s32_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Int32Ty);
+ return Builder.CreateFPToSI(Ops[0], Int32Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_u64_f16:
+ usgn = true;
+    LLVM_FALLTHROUGH;
+ case NEON::BI__builtin_neon_vcvth_s64_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ if (usgn)
+ return Builder.CreateFPToUI(Ops[0], Int64Ty);
+ return Builder.CreateFPToSI(Ops[0], Int64Ty);
+ }
+ case NEON::BI__builtin_neon_vcvtah_u16_f16:
+ case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+ case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+ case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ case NEON::BI__builtin_neon_vcvtah_s16_f16:
+ case NEON::BI__builtin_neon_vcvtmh_s16_f16:
+ case NEON::BI__builtin_neon_vcvtnh_s16_f16:
+ case NEON::BI__builtin_neon_vcvtph_s16_f16: {
+ unsigned Int;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type *Tys[2] = {InTy, FTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcvtah_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtau; break;
+ case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtmu; break;
+ case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtnu; break;
+ case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtpu; break;
+ case NEON::BI__builtin_neon_vcvtah_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtas; break;
+ case NEON::BI__builtin_neon_vcvtmh_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtms; break;
+ case NEON::BI__builtin_neon_vcvtnh_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtns; break;
+ case NEON::BI__builtin_neon_vcvtph_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtps; break;
+ }
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
+ return Builder.CreateTrunc(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcaleh_f16:
+ case NEON::BI__builtin_neon_vcalth_f16:
+ case NEON::BI__builtin_neon_vcageh_f16:
+ case NEON::BI__builtin_neon_vcagth_f16: {
+ unsigned Int;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type *Tys[2] = {InTy, FTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcageh_f16:
+ Int = Intrinsic::aarch64_neon_facge; break;
+ case NEON::BI__builtin_neon_vcagth_f16:
+ Int = Intrinsic::aarch64_neon_facgt; break;
+ case NEON::BI__builtin_neon_vcaleh_f16:
+ Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
+ case NEON::BI__builtin_neon_vcalth_f16:
+ Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
+ }
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
+ return Builder.CreateTrunc(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_n_s16_f16:
+ case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
+ unsigned Int;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type *Tys[2] = {InTy, FTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcvth_n_s16_f16:
+ Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
+ case NEON::BI__builtin_neon_vcvth_n_u16_f16:
+ Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
+ }
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
+ return Builder.CreateTrunc(Ops[0], Int16Ty);
+ }
+ case NEON::BI__builtin_neon_vcvth_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
+ unsigned Int;
+ llvm::Type* FTy = HalfTy;
+ llvm::Type* InTy = Int32Ty;
+ llvm::Type *Tys[2] = {FTy, InTy};
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vcvth_n_f16_s16:
+ Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
+ Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
+ break;
+ case NEON::BI__builtin_neon_vcvth_n_f16_u16:
+ Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
+ Ops[0] = Builder.CreateZExt(Ops[0], InTy);
+ break;
+ }
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
+ }
case NEON::BI__builtin_neon_vpaddd_s64: {
llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
Value *Vec = EmitScalarExpr(E->getArg(0));
@@ -6160,6 +6843,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vceqzd_s64:
case NEON::BI__builtin_neon_vceqzd_f64:
case NEON::BI__builtin_neon_vceqzs_f32:
+ case NEON::BI__builtin_neon_vceqzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6167,6 +6851,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcgezd_s64:
case NEON::BI__builtin_neon_vcgezd_f64:
case NEON::BI__builtin_neon_vcgezs_f32:
+ case NEON::BI__builtin_neon_vcgezh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6174,6 +6859,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vclezd_s64:
case NEON::BI__builtin_neon_vclezd_f64:
case NEON::BI__builtin_neon_vclezs_f32:
+ case NEON::BI__builtin_neon_vclezh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6181,6 +6867,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcgtzd_s64:
case NEON::BI__builtin_neon_vcgtzd_f64:
case NEON::BI__builtin_neon_vcgtzs_f32:
+ case NEON::BI__builtin_neon_vcgtzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6188,6 +6875,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcltzd_s64:
case NEON::BI__builtin_neon_vcltzd_f64:
case NEON::BI__builtin_neon_vcltzs_f32:
+ case NEON::BI__builtin_neon_vcltzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6240,6 +6928,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
}
+ case NEON::BI__builtin_neon_vceqh_f16:
+ case NEON::BI__builtin_neon_vcleh_f16:
+ case NEON::BI__builtin_neon_vclth_f16:
+ case NEON::BI__builtin_neon_vcgeh_f16:
+ case NEON::BI__builtin_neon_vcgth_f16: {
+ llvm::CmpInst::Predicate P;
+ switch (BuiltinID) {
+ default: llvm_unreachable("missing builtin ID in switch!");
+ case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
+ case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
+ case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
+ case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
+ case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
+ }
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
+ Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
+ return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
+ }
case NEON::BI__builtin_neon_vceqd_s64:
case NEON::BI__builtin_neon_vceqd_u64:
case NEON::BI__builtin_neon_vcgtd_s64:
@@ -6377,6 +7085,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::VectorType::get(DoubleTy, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
+ case NEON::BI__builtin_neon_vaddh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
+ case NEON::BI__builtin_neon_vsubh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
+ case NEON::BI__builtin_neon_vmulh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
+ case NEON::BI__builtin_neon_vdivh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
+ case NEON::BI__builtin_neon_vfmah_f16: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+ // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+ return Builder.CreateCall(F,
+ {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
+ }
+ case NEON::BI__builtin_neon_vfmsh_f16: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+ Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
+ Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+ // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+ return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+ }
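These cases implement the Armv8.2-A scalar FP16 arithmetic intrinsics; note
that vfmah_f16 places the accumulator first, so the operands are reordered
before calling llvm.fma. A usage sketch (requires scalar FP16 support, e.g.
-march=armv8.2-a+fp16; declarations assumed from arm_fp16.h):

#include <arm_fp16.h>

float16_t add_h(float16_t a, float16_t b) { return vaddh_f16(a, b); }

// vfmah_f16(acc, x, y) computes acc + x * y.
float16_t fma_h(float16_t acc, float16_t x, float16_t y) {
  return vfmah_f16(acc, x, y);
}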
case NEON::BI__builtin_neon_vaddd_s64:
case NEON::BI__builtin_neon_vaddd_u64:
return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
@@ -6534,7 +7267,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
}
- llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
+ llvm::VectorType *VTy = GetNeonType(this, Type);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -6599,7 +7332,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch);
+ NeonTypeFlags(NeonTypeFlags::Float64, false, true));
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
@@ -6651,12 +7384,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
+ case NEON::BI__builtin_neon_vmaxh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fmax;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
+ }
case NEON::BI__builtin_neon_vmin_v:
case NEON::BI__builtin_neon_vminq_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
+ case NEON::BI__builtin_neon_vminh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fmin;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
+ }
case NEON::BI__builtin_neon_vabd_v:
case NEON::BI__builtin_neon_vabdq_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
@@ -6695,20 +7438,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminnmq_v:
Int = Intrinsic::aarch64_neon_fminnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
+ case NEON::BI__builtin_neon_vminnmh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
case NEON::BI__builtin_neon_vmaxnm_v:
case NEON::BI__builtin_neon_vmaxnmq_v:
Int = Intrinsic::aarch64_neon_fmaxnm;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
+ case NEON::BI__builtin_neon_vmaxnmh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ Int = Intrinsic::aarch64_neon_fmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
case NEON::BI__builtin_neon_vrecpss_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(1)));
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
Ops, "vrecps");
}
- case NEON::BI__builtin_neon_vrecpsd_f64: {
+ case NEON::BI__builtin_neon_vrecpsd_f64:
Ops.push_back(EmitScalarExpr(E->getArg(1)));
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
Ops, "vrecps");
- }
+ case NEON::BI__builtin_neon_vrecpsh_f16:
+ Ops.push_back(EmitScalarExpr(E->getArg(1)));
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
+ Ops, "vrecps");
case NEON::BI__builtin_neon_vqshrun_n_v:
Int = Intrinsic::aarch64_neon_sqshrun;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
@@ -6724,72 +7478,87 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vqrshrn_n_v:
Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
+ case NEON::BI__builtin_neon_vrndah_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::round;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
+ }
case NEON::BI__builtin_neon_vrnda_v:
case NEON::BI__builtin_neon_vrndaq_v: {
Int = Intrinsic::round;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
}
- case NEON::BI__builtin_neon_vrndi_v:
- case NEON::BI__builtin_neon_vrndiq_v: {
+ case NEON::BI__builtin_neon_vrndih_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::nearbyint;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
+ }
+ case NEON::BI__builtin_neon_vrndmh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::floor;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
}
case NEON::BI__builtin_neon_vrndm_v:
case NEON::BI__builtin_neon_vrndmq_v: {
Int = Intrinsic::floor;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
}
+ case NEON::BI__builtin_neon_vrndnh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::aarch64_neon_frintn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
+ }
case NEON::BI__builtin_neon_vrndn_v:
case NEON::BI__builtin_neon_vrndnq_v: {
Int = Intrinsic::aarch64_neon_frintn;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
}
+ case NEON::BI__builtin_neon_vrndns_f32: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::aarch64_neon_frintn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
+ }
+ case NEON::BI__builtin_neon_vrndph_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::ceil;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
+ }
case NEON::BI__builtin_neon_vrndp_v:
case NEON::BI__builtin_neon_vrndpq_v: {
Int = Intrinsic::ceil;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
}
+ case NEON::BI__builtin_neon_vrndxh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::rint;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
+ }
case NEON::BI__builtin_neon_vrndx_v:
case NEON::BI__builtin_neon_vrndxq_v: {
Int = Intrinsic::rint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
}
+ case NEON::BI__builtin_neon_vrndh_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::trunc;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
+ }
case NEON::BI__builtin_neon_vrnd_v:
case NEON::BI__builtin_neon_vrndq_v: {
Int = Intrinsic::trunc;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
}
- case NEON::BI__builtin_neon_vceqz_v:
- case NEON::BI__builtin_neon_vceqzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
- ICmpInst::ICMP_EQ, "vceqz");
- case NEON::BI__builtin_neon_vcgez_v:
- case NEON::BI__builtin_neon_vcgezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
- ICmpInst::ICMP_SGE, "vcgez");
- case NEON::BI__builtin_neon_vclez_v:
- case NEON::BI__builtin_neon_vclezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
- ICmpInst::ICMP_SLE, "vclez");
- case NEON::BI__builtin_neon_vcgtz_v:
- case NEON::BI__builtin_neon_vcgtzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
- ICmpInst::ICMP_SGT, "vcgtz");
- case NEON::BI__builtin_neon_vcltz_v:
- case NEON::BI__builtin_neon_vcltzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
- ICmpInst::ICMP_SLT, "vcltz");
case NEON::BI__builtin_neon_vcvt_f64_v:
case NEON::BI__builtin_neon_vcvtq_f64_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f64_f32: {
assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
"unexpected vcvt_f64_f32 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
}
@@ -6797,7 +7566,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
assert(Type.getEltType() == NeonTypeFlags::Float32 &&
"unexpected vcvt_f32_f64 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
}
@@ -6805,20 +7574,21 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
- case NEON::BI__builtin_neon_vcvt_s16_v:
- case NEON::BI__builtin_neon_vcvt_u16_v:
+ case NEON::BI__builtin_neon_vcvt_s16_v:
+ case NEON::BI__builtin_neon_vcvt_u16_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
case NEON::BI__builtin_neon_vcvtq_u64_v:
- case NEON::BI__builtin_neon_vcvtq_s16_v:
- case NEON::BI__builtin_neon_vcvtq_u16_v: {
+ case NEON::BI__builtin_neon_vcvtq_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_u16_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
if (usgn)
return Builder.CreateFPToUI(Ops[0], Ty);
return Builder.CreateFPToSI(Ops[0], Ty);
}
case NEON::BI__builtin_neon_vcvta_s16_v:
+ case NEON::BI__builtin_neon_vcvta_u16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvtaq_s16_v:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
@@ -6886,6 +7656,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fmulx;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
}
+ case NEON::BI__builtin_neon_vmulxh_lane_f16:
+ case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
+ // vmulx_lane should be mapped to Neon scalar mulx after
+    // extracting the scalar element.
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
+ Ops.pop_back();
+ Int = Intrinsic::aarch64_neon_fmulx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
+ }
case NEON::BI__builtin_neon_vmul_lane_v:
case NEON::BI__builtin_neon_vmul_laneq_v: {
// v1f64 vmul_lane should be mapped to Neon scalar mul lane
@@ -6894,7 +7674,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch);
+ NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -6902,6 +7682,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vnegd_s64:
return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
+ case NEON::BI__builtin_neon_vnegh_f16:
+ return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
case NEON::BI__builtin_neon_vpmaxnm_v:
case NEON::BI__builtin_neon_vpmaxnmq_v: {
Int = Intrinsic::aarch64_neon_fmaxnmp;
@@ -6912,6 +7694,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fminnmp;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
}
+ case NEON::BI__builtin_neon_vsqrth_f16: {
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Int = Intrinsic::sqrt;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
+ }
case NEON::BI__builtin_neon_vsqrt_v:
case NEON::BI__builtin_neon_vsqrtq_v: {
Int = Intrinsic::sqrt;
@@ -7289,64 +8076,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
return Builder.CreateAdd(Ops[0], tmp);
}
- // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
- // of an Align parameter here.
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- unsigned Int;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- Int = Intrinsic::aarch64_neon_ld1x2;
- break;
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- Int = Intrinsic::aarch64_neon_ld1x3;
- break;
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v:
- Int = Intrinsic::aarch64_neon_ld1x4;
- break;
- }
- Function *F = CGM.getIntrinsic(Int, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
- llvm::Type *Tys[2] = { VTy, PTy };
- unsigned Int;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- Int = Intrinsic::aarch64_neon_st1x2;
- break;
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- Int = Intrinsic::aarch64_neon_st1x3;
- break;
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v:
- Int = Intrinsic::aarch64_neon_st1x4;
- break;
- }
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
- }
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
@@ -7653,6 +8382,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_suqadd;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
}
+ case AArch64::BI__iso_volatile_load8:
+ case AArch64::BI__iso_volatile_load16:
+ case AArch64::BI__iso_volatile_load32:
+ case AArch64::BI__iso_volatile_load64:
+ return EmitISOVolatileLoad(E);
+ case AArch64::BI__iso_volatile_store8:
+ case AArch64::BI__iso_volatile_store16:
+ case AArch64::BI__iso_volatile_store32:
+ case AArch64::BI__iso_volatile_store64:
+ return EmitISOVolatileStore(E);
+ case AArch64::BI_BitScanForward:
+ case AArch64::BI_BitScanForward64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
+ case AArch64::BI_BitScanReverse:
+ case AArch64::BI_BitScanReverse64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
+ case AArch64::BI_InterlockedAnd64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
+ case AArch64::BI_InterlockedExchange64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
+ case AArch64::BI_InterlockedExchangeAdd64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
+ case AArch64::BI_InterlockedExchangeSub64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
+ case AArch64::BI_InterlockedOr64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
+ case AArch64::BI_InterlockedXor64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
+ case AArch64::BI_InterlockedDecrement64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
+ case AArch64::BI_InterlockedIncrement64:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
}
}
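The 64-bit Interlocked builtins reuse the existing MSVC lowering shared with
x86. A usage sketch (MSVC-compatible mode assumed):

#include <intrin.h>

// Atomically adds 1 and returns the previous value.
__int64 bump(volatile __int64 *counter) {
  return _InterlockedExchangeAdd64(counter, 1);
}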
@@ -7704,42 +8465,66 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
}
static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
- SmallVectorImpl<Value *> &Ops,
+ ArrayRef<Value *> Ops,
unsigned Align) {
// Cast the pointer to right type.
- Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Ops[1]->getType()));
- // If the mask is all ones just emit a regular store.
- if (const auto *C = dyn_cast<Constant>(Ops[2]))
- if (C->isAllOnesValue())
- return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
-
Value *MaskVec = getMaskVecValue(CGF, Ops[2],
Ops[1]->getType()->getVectorNumElements());
- return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
+ return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
}
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
- SmallVectorImpl<Value *> &Ops, unsigned Align) {
+ ArrayRef<Value *> Ops, unsigned Align) {
// Cast the pointer to right type.
- Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Ops[1]->getType()));
- // If the mask is all ones just emit a regular store.
- if (const auto *C = dyn_cast<Constant>(Ops[2]))
- if (C->isAllOnesValue())
- return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
-
Value *MaskVec = getMaskVecValue(CGF, Ops[2],
Ops[1]->getType()->getVectorNumElements());
- return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
+ return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
+}
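EmitX86MaskedStore/Load now always emit the masked memory intrinsics,
dropping the constant all-ones-mask shortcut (presumably that case is left to
the optimizer to fold). At the source level these back intrinsics such as the
following (usage sketch, not from this patch):

#include <immintrin.h>

void masked_store(float *mem, __mmask16 k, __m512 v) {
  _mm512_mask_storeu_ps(mem, k, v);  // lanes with a 0 mask bit are untouched
}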
+
+static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *ResultTy = Ops[1]->getType();
+ llvm::Type *PtrTy = ResultTy->getVectorElementType();
+
+ // Cast the pointer to element type.
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(PtrTy));
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ ResultTy->getVectorNumElements());
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
+ ResultTy);
+ return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
+}
+
+static Value *EmitX86CompressStore(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *ResultTy = Ops[1]->getType();
+ llvm::Type *PtrTy = ResultTy->getVectorElementType();
+
+ // Cast the pointer to element type.
+ Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(PtrTy));
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ ResultTy->getVectorNumElements());
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
+ ResultTy);
+ return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
}
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
- unsigned NumElts, SmallVectorImpl<Value *> &Ops,
+ unsigned NumElts, ArrayRef<Value *> Ops,
bool InvertLHS = false) {
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
@@ -7751,26 +8536,6 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
}
-static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
- SmallVectorImpl<Value *> &Ops,
- llvm::Type *DstTy,
- unsigned SrcSizeInBits,
- unsigned Align) {
- // Load the subvector.
- Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
-
- // Create broadcast mask.
- unsigned NumDstElts = DstTy->getVectorNumElements();
- unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
-
- SmallVector<uint32_t, 8> Mask;
- for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
- for (unsigned j = 0; j != NumSrcElts; ++j)
- Mask.push_back(j);
-
- return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
-}
-
static Value *EmitX86Select(CodeGenFunction &CGF,
Value *Mask, Value *Op0, Value *Op1) {
@@ -7784,8 +8549,48 @@ static Value *EmitX86Select(CodeGenFunction &CGF,
return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}
+static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
+ Value *Mask, Value *Op0, Value *Op1) {
+  // If the mask is all ones, just return the first argument.
+ if (const auto *C = dyn_cast<Constant>(Mask))
+ if (C->isAllOnesValue())
+ return Op0;
+
+ llvm::VectorType *MaskTy =
+ llvm::VectorType::get(CGF.Builder.getInt1Ty(),
+ Mask->getType()->getIntegerBitWidth());
+ Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
+ Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
+ return CGF.Builder.CreateSelect(Mask, Op0, Op1);
+}
+
+static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
+ unsigned NumElts, Value *MaskIn) {
+ if (MaskIn) {
+ const auto *C = dyn_cast<Constant>(MaskIn);
+ if (!C || !C->isAllOnesValue())
+ Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
+ }
+
+ if (NumElts < 8) {
+ uint32_t Indices[8];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ for (unsigned i = NumElts; i != 8; ++i)
+ Indices[i] = i % NumElts + NumElts;
+ Cmp = CGF.Builder.CreateShuffleVector(
+ Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+ }
+
+ return CGF.Builder.CreateBitCast(Cmp,
+ IntegerType::get(CGF.getLLVMContext(),
+ std::max(NumElts, 8U)));
+}
+
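For masks narrower than 8 bits, the shuffle in EmitX86MaskedCompareResult pads
the compare result with lanes taken from the zero vector so the bitcast yields
at least an i8. A self-contained scalar model of that behavior (illustrative
only, not part of this patch):

#include <cstdint>

// The low numElts bits hold the per-lane compare results; bits numElts..7
// are zero, mirroring the lanes pulled from the null vector by the shuffle.
uint8_t modelCompareResult(const bool *cmp, unsigned numElts) {
  uint8_t out = 0;
  for (unsigned i = 0; i < numElts; ++i)
    out |= uint8_t(cmp[i]) << i;
  return out;
}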
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
- bool Signed, SmallVectorImpl<Value *> &Ops) {
+ bool Signed, ArrayRef<Value *> Ops) {
+ assert((Ops.size() == 2 || Ops.size() == 4) &&
+ "Unexpected number of arguments");
unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
Value *Cmp;
@@ -7809,22 +8614,16 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
}
- const auto *C = dyn_cast<Constant>(Ops.back());
- if (!C || !C->isAllOnesValue())
- Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
+ Value *MaskIn = nullptr;
+ if (Ops.size() == 4)
+ MaskIn = Ops[3];
- if (NumElts < 8) {
- uint32_t Indices[8];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i;
- for (unsigned i = NumElts; i != 8; ++i)
- Indices[i] = i % NumElts + NumElts;
- Cmp = CGF.Builder.CreateShuffleVector(
- Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
- }
- return CGF.Builder.CreateBitCast(Cmp,
- IntegerType::get(CGF.getLLVMContext(),
- std::max(NumElts, 8U)));
+ return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
+}
+
+static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
+ Value *Zero = Constant::getNullValue(In->getType());
+ return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
}
static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
@@ -7834,9 +8633,7 @@ static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
- if (Ops.size() == 1)
- return Res;
- return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+ return Res;
}
static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
@@ -7844,11 +8641,211 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
- if (Ops.size() == 2)
- return Res;
+ assert(Ops.size() == 2);
+ return Res;
+}
+
+// Lowers X86 FMA intrinsics to IR.
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ unsigned BuiltinID, bool IsAddSub) {
- assert(Ops.size() == 4);
- return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
+ bool Subtract = false;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ switch (BuiltinID) {
+ default: break;
+ case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
+ case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
+ break;
+ }
+
+ Value *A = Ops[0];
+ Value *B = Ops[1];
+ Value *C = Ops[2];
+
+ if (Subtract)
+ C = CGF.Builder.CreateFNeg(C);
+
+ Value *Res;
+
+ // Use the target-specific intrinsic when a rounding mode other than
+ // _MM_FROUND_CUR_DIRECTION (4) is requested; otherwise lower to llvm.fma.
+ if (IID != Intrinsic::not_intrinsic &&
+ cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
+ Function *Intr = CGF.CGM.getIntrinsic(IID);
+ Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
+ } else {
+ llvm::Type *Ty = A->getType();
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+
+ if (IsAddSub) {
+ // Even lanes compute a*b-c (via an FMA on the negated C) and odd lanes
+ // compute a*b+c; blend the two results with a shuffle.
+ unsigned NumElts = Ty->getVectorNumElements();
+ SmallVector<uint32_t, 16> Indices(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + (i % 2) * NumElts;
+
+ Value *NegC = CGF.Builder.CreateFNeg(C);
+ Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
+ Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
+ }
+ }
+
+ // Handle any required masking.
+ Value *MaskFalseVal = nullptr;
+ switch (BuiltinID) {
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ MaskFalseVal = Ops[0];
+ break;
+ case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ MaskFalseVal = Ops[2];
+ break;
+ }
+
+ if (MaskFalseVal)
+ return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
+
+ return Res;
+}
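A standalone scalar model (illustrative, not part of this patch) of the addsub blend above: the shuffle interleaves the FMA on negated C (even lanes) with the plain FMA (odd lanes).

#include <cstddef>

static void fmaddsubRef(const float *A, const float *B, const float *C,
                        float *D, size_t NumElts) {
  for (size_t I = 0; I != NumElts; ++I)
    D[I] = (I % 2 == 0) ? A[I] * B[I] - C[I]  // even lane: fma(A, B, -C)
                        : A[I] * B[I] + C[I]; // odd lane:  fma(A, B, C)
}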
+
+static Value *
+EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
+ Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
+ bool NegAcc = false) {
+ unsigned Rnd = 4;
+ if (Ops.size() > 4)
+ Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
+
+ if (NegAcc)
+ Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
+
+ Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ Value *Res;
+ if (Rnd != 4) {
+ Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
+ Intrinsic::x86_avx512_vfmadd_f32 :
+ Intrinsic::x86_avx512_vfmadd_f64;
+ Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+ {Ops[0], Ops[1], Ops[2], Ops[4]});
+ } else {
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
+ Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
+ }
+ // If we have more than 3 arguments, we need to do masking.
+ if (Ops.size() > 3) {
+ Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
+ : Ops[PTIdx];
+
+ // If we negated the accumulator and it is the PassThru value, we need to
+ // bypass the negate. Conveniently Upper should be the same thing in this
+ // case.
+ if (NegAcc && PTIdx == 2)
+ PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
+
+ Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
+ }
+ return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
+}
+
+static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *Ty = Ops[0]->getType();
+ // Arguments have a vXi32 type so cast to vXi64.
+ Ty = llvm::VectorType::get(CGF.Int64Ty,
+ Ty->getPrimitiveSizeInBits() / 64);
+ Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
+ Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
+
+ if (IsSigned) {
+ // Shift left then arithmetic shift right.
+ Constant *ShiftAmt = ConstantInt::get(Ty, 32);
+ LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
+ LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
+ RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
+ RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
+ } else {
+ // Clear the upper bits.
+ Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
+ LHS = CGF.Builder.CreateAnd(LHS, Mask);
+ RHS = CGF.Builder.CreateAnd(RHS, Mask);
+ }
+
+ return CGF.Builder.CreateMul(LHS, RHS);
+}
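A scalar reference (illustrative, not part of this patch) for the widening multiply above: each 64-bit lane multiplies the sign- or zero-extended low 32 bits of its inputs, which is exactly what the shl/ashr pair and the 0xffffffff mask arrange.

#include <cstdint>

// pmuldq lane: sign-extend the low halves, then multiply.
static int64_t pmuldqLaneRef(int64_t L, int64_t R) {
  return int64_t(int32_t(L)) * int64_t(int32_t(R));
}

// pmuludq lane: zero-extend the low halves, then multiply.
static uint64_t pmuludqLaneRef(uint64_t L, uint64_t R) {
  return uint64_t(uint32_t(L)) * uint64_t(uint32_t(R));
}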
+
+// Emit a masked pternlog intrinsic. This only exists because the header has to
+// use a macro and we aren't able to pass the input argument to a pternlog
+// builtin and a select builtin without evaluating it twice.
+static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *Ty = Ops[0]->getType();
+
+ unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+ Ops.drop_back());
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
+ return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
}
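The truth-table semantics behind pternlog, as a standalone bit-level reference (illustrative, not part of this patch): result bit K is the immediate's bit selected by the 3-bit index formed from the inputs' bits at position K.

#include <cstdint>

static uint64_t ternlogRef(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
  uint64_t R = 0;
  for (unsigned K = 0; K != 64; ++K) {
    // Index = a:b:c bit triple at position K.
    unsigned Idx = unsigned((((A >> K) & 1) << 2) | (((B >> K) & 1) << 1) |
                            ((C >> K) & 1));
    R |= ((uint64_t(Imm) >> Idx) & 1) << K;
  }
  return R;
}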
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
@@ -7914,11 +8911,10 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
return EmitX86CpuSupports(FeatureStr);
}
-Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
+uint32_t
+CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
-
uint32_t FeaturesMask = 0;
-
for (const StringRef &FeatureStr : FeatureStrs) {
unsigned Feature =
StringSwitch<unsigned>(FeatureStr)
@@ -7927,7 +8923,14 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
;
FeaturesMask |= (1U << Feature);
}
+ return FeaturesMask;
+}
+
+Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
+ return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
+}
+llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
// Matching the struct layout from the compiler-rt/libgcc structure that is
// filled in:
// unsigned int __cpu_vendor;
@@ -8063,8 +9066,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateBitCast(BuildVector(Ops),
llvm::Type::getX86_MMXTy(getLLVMContext()));
case X86::BI__builtin_ia32_vec_ext_v2si:
- return Builder.CreateExtractElement(Ops[0],
- llvm::ConstantInt::get(Ops[1]->getType(), 0));
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
+ Index &= NumElts - 1;
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateExtractElement(Ops[0], Index);
+ }
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
+ Index &= NumElts - 1;
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
+ }
case X86::BI_mm_setcsr:
case X86::BI__builtin_ia32_ldmxcsr: {
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
@@ -8141,7 +9173,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storess128_mask:
case X86::BI__builtin_ia32_storesd128_mask: {
- return EmitX86MaskedStore(*this, Ops, 16);
+ return EmitX86MaskedStore(*this, Ops, 1);
}
case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
@@ -8173,6 +9205,66 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtmask2q512:
return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ return EmitX86ConvertToMask(*this, Ops[0]);
+
+ case X86::BI__builtin_ia32_vfmaddss3:
+ case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddss3_mask:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask:
+ return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+ case X86::BI__builtin_ia32_vfmaddss:
+ case X86::BI__builtin_ia32_vfmaddsd:
+ return EmitScalarFMAExpr(*this, Ops,
+ Constant::getNullValue(Ops[0]->getType()));
+ case X86::BI__builtin_ia32_vfmaddss3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+ return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+ case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+ return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+ case X86::BI__builtin_ia32_vfmsubss3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+ return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
+ /*NegAcc*/true);
+ case X86::BI__builtin_ia32_vfmaddps:
+ case X86::BI__builtin_ia32_vfmaddpd:
+ case X86::BI__builtin_ia32_vfmaddps256:
+ case X86::BI__builtin_ia32_vfmaddpd256:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+ case X86::BI__builtin_ia32_vfmaddsubps:
+ case X86::BI__builtin_ia32_vfmaddsubpd:
+ case X86::BI__builtin_ia32_vfmaddsubps256:
+ case X86::BI__builtin_ia32_vfmaddsubpd256:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
@@ -8211,7 +9303,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, 16);
+ return EmitX86MaskedLoad(*this, Ops, 1);
case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
@@ -8230,11 +9322,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedLoad(*this, Ops, Align);
}
- case X86::BI__builtin_ia32_vbroadcastf128_pd256:
- case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
- llvm::Type *DstTy = ConvertType(E->getType());
- return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
- }
+ case X86::BI__builtin_ia32_expandloaddf128_mask:
+ case X86::BI__builtin_ia32_expandloaddf256_mask:
+ case X86::BI__builtin_ia32_expandloaddf512_mask:
+ case X86::BI__builtin_ia32_expandloadsf128_mask:
+ case X86::BI__builtin_ia32_expandloadsf256_mask:
+ case X86::BI__builtin_ia32_expandloadsf512_mask:
+ case X86::BI__builtin_ia32_expandloaddi128_mask:
+ case X86::BI__builtin_ia32_expandloaddi256_mask:
+ case X86::BI__builtin_ia32_expandloaddi512_mask:
+ case X86::BI__builtin_ia32_expandloadsi128_mask:
+ case X86::BI__builtin_ia32_expandloadsi256_mask:
+ case X86::BI__builtin_ia32_expandloadsi512_mask:
+ case X86::BI__builtin_ia32_expandloadhi128_mask:
+ case X86::BI__builtin_ia32_expandloadhi256_mask:
+ case X86::BI__builtin_ia32_expandloadhi512_mask:
+ case X86::BI__builtin_ia32_expandloadqi128_mask:
+ case X86::BI__builtin_ia32_expandloadqi256_mask:
+ case X86::BI__builtin_ia32_expandloadqi512_mask:
+ return EmitX86ExpandLoad(*this, Ops);
+
+ case X86::BI__builtin_ia32_compressstoredf128_mask:
+ case X86::BI__builtin_ia32_compressstoredf256_mask:
+ case X86::BI__builtin_ia32_compressstoredf512_mask:
+ case X86::BI__builtin_ia32_compressstoresf128_mask:
+ case X86::BI__builtin_ia32_compressstoresf256_mask:
+ case X86::BI__builtin_ia32_compressstoresf512_mask:
+ case X86::BI__builtin_ia32_compressstoredi128_mask:
+ case X86::BI__builtin_ia32_compressstoredi256_mask:
+ case X86::BI__builtin_ia32_compressstoredi512_mask:
+ case X86::BI__builtin_ia32_compressstoresi128_mask:
+ case X86::BI__builtin_ia32_compressstoresi256_mask:
+ case X86::BI__builtin_ia32_compressstoresi512_mask:
+ case X86::BI__builtin_ia32_compressstorehi128_mask:
+ case X86::BI__builtin_ia32_compressstorehi256_mask:
+ case X86::BI__builtin_ia32_compressstorehi512_mask:
+ case X86::BI__builtin_ia32_compressstoreqi128_mask:
+ case X86::BI__builtin_ia32_compressstoreqi256_mask:
+ case X86::BI__builtin_ia32_compressstoreqi512_mask:
+ return EmitX86CompressStore(*this, Ops);
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
@@ -8246,17 +9372,275 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// extract (0, 1)
unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
- llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
// cast pointer to i64 & store
Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask: {
+ llvm::Type *DstTy = ConvertType(E->getType());
+ unsigned NumElts = DstTy->getVectorNumElements();
+ unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned SubVectors = SrcNumElts / NumElts;
+ unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
+ assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
+ Index &= SubVectors - 1; // Remove any extra bits.
+ Index *= NumElts;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + Index;
+
+ Value *Res = Builder.CreateShuffleVector(Ops[0],
+ UndefValue::get(Ops[0]->getType()),
+ makeArrayRef(Indices, NumElts),
+ "extract");
+
+ if (Ops.size() == 4)
+ Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
+
+ return Res;
+ }
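A worked instance (illustrative, not part of this patch) of the index computation above: extracting 128-bit piece 1 from a v16f32 source gives NumElts = 4 and SubVectors = 4, so the index scales to 4 and the shuffle mask becomes {4, 5, 6, 7}.

#include <cassert>

static void demoExtractIndices() {
  unsigned NumElts = 4, SubVectors = 4, Index = 1; // piece 1 of 4
  Index &= SubVectors - 1; // remove any extra immediate bits
  Index *= NumElts;        // first source element of the piece
  unsigned Indices[4];
  for (unsigned I = 0; I != NumElts; ++I)
    Indices[I] = I + Index;
  assert(Indices[0] == 4 && Indices[3] == 7);
}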
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_inserti32x4:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512: {
+ unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
+ unsigned SubVectors = DstNumElts / SrcNumElts;
+ unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
+ assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
+ Index &= SubVectors - 1; // Remove any extra bits.
+ Index *= SrcNumElts;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != DstNumElts; ++i)
+ Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
+
+ Value *Op1 = Builder.CreateShuffleVector(Ops[1],
+ UndefValue::get(Ops[1]->getType()),
+ makeArrayRef(Indices, DstNumElts),
+ "widen");
+
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ if (i >= Index && i < (Index + SrcNumElts))
+ Indices[i] = (i - Index) + DstNumElts;
+ else
+ Indices[i] = i;
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Op1,
+ makeArrayRef(Indices, DstNumElts),
+ "insert");
+ }
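The matching worked instance (illustrative, not part of this patch) for the two-shuffle insert: placing a 4-element source at piece 1 of an 8-element destination widens the source with undef padding first, then selects destination lanes {0..3} and widened-source lanes {8..11}.

#include <cassert>

static void demoInsertIndices() {
  unsigned DstNumElts = 8, SrcNumElts = 4;
  unsigned Index = 1 * SrcNumElts; // piece 1, scaled to element units
  unsigned Indices[8];
  for (unsigned I = 0; I != DstNumElts; ++I)
    Indices[I] = (I >= Index && I < Index + SrcNumElts)
                     ? (I - Index) + DstNumElts // from the widened source
                     : I;                       // from the destination
  assert(Indices[3] == 3 && Indices[4] == 8 && Indices[7] == 11);
}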
+ case X86::BI__builtin_ia32_pmovqd512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask: {
+ Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+ return EmitX86Select(*this, Ops[2], Res, Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pmovdb512_mask:
+ case X86::BI__builtin_ia32_pmovdw512_mask:
+ case X86::BI__builtin_ia32_pmovqw512_mask: {
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_pmovdb512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_db_512;
+ break;
+ case X86::BI__builtin_ia32_pmovdw512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
+ break;
+ case X86::BI__builtin_ia32_pmovqw512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
+ break;
+ }
+
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, Ops);
+ }
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_pblendd256: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+ uint32_t Indices[16];
+ // If there are more than 8 elements, the 8-bit immediate is reused for
+ // each group of 8, hence the modulo below.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
+
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "blend");
+ }
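A scalar reference (illustrative, not part of this patch) for the blend lowering: bit (I % 8) of the immediate selects lane I from the second source, otherwise the first.

#include <cstddef>
#include <cstdint>

static void blendRef(const int32_t *Src1, const int32_t *Src2, int32_t *Out,
                     size_t NumElts, uint8_t Imm) {
  for (size_t I = 0; I != NumElts; ++I)
    Out[I] = ((Imm >> (I % 8)) & 1) ? Src2[I] : Src1[I];
}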
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+ // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[32];
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ for (unsigned i = 0; i != 4; ++i) {
+ Indices[l + i] = l + (Imm & 3);
+ Imm >>= 2;
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ Indices[l + i] = l + i;
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "pshuflw");
+ }
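A worked example (illustrative, not part of this patch) of the immediate-splat trick used above: multiplying the 8-bit immediate by 0x01010101 replicates it into all four bytes, so each 128-bit lane consumes an identical copy of the selector bits as Imm is shifted right.

#include <cassert>
#include <cstdint>

static void demoImmSplat() {
  uint32_t Imm = 0xb1;             // example 8-bit shuffle immediate
  Imm = (Imm & 0xff) * 0x01010101; // replicate into every byte
  assert(Imm == 0xb1b1b1b1);
  // Each 8-element lane shifts out one byte (four 2-bit selectors), so the
  // next lane starts over with the same immediate byte.
}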
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+ // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[32];
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ for (unsigned i = 0; i != 4; ++i)
+ Indices[l + i] = l + i;
+ for (unsigned i = 4; i != 8; ++i) {
+ Indices[l + i] = l + 4 + (Imm & 3);
+ Imm >>= 2;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "pshufhw");
+ }
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ case X86::BI__builtin_ia32_vpermilpd:
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilpd256:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ Indices[i + l] = (Imm % NumLaneElts) + l;
+ Imm /= NumLaneElts;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "permil");
+ }
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Index = Imm % NumLaneElts;
+ Imm /= NumLaneElts;
+ if (i >= (NumLaneElts / 2))
+ Index += NumElts;
+ Indices[l + i] = l + Index;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "shufp");
+ }
+ case X86::BI__builtin_ia32_permdi256:
+ case X86::BI__builtin_ia32_permdf256:
+ case X86::BI__builtin_ia32_permdi512:
+ case X86::BI__builtin_ia32_permdf512: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+ // These intrinsics operate on 256-bit lanes of four 64-bit elements.
+ uint32_t Indices[8];
+ for (unsigned l = 0; l != NumElts; l += 4)
+ for (unsigned i = 0; i != 4; ++i)
+ Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "perm");
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
- case X86::BI__builtin_ia32_palignr512_mask: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ case X86::BI__builtin_ia32_palignr512: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
assert(NumElts % 16 == 0);
@@ -8285,15 +9669,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
- makeArrayRef(Indices, NumElts),
- "palignr");
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "palignr");
+ }
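Since the palignr index loop is split across the hunks above, here is a byte-level reference (illustrative, not part of this patch) of the underlying semantics: each 128-bit lane shifts the concatenation Hi:Lo right by ShiftVal bytes, with zeros shifted in from the top.

#include <cstdint>
#include <cstring>

static void palignrLaneRef(const uint8_t Lo[16], const uint8_t Hi[16],
                           uint8_t Out[16], unsigned ShiftVal) {
  uint8_t Cat[32];
  std::memcpy(Cat, Lo, 16);      // low 16 bytes of the concatenation
  std::memcpy(Cat + 16, Hi, 16); // high 16 bytes
  for (unsigned I = 0; I != 16; ++I)
    Out[I] = (I + ShiftVal < 32) ? Cat[I + ShiftVal] : 0;
}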
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
+
+ // Mask the shift amount to the width of two vectors.
+ ShiftVal &= (2 * NumElts) - 1;
- // If this isn't a masked builtin, just return the align operation.
- if (Ops.size() == 3)
- return Align;
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "valign");
+ }
+ case X86::BI__builtin_ia32_shuf_f32x4_256:
+ case X86::BI__builtin_ia32_shuf_f64x2_256:
+ case X86::BI__builtin_ia32_shuf_i32x4_256:
+ case X86::BI__builtin_ia32_shuf_i64x2_256:
+ case X86::BI__builtin_ia32_shuf_f32x4:
+ case X86::BI__builtin_ia32_shuf_f64x2:
+ case X86::BI__builtin_ia32_shuf_i32x4:
+ case X86::BI__builtin_ia32_shuf_i64x2: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ unsigned Index = (Imm % NumLanes) * NumLaneElts;
+ Imm /= NumLanes; // Discard the bits we just used.
+ if (l >= (NumElts / 2))
+ Index += NumElts; // Switch to other source.
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ Indices[l + i] = Index + i;
+ }
+ }
- return EmitX86Select(*this, Ops[4], Align, Ops[3]);
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "shuf");
}
case X86::BI__builtin_ia32_vperm2f128_pd256:
@@ -8335,6 +9762,66 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
"vperm");
}
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ llvm::Type *ResultType = Ops[0]->getType();
+ // Builtin type is vXi64 so multiply by 8 to get bytes.
+ unsigned NumElts = ResultType->getVectorNumElements() * 8;
+
+ // If pslldq is shifting the vector more than 15 bytes, emit zero.
+ if (ShiftVal >= 16)
+ return llvm::Constant::getNullValue(ResultType);
+
+ uint32_t Indices[64];
+ // 256/512-bit pslldq operates on 128-bit lanes, so handle each lane.
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = NumElts + i - ShiftVal;
+ if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
+ Indices[l + i] = Idx + l;
+ }
+ }
+
+ llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+ Value *Zero = llvm::Constant::getNullValue(VecTy);
+ Value *SV = Builder.CreateShuffleVector(Zero, Cast,
+ makeArrayRef(Indices, NumElts),
+ "pslldq");
+ return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
+ }
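A byte-level reference (illustrative, not part of this patch) for the pslldq index math: within each 128-bit lane, bytes move up by ShiftVal positions and the vacated low bytes become zero, which is what selecting from the (Zero, Cast) shuffle pair encodes.

#include <cstddef>
#include <cstdint>

static void pslldqRef(const uint8_t *In, uint8_t *Out, size_t NumBytes,
                      unsigned ShiftVal) { // NumBytes is a multiple of 16
  for (size_t L = 0; L != NumBytes; L += 16) // per 128-bit lane
    for (unsigned I = 0; I != 16; ++I)
      Out[L + I] = (I < ShiftVal) ? 0 : In[L + I - ShiftVal];
}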
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ llvm::Type *ResultType = Ops[0]->getType();
+ // Builtin type is vXi64 so multiply by 8 to get bytes.
+ unsigned NumElts = ResultType->getVectorNumElements() * 8;
+
+ // If psrldq is shifting the vector more than 15 bytes, emit zero.
+ if (ShiftVal >= 16)
+ return llvm::Constant::getNullValue(ResultType);
+
+ uint32_t Indices[64];
+ // 256/512-bit psrldq operates on 128-bit lanes, so handle each lane.
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = i + ShiftVal;
+ if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
+ Indices[l + i] = Idx + l;
+ }
+ }
+
+ llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+ Value *Zero = llvm::Constant::getNullValue(VecTy);
+ Value *SV = Builder.CreateShuffleVector(Cast, Zero,
+ makeArrayRef(Indices, NumElts),
+ "psrldq");
+ return Builder.CreateBitCast(SV, ResultType, "cast");
+ }
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
@@ -8380,6 +9867,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectpd_256:
case X86::BI__builtin_ia32_selectpd_512:
return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
+ case X86::BI__builtin_ia32_selectss_128:
+ case X86::BI__builtin_ia32_selectsd_128: {
+ Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ A = EmitX86ScalarSelect(*this, Ops[0], A, B);
+ return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
+ }
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
@@ -8411,6 +9905,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kortestchi:
+ case X86::BI__builtin_ia32_kortestzhi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ Value *C;
+ if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
+ C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
+ else
+ C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
+
case X86::BI__builtin_ia32_kandhi:
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
case X86::BI__builtin_ia32_kandnhi:
@@ -8427,85 +9933,176 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Builder.getInt16Ty());
}
- case X86::BI__builtin_ia32_vplzcntd_128_mask:
- case X86::BI__builtin_ia32_vplzcntd_256_mask:
- case X86::BI__builtin_ia32_vplzcntd_512_mask:
- case X86::BI__builtin_ia32_vplzcntq_128_mask:
- case X86::BI__builtin_ia32_vplzcntq_256_mask:
- case X86::BI__builtin_ia32_vplzcntq_512_mask: {
+ case X86::BI__builtin_ia32_kunpckdi:
+ case X86::BI__builtin_ia32_kunpcksi:
+ case X86::BI__builtin_ia32_kunpckhi: {
+ unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+
+ // First extract half of each vector. This gives better codegen than
+ // doing it in a single shuffle.
+ LHS = Builder.CreateShuffleVector(LHS, LHS,
+ makeArrayRef(Indices, NumElts / 2));
+ RHS = Builder.CreateShuffleVector(RHS, RHS,
+ makeArrayRef(Indices, NumElts / 2));
+ // Concat the vectors.
+ // NOTE: Operands are swapped to match the intrinsic definition.
+ Value *Res = Builder.CreateShuffleVector(RHS, LHS,
+ makeArrayRef(Indices, NumElts));
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
+ }
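A bit-level reference (illustrative, not part of this patch) for the kunpck lowering: the result's low half comes from the second operand and its high half from the first, which is why the final shuffle swaps RHS and LHS.

#include <cstdint>

static uint64_t kunpckRef(uint64_t A, uint64_t B, unsigned Bits) {
  unsigned Half = Bits / 2; // e.g. kunpckhi: Bits == 16, Half == 8
  uint64_t LowMask = (uint64_t(1) << Half) - 1;
  return ((A & LowMask) << Half) | (B & LowMask);
}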
+
+ case X86::BI__builtin_ia32_vplzcntd_128:
+ case X86::BI__builtin_ia32_vplzcntd_256:
+ case X86::BI__builtin_ia32_vplzcntd_512:
+ case X86::BI__builtin_ia32_vplzcntq_128:
+ case X86::BI__builtin_ia32_vplzcntq_256:
+ case X86::BI__builtin_ia32_vplzcntq_512: {
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
- return EmitX86Select(*this, Ops[2],
- Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
- Ops[1]);
+ return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
+ }
+ case X86::BI__builtin_ia32_sqrtss:
+ case X86::BI__builtin_ia32_sqrtsd: {
+ Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, {A});
+ return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
+ }
+ case X86::BI__builtin_ia32_sqrtsd_round_mask:
+ case X86::BI__builtin_ia32_sqrtss_round_mask: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
+ // Lower to native IR only if the rounding mode is 4
+ // (_MM_FROUND_CUR_DIRECTION); otherwise keep the target intrinsic.
+ if (CC != 4) {
+ Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
+ Intrinsic::x86_avx512_mask_sqrt_sd :
+ Intrinsic::x86_avx512_mask_sqrt_ss;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
+ Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, A);
+ Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
+ return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
+ }
+ case X86::BI__builtin_ia32_sqrtpd256:
+ case X86::BI__builtin_ia32_sqrtpd:
+ case X86::BI__builtin_ia32_sqrtps256:
+ case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
+ if (Ops.size() == 2) {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ // Lower to native IR only if the rounding mode is 4
+ // (_MM_FROUND_CUR_DIRECTION); otherwise keep the target intrinsic.
+ if (CC != 4) {
+ Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
+ Intrinsic::x86_avx512_sqrt_ps_512 :
+ Intrinsic::x86_avx512_sqrt_pd_512;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
+ }
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
+ return Builder.CreateCall(F, Ops[0]);
}
-
case X86::BI__builtin_ia32_pabsb128:
case X86::BI__builtin_ia32_pabsw128:
case X86::BI__builtin_ia32_pabsd128:
case X86::BI__builtin_ia32_pabsb256:
case X86::BI__builtin_ia32_pabsw256:
case X86::BI__builtin_ia32_pabsd256:
- case X86::BI__builtin_ia32_pabsq128_mask:
- case X86::BI__builtin_ia32_pabsq256_mask:
- case X86::BI__builtin_ia32_pabsb512_mask:
- case X86::BI__builtin_ia32_pabsw512_mask:
- case X86::BI__builtin_ia32_pabsd512_mask:
- case X86::BI__builtin_ia32_pabsq512_mask:
+ case X86::BI__builtin_ia32_pabsq128:
+ case X86::BI__builtin_ia32_pabsq256:
+ case X86::BI__builtin_ia32_pabsb512:
+ case X86::BI__builtin_ia32_pabsw512:
+ case X86::BI__builtin_ia32_pabsd512:
+ case X86::BI__builtin_ia32_pabsq512:
return EmitX86Abs(*this, Ops);
case X86::BI__builtin_ia32_pmaxsb128:
case X86::BI__builtin_ia32_pmaxsw128:
case X86::BI__builtin_ia32_pmaxsd128:
- case X86::BI__builtin_ia32_pmaxsq128_mask:
+ case X86::BI__builtin_ia32_pmaxsq128:
case X86::BI__builtin_ia32_pmaxsb256:
case X86::BI__builtin_ia32_pmaxsw256:
case X86::BI__builtin_ia32_pmaxsd256:
- case X86::BI__builtin_ia32_pmaxsq256_mask:
- case X86::BI__builtin_ia32_pmaxsb512_mask:
- case X86::BI__builtin_ia32_pmaxsw512_mask:
- case X86::BI__builtin_ia32_pmaxsd512_mask:
- case X86::BI__builtin_ia32_pmaxsq512_mask:
+ case X86::BI__builtin_ia32_pmaxsq256:
+ case X86::BI__builtin_ia32_pmaxsb512:
+ case X86::BI__builtin_ia32_pmaxsw512:
+ case X86::BI__builtin_ia32_pmaxsd512:
+ case X86::BI__builtin_ia32_pmaxsq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
case X86::BI__builtin_ia32_pmaxub128:
case X86::BI__builtin_ia32_pmaxuw128:
case X86::BI__builtin_ia32_pmaxud128:
- case X86::BI__builtin_ia32_pmaxuq128_mask:
+ case X86::BI__builtin_ia32_pmaxuq128:
case X86::BI__builtin_ia32_pmaxub256:
case X86::BI__builtin_ia32_pmaxuw256:
case X86::BI__builtin_ia32_pmaxud256:
- case X86::BI__builtin_ia32_pmaxuq256_mask:
- case X86::BI__builtin_ia32_pmaxub512_mask:
- case X86::BI__builtin_ia32_pmaxuw512_mask:
- case X86::BI__builtin_ia32_pmaxud512_mask:
- case X86::BI__builtin_ia32_pmaxuq512_mask:
+ case X86::BI__builtin_ia32_pmaxuq256:
+ case X86::BI__builtin_ia32_pmaxub512:
+ case X86::BI__builtin_ia32_pmaxuw512:
+ case X86::BI__builtin_ia32_pmaxud512:
+ case X86::BI__builtin_ia32_pmaxuq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
case X86::BI__builtin_ia32_pminsb128:
case X86::BI__builtin_ia32_pminsw128:
case X86::BI__builtin_ia32_pminsd128:
- case X86::BI__builtin_ia32_pminsq128_mask:
+ case X86::BI__builtin_ia32_pminsq128:
case X86::BI__builtin_ia32_pminsb256:
case X86::BI__builtin_ia32_pminsw256:
case X86::BI__builtin_ia32_pminsd256:
- case X86::BI__builtin_ia32_pminsq256_mask:
- case X86::BI__builtin_ia32_pminsb512_mask:
- case X86::BI__builtin_ia32_pminsw512_mask:
- case X86::BI__builtin_ia32_pminsd512_mask:
- case X86::BI__builtin_ia32_pminsq512_mask:
+ case X86::BI__builtin_ia32_pminsq256:
+ case X86::BI__builtin_ia32_pminsb512:
+ case X86::BI__builtin_ia32_pminsw512:
+ case X86::BI__builtin_ia32_pminsd512:
+ case X86::BI__builtin_ia32_pminsq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
case X86::BI__builtin_ia32_pminub128:
case X86::BI__builtin_ia32_pminuw128:
case X86::BI__builtin_ia32_pminud128:
- case X86::BI__builtin_ia32_pminuq128_mask:
+ case X86::BI__builtin_ia32_pminuq128:
case X86::BI__builtin_ia32_pminub256:
case X86::BI__builtin_ia32_pminuw256:
case X86::BI__builtin_ia32_pminud256:
- case X86::BI__builtin_ia32_pminuq256_mask:
- case X86::BI__builtin_ia32_pminub512_mask:
- case X86::BI__builtin_ia32_pminuw512_mask:
- case X86::BI__builtin_ia32_pminud512_mask:
- case X86::BI__builtin_ia32_pminuq512_mask:
+ case X86::BI__builtin_ia32_pminuq256:
+ case X86::BI__builtin_ia32_pminub512:
+ case X86::BI__builtin_ia32_pminuw512:
+ case X86::BI__builtin_ia32_pminud512:
+ case X86::BI__builtin_ia32_pminuq512:
return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
+ case X86::BI__builtin_ia32_pmuludq128:
+ case X86::BI__builtin_ia32_pmuludq256:
+ case X86::BI__builtin_ia32_pmuludq512:
+ return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
+
+ case X86::BI__builtin_ia32_pmuldq128:
+ case X86::BI__builtin_ia32_pmuldq256:
+ case X86::BI__builtin_ia32_pmuldq512:
+ return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
+
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
+
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -8549,7 +10146,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Call, 1);
}
- // SSE packed comparison intrinsics
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ case X86::BI__builtin_ia32_fpclasspd512_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *MaskIn = Ops[2];
+ Ops.erase(&Ops[2]);
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ ID = Intrinsic::x86_avx512_fpclass_ps_128;
+ break;
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ ID = Intrinsic::x86_avx512_fpclass_ps_256;
+ break;
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ ID = Intrinsic::x86_avx512_fpclass_ps_512;
+ break;
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ ID = Intrinsic::x86_avx512_fpclass_pd_128;
+ break;
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ ID = Intrinsic::x86_avx512_fpclass_pd_256;
+ break;
+ case X86::BI__builtin_ia32_fpclasspd512_mask:
+ ID = Intrinsic::x86_avx512_fpclass_pd_512;
+ break;
+ }
+
+ Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
+ }
+
+ // Packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
return getVectorFCmpIR(CmpInst::FCMP_OEQ);
@@ -8577,64 +10211,79 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
- unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- // If this one of the SSE immediates, we can use native IR.
- if (CC < 8) {
- FCmpInst::Predicate Pred;
- switch (CC) {
- case 0: Pred = FCmpInst::FCMP_OEQ; break;
- case 1: Pred = FCmpInst::FCMP_OLT; break;
- case 2: Pred = FCmpInst::FCMP_OLE; break;
- case 3: Pred = FCmpInst::FCMP_UNO; break;
- case 4: Pred = FCmpInst::FCMP_UNE; break;
- case 5: Pred = FCmpInst::FCMP_UGE; break;
- case 6: Pred = FCmpInst::FCMP_UGT; break;
- case 7: Pred = FCmpInst::FCMP_ORD; break;
- }
- return getVectorFCmpIR(Pred);
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask: {
+ // Lower vector comparisons to fcmp instructions, ignoring both the
+ // requested signalling behaviour and the requested rounding mode.
+ // This is only possible as long as FENV_ACCESS is not implemented.
+ // See also: https://reviews.llvm.org/D45616
+
+ // The third argument is the comparison condition, an integer in the
+ // range [0, 31].
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
+
+ // Lower to an IR fcmp, ignoring the requested signaling behaviour, e.g.
+ // both _CMP_GT_OS and _CMP_GT_OQ are translated to FCMP_OGT.
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x01: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x02: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x03: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x04: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x05: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x06: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x07: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x09: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
+ case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
+ case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x11: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x12: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x13: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x14: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x15: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x16: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x17: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x19: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
+ case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
+ default: llvm_unreachable("Unhandled CC");
}
- // We can't handle 8-31 immediates with native IR, use the intrinsic.
- // Except for predicates that create constants.
- Intrinsic::ID ID;
+ // Builtins without the _mask suffix return a vector of integers of the
+ // same width as the input vectors.
switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpps:
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpps256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
- llvm::Constant::getNullValue(Builder.getInt32Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmppd:
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmppd256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
- llvm::Constant::getNullValue(Builder.getInt64Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_pd_256;
- break;
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
+ }
+ default:
+ return getVectorFCmpIR(Pred);
}
-
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
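One property of the 32-entry table above, shown as a sketch (illustrative, not part of this patch): entries 0x10-0x1f repeat 0x00-0x0f, because bit 4 of the immediate only selects signaling behaviour, which an IR fcmp cannot express.

#include <cassert>

// The predicate table could equivalently be indexed by CC & 0xf; e.g. both
// _CMP_GT_OS (0x0e) and _CMP_GT_OQ (0x1e) become FCMP_OGT.
static unsigned foldSignalingBit(unsigned CC) {
  assert(CC < 32 && "comparison immediate is 5 bits");
  return CC & 0xf;
}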
// SSE scalar comparison intrinsics
@@ -9195,19 +10844,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
- // Element zero comes from the first input vector and element one comes from
- // the second. The element indices within each vector are numbered in big
- // endian order so the shuffle mask must be adjusted for this on little
- // endian platforms (i.e. index is complemented and source vector reversed).
- unsigned ElemIdx0;
- unsigned ElemIdx1;
- if (getTarget().isLittleEndian()) {
- ElemIdx0 = (~Index & 1) + 2;
- ElemIdx1 = (~Index & 2) >> 1;
- } else { // BigEndian
- ElemIdx0 = (Index & 2) >> 1;
- ElemIdx1 = 2 + (Index & 1);
- }
+ // Account for endianness by treating this as just a shuffle: the same
+ // indices produce the expected results on both LE and BE.
+ unsigned ElemIdx0 = (Index & 2) >> 1;
+ unsigned ElemIdx1 = 2 + (Index & 1);
Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
ConstantInt::get(Int32Ty, ElemIdx1)};
@@ -9398,6 +11039,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CI->setConvergent();
return CI;
}
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+ llvm::SmallVector<llvm::Value *, 5> Args;
+ for (unsigned I = 0; I != 5; ++I)
+ Args.push_back(EmitScalarExpr(E->getArg(I)));
+ const llvm::Type *PtrTy = Args[0]->getType();
+ // check pointer parameter
+ if (!PtrTy->isPointerTy() ||
+ E->getArg(0)
+ ->getType()
+ ->getPointeeType()
+ .getQualifiers()
+ .getAddressSpace() != LangAS::opencl_local ||
+ !PtrTy->getPointerElementType()->isFloatTy()) {
+ CGM.Error(E->getArg(0)->getLocStart(),
+ "parameter should have type \"local float*\"");
+ return nullptr;
+ }
+ // check float parameter
+ if (!Args[1]->getType()->isFloatTy()) {
+ CGM.Error(E->getArg(1)->getLocStart(),
+ "parameter should have type \"float\"");
+ return nullptr;
+ }
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ ID = Intrinsic::amdgcn_ds_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ ID = Intrinsic::amdgcn_ds_fmin;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+ ID = Intrinsic::amdgcn_ds_fmax;
+ break;
+ default:
+ llvm_unreachable("Unknown BuiltinID");
+ }
+ Value *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Args);
+ }
// amdgcn workitem
case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
@@ -10028,7 +11712,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__hmma_m16n16k16_ld_a:
case NVPTX::BI__hmma_m16n16k16_ld_b:
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
+ case NVPTX::BI__hmma_m32n8k16_ld_a:
+ case NVPTX::BI__hmma_m32n8k16_ld_b:
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
+ case NVPTX::BI__hmma_m8n32k16_ld_a:
+ case NVPTX::BI__hmma_m8n32k16_ld_b:
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
Address Dst = EmitPointerWithAlignment(E->getArg(0));
Value *Src = EmitScalarExpr(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
@@ -10040,31 +11732,70 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
unsigned NumResults;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_ld_a:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
- : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_b:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
- : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
- : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
- : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_a:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_b:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_a:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_b:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
NumResults = 8;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
}
Value *Result =
- Builder.CreateCall(CGM.getIntrinsic(IID),
- {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm});
+ Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
// Save returned values.
for (unsigned i = 0; i < NumResults; ++i) {
@@ -10078,7 +11809,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
}
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
+ case NVPTX::BI__hmma_m16n16k16_st_c_f32:
+ case NVPTX::BI__hmma_m32n8k16_st_c_f16:
+ case NVPTX::BI__hmma_m32n8k16_st_c_f32:
+ case NVPTX::BI__hmma_m8n32k16_st_c_f16:
+ case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
Value *Dst = EmitScalarExpr(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
@@ -10092,21 +11827,38 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
- : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
- : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_st_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_st_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_st_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_st_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
}
- Function *Intrinsic = CGM.getIntrinsic(IID);
+ Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
- SmallVector<Value *, 10> Values;
- Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy));
+ SmallVector<Value *, 10> Values = {Dst};
for (unsigned i = 0; i < NumResults; ++i) {
Value *V = Builder.CreateAlignedLoad(
Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
@@ -10118,12 +11870,20 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
return Result;
}
- // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
- // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
+ // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
+ // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
- case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
Address Dst = EmitPointerWithAlignment(E->getArg(0));
Address SrcA = EmitPointerWithAlignment(E->getArg(1));
Address SrcB = EmitPointerWithAlignment(E->getArg(2));
@@ -10140,15 +11900,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
bool Satf = SatfArg.getSExtValue();
// clang-format off
-#define MMA_VARIANTS(type) {{ \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+#define MMA_VARIANTS(geom, type) {{ \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
+ Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
}}
// clang-format on
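For reference, the getMMAIntrinsic helper (elided from the diff context above) indexes such an eight-entry table; a minimal sketch, assuming Layout is 0..3 in row_row/row_col/col_row/col_col order and Satf is 0 or 1:

    auto getMMAIntrinsic = [&](std::array<unsigned, 8> Variants) {
      unsigned Index = Layout * 2 + Satf;  // layout-major, satf as the low bit
      return Variants[Index];
    };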
@@ -10162,22 +11922,62 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
unsigned NumEltsD;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
- IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
NumEltsC = 4;
NumEltsD = 4;
break;
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
- IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
NumEltsC = 4;
NumEltsD = 8;
break;
case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
- IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
NumEltsC = 8;
NumEltsD = 4;
break;
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
- IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
+ IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
+ NumEltsC = 8;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
+ NumEltsC = 4;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
+ NumEltsC = 4;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
+ NumEltsC = 8;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
+ NumEltsC = 8;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
+ NumEltsC = 4;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
+ NumEltsC = 4;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
+ NumEltsC = 8;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
NumEltsC = 8;
NumEltsD = 8;
break;
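As a usage sketch (hypothetical CUDA device code; the fragment pointers follow the (d, a, b, c, layout, satf) builtin signature documented above):

    // Hypothetical call: layout 0 (row_row), no saturation.
    __hmma_m32n8k16_mma_f32f32(d_frag, a_frag, b_frag, c_frag,
                               /*layout=*/0, /*satf=*/0);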
@@ -10231,6 +12031,36 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_memory_size: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *I = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
+ return Builder.CreateCall(Callee, I);
+ }
+ case WebAssembly::BI__builtin_wasm_memory_grow: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Args[] = {
+ EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1))
+ };
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
+ return Builder.CreateCall(Callee, Args);
+ }
+ case WebAssembly::BI__builtin_wasm_mem_size: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *I = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
+ return Builder.CreateCall(Callee, I);
+ }
+ case WebAssembly::BI__builtin_wasm_mem_grow: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Args[] = {
+ EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1))
+ };
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
+ return Builder.CreateCall(Callee, Args);
+ }
case WebAssembly::BI__builtin_wasm_current_memory: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
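A brief usage sketch for the new builtin names (hypothetical C/C++ on a wasm32 target; sizes are counted in 64 KiB pages):

    unsigned long Pages = __builtin_wasm_memory_size(0);     // memory index 0
    unsigned long Prev  = __builtin_wasm_memory_grow(0, 4);  // grow by 4 pages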
@@ -10262,6 +12092,93 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
SmallVector<llvm::Value *, 4> Ops;
Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
+ // The base pointer is passed by address, so it needs to be loaded.
+ Address BP = EmitPointerWithAlignment(E->getArg(0));
+ BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
+ BP.getAlignment());
+ llvm::Value *Base = Builder.CreateLoad(BP);
+ // Operands are Base, Increment, Modifier, Start.
+ if (HasImm)
+ Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)) };
+ else
+ Ops = { Base, EmitScalarExpr(E->getArg(1)),
+ EmitScalarExpr(E->getArg(2)) };
+
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
+ llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
+ llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
+ NewBase->getType()->getPointerTo());
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ // The intrinsic generates two results. The new value for the base pointer
+ // needs to be stored.
+ Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
+ return Builder.CreateExtractValue(Result, 0);
+ };
+
+ auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
+ // The base pointer is passed by address, so it needs to be loaded.
+ Address BP = EmitPointerWithAlignment(E->getArg(0));
+ BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
+ BP.getAlignment());
+ llvm::Value *Base = Builder.CreateLoad(BP);
+ // Operands are Base, Increment, Modifier, Value, Start.
+ if (HasImm)
+ Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
+ else
+ Ops = { Base, EmitScalarExpr(E->getArg(1)),
+ EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
+
+ llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
+ llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
+ NewBase->getType()->getPointerTo());
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ // The intrinsic generates one result, which is the new value for the base
+ // pointer. It needs to be stored.
+ return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
+ };
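A rough usage sketch matching the operand order handled above (the variable names and the modifier value Mod are hypothetical; the base pointer is passed by address and updated in place):

    int Buf[16];
    int *Base = Buf;
    int V = __builtin_HEXAGON_L2_loadri_pci((void **)&Base, /*Inc=*/4,
                                            Mod, /*Start=*/Buf);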
+
+ // Handle the conversion of bit-reverse load intrinsics to bit code.
+ // The intrinsic call after this function only reads from memory, and the
+ // write to memory is handled by the store instruction.
+ auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
+ // The intrinsic returns the loaded value and the updated base pointer as a
+ // pair. The updated base pointer is returned from the builtin, while the
+ // loaded value is stored through the destination, which is passed by address.
+ llvm::Value *BaseAddress =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+
+ // Expressions like &(*pt++) are incremented on every evaluation;
+ // EmitPointerWithAlignment and EmitScalarExpr each evaluate the
+ // expression only once per call.
+ Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
+ DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
+ DestAddr.getAlignment());
+ llvm::Value *DestAddress = DestAddr.getPointer();
+
+ // Operands are Base, Dest, Modifier.
+ // The intrinsic format in LLVM IR is defined as
+ // { ValueType, i8* } (i8*, i32).
+ Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
+
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
+ // The value needs to be stored as the variable is passed by reference.
+ llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
+
+ // The stored value needs to be truncated to fit the destination type.
+ // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
+ // to be handled with stores of the respective destination type.
+ DestVal = Builder.CreateTrunc(DestVal, DestTy);
+
+ llvm::Value *DestForStore =
+ Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
+ Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
+ // The updated value of the base pointer is returned.
+ return Builder.CreateExtractValue(Result, 1);
+ };
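And a corresponding sketch for the bit-reverse loads (hypothetical values; the destination is passed by reference and the updated base pointer is returned):

    int Dest;
    void *NewBase = __builtin_brev_ldw(Base, &Dest, Mod);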
+
switch (BuiltinID) {
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
@@ -10307,6 +12224,64 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
+ return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
+ return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
+ case Hexagon::BI__builtin_brev_ldub:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
+ case Hexagon::BI__builtin_brev_ldb:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
+ case Hexagon::BI__builtin_brev_lduh:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
+ case Hexagon::BI__builtin_brev_ldh:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
+ case Hexagon::BI__builtin_brev_ldw:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
+ case Hexagon::BI__builtin_brev_ldd:
+ return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
+ default:
+ break;
} // switch
return nullptr;
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index d24ef0a8a974..5fcc9e011bcb 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -15,17 +15,20 @@
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Format.h"
using namespace clang;
using namespace CodeGen;
namespace {
+constexpr unsigned CudaFatMagic = 0x466243b1;
+constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"
class CGNVCUDARuntime : public CGCUDARuntime {
@@ -41,14 +44,22 @@ private:
/// Keeps track of kernel launch stubs emitted in this module
llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
- /// Keeps track of variables containing handles of GPU binaries. Populated by
+ /// Keeps track of the variable containing the handle of the GPU binary. Populated by
/// ModuleCtorFunction() and used to create corresponding cleanup calls in
/// ModuleDtorFunction()
- llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles;
+ llvm::GlobalVariable *GpuBinaryHandle = nullptr;
+ /// Whether we generate relocatable device code.
+ bool RelocatableDeviceCode;
llvm::Constant *getSetupArgumentFn() const;
llvm::Constant *getLaunchFn() const;
+ llvm::FunctionType *getRegisterGlobalsFnTy() const;
+ llvm::FunctionType *getCallbackFnTy() const;
+ llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
+ std::string addPrefixToName(StringRef FuncName) const;
+ std::string addUnderscoredPrefixToName(StringRef FuncName) const;
+
/// Creates a function to register all kernel stubs generated in this module.
llvm::Function *makeRegisterGlobalsFn();
@@ -64,14 +75,34 @@ private:
auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
llvm::GlobalVariable *GV =
cast<llvm::GlobalVariable>(ConstStr.getPointer());
- if (!SectionName.empty())
+ if (!SectionName.empty()) {
GV->setSection(SectionName);
+ // Mark the address as used, which makes sure that this section isn't
+ // merged away and that it really ends up in the object file.
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
+ }
if (Alignment)
GV->setAlignment(Alignment);
return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
ConstStr.getPointer(), Zeros);
- }
+ }
+
+ /// Helper function that generates an empty dummy function returning void.
+ llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
+ assert(FnTy->getReturnType()->isVoidTy() &&
+ "Can only generate dummy functions returning void!");
+ llvm::Function *DummyFunc = llvm::Function::Create(
+ FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
+
+ llvm::BasicBlock *DummyBlock =
+ llvm::BasicBlock::Create(Context, "", DummyFunc);
+ CGBuilderTy FuncBuilder(CGM, Context);
+ FuncBuilder.SetInsertPoint(DummyBlock);
+ FuncBuilder.CreateRetVoid();
+
+ return DummyFunc;
+ }
void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
@@ -91,9 +122,22 @@ public:
}
+std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
+ if (CGM.getLangOpts().HIP)
+ return ((Twine("hip") + Twine(FuncName)).str());
+ return ((Twine("cuda") + Twine(FuncName)).str());
+}
+std::string
+CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
+ if (CGM.getLangOpts().HIP)
+ return ((Twine("__hip") + Twine(FuncName)).str());
+ return ((Twine("__cuda") + Twine(FuncName)).str());
+}
+
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
- TheModule(CGM.getModule()) {
+ TheModule(CGM.getModule()),
+ RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
@@ -109,15 +153,37 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
- return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
- Params, false),
- "cudaSetupArgument");
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, Params, false),
+ addPrefixToName("SetupArgument"));
}
llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
- // cudaError_t cudaLaunch(char *)
- return CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
+ if (CGM.getLangOpts().HIP) {
+ // hipError_t hipLaunchByPtr(char *);
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
+ } else {
+ // cudaError_t cudaLaunch(char *);
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
+ }
+}
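Taken together with emitDeviceStubBody, the emitted kernel stub behaves roughly like this (comment-only sketch; HIP substitutes the hip-prefixed entry points):

    // void __device_stub(T0 a0, T1 a1 /*...*/) {
    //   (cuda|hip)SetupArgument(&a0, sizeof(a0), offset0);
    //   (cuda|hip)SetupArgument(&a1, sizeof(a1), offset1);
    //   (cuda|hip)Launch((char *)__device_stub);  // hipLaunchByPtr for HIP
    // }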
+
+llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
+ return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
+}
+
+llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
+ return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
+}
+
+llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
+ auto CallbackFnTy = getCallbackFnTy();
+ auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
+ llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
+ VoidPtrTy, CallbackFnTy->getPointerTo()};
+ return llvm::FunctionType::get(VoidTy, Params, false);
}
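Spelled out as a C-style declaration, the function type built here is (sketch; the parameter names are illustrative):

    // void __cudaRegisterLinkedBinary_<id>(void (*RegisterGlobals)(void **),
    //                                      void *FatbinWrapper,
    //                                      void *ModuleId,
    //                                      void (*Callback)(void *));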
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
@@ -181,8 +247,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
return nullptr;
llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
- llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
+ getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
+ addUnderscoredPrefixToName("_register_globals"), &TheModule);
llvm::BasicBlock *EntryBB =
llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
CGBuilderTy Builder(CGM, Context);
@@ -195,7 +261,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
- "__cudaRegisterFunction");
+ addUnderscoredPrefixToName("RegisterFunction"));
// Extract GpuBinaryHandle passed as the first argument passed to
// __cuda_register_globals() and generate __cudaRegisterFunction() call for
@@ -219,7 +285,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
IntTy, IntTy};
llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterVarParams, false),
- "__cudaRegisterVar");
+ addUnderscoredPrefixToName("RegisterVar"));
for (auto &Pair : DeviceVars) {
llvm::GlobalVariable *Var = Pair.first;
unsigned Flags = Pair.second;
@@ -243,133 +309,307 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
}
/// Creates a global constructor function for the module:
+///
+/// For CUDA:
/// \code
/// void __cuda_module_ctor(void*) {
-/// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
-/// __cuda_register_globals(Handle0);
-/// ...
-/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
-/// __cuda_register_globals(HandleN);
+/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
+/// __cuda_register_globals(Handle);
+/// }
+/// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_ctor(void*) {
+/// if (__hip_gpubin_handle == 0) {
+/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
+/// __hip_register_globals(__hip_gpubin_handle);
+/// }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
- // No need to generate ctors/dtors if there are no GPU binaries.
- if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+ bool IsHIP = CGM.getLangOpts().HIP;
+ // No need to generate ctors/dtors if there is no GPU binary.
+ StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
+ if (CudaGpuBinaryFileName.empty() && !IsHIP)
return nullptr;
- // void __cuda_register_globals(void* handle);
+ // void __{cuda|hip}_register_globals(void* handle);
llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
- // void ** __cudaRegisterFatBinary(void *);
+ // We always need a function to pass in as a callback. Create a dummy
+ // implementation if we don't need to register anything.
+ if (RelocatableDeviceCode && !RegisterGlobalsFunc)
+ RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
+
+ // void ** __{cuda|hip}RegisterFatBinary(void *);
llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
- "__cudaRegisterFatBinary");
+ addUnderscoredPrefixToName("RegisterFatBinary"));
// struct { int magic, int version, void * gpu_binary, void * dont_care };
llvm::StructType *FatbinWrapperTy =
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
+ // Register the GPU binary with the CUDA runtime, store the returned handle
+ // in a global variable, and save a reference in GpuBinaryHandle to be
+ // cleaned up in the destructor on exit. Then associate all known kernels
+ // with the GPU binary handle so the CUDA runtime can figure out what to
+ // call on the GPU side.
+ std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;
+ if (!IsHIP) {
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
+ llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
+ if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << CudaGpuBinaryFileName << EC.message();
+ return nullptr;
+ }
+ CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
+ }
+
llvm::Function *ModuleCtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
+ llvm::GlobalValue::InternalLinkage,
+ addUnderscoredPrefixToName("_module_ctor"), &TheModule);
llvm::BasicBlock *CtorEntryBB =
llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
CGBuilderTy CtorBuilder(CGM, Context);
CtorBuilder.SetInsertPoint(CtorEntryBB);
- // For each GPU binary, register it with the CUDA runtime and store returned
- // handle in a global variable and save the handle in GpuBinaryHandles vector
- // to be cleaned up in destructor on exit. Then associate all known kernels
- // with the GPU binary handle so CUDA runtime can figure out what to call on
- // the GPU side.
- for (const std::string &GpuBinaryFileName :
- CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
- llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
- if (std::error_code EC = GpuBinaryOrErr.getError()) {
- CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
- << EC.message();
- continue;
- }
-
- const char *FatbinConstantName =
- CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+ const char *FatbinConstantName;
+ const char *FatbinSectionName;
+ const char *ModuleIDSectionName;
+ StringRef ModuleIDPrefix;
+ llvm::Constant *FatBinStr;
+ unsigned FatMagic;
+ if (IsHIP) {
+ FatbinConstantName = ".hip_fatbin";
+ FatbinSectionName = ".hipFatBinSegment";
+
+ ModuleIDSectionName = "__hip_module_id";
+ ModuleIDPrefix = "__hip_";
+
+ // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.
+ // The external symbol is supposed to contain the fat binary, but it will
+ // be populated somewhere else, e.g. by lld through a linker script.
+ FatBinStr = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty,
+ /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
+ "__hip_fatbin", nullptr,
+ llvm::GlobalVariable::NotThreadLocal);
+ cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+
+ FatMagic = HIPFatMagic;
+ } else {
+ if (RelocatableDeviceCode)
+ FatbinConstantName = CGM.getTriple().isMacOSX()
+ ? "__NV_CUDA,__nv_relfatbin"
+ : "__nv_relfatbin";
+ else
+ FatbinConstantName =
+ CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
// NVIDIA's cuobjdump looks for fatbins in this section.
- const char *FatbinSectionName =
+ FatbinSectionName =
CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
- // Create initialized wrapper structure that points to the loaded GPU binary
- ConstantInitBuilder Builder(CGM);
- auto Values = Builder.beginStruct(FatbinWrapperTy);
- // Fatbin wrapper magic.
- Values.addInt(IntTy, 0x466243b1);
- // Fatbin version.
- Values.addInt(IntTy, 1);
- // Data.
- Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(),
- "", FatbinConstantName, 8));
- // Unused in fatbin v1.
- Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
- llvm::GlobalVariable *FatbinWrapper =
- Values.finishAndCreateGlobal("__cuda_fatbin_wrapper",
- CGM.getPointerAlign(),
- /*constant*/ true);
- FatbinWrapper->setSection(FatbinSectionName);
+ ModuleIDSectionName = CGM.getTriple().isMacOSX()
+ ? "__NV_CUDA,__nv_module_id"
+ : "__nv_module_id";
+ ModuleIDPrefix = "__nv_";
+
+ // For CUDA, create a string literal containing the fat binary loaded from
+ // the given file.
+ FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatbinConstantName, 8);
+ FatMagic = CudaFatMagic;
+ }
+ // Create initialized wrapper structure that points to the loaded GPU binary
+ ConstantInitBuilder Builder(CGM);
+ auto Values = Builder.beginStruct(FatbinWrapperTy);
+ // Fatbin wrapper magic.
+ Values.addInt(IntTy, FatMagic);
+ // Fatbin version.
+ Values.addInt(IntTy, 1);
+ // Data.
+ Values.add(FatBinStr);
+ // Unused in fatbin v1.
+ Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
+ llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
+ addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
+ /*constant*/ true);
+ FatbinWrapper->setSection(FatbinSectionName);
+
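The emitted wrapper therefore looks roughly like this (C-like sketch):

    // static const struct {
    //   int Magic;         // CudaFatMagic or HIPFatMagic
    //   int Version;       // 1
    //   const void *Data;  // fat binary blob (CUDA) or __hip_fatbin (HIP)
    //   void *Unused;      // null in fatbin v1
    // } FatbinWrapper = { FatMagic, 1, FatBinStr, nullptr };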
+ // There is only one HIP fat binary per linked module; however, there are
+ // multiple constructor functions. Make sure the fat binary is registered
+ // only once. The constructor functions are executed by the dynamic loader
+ // before the program gains control. The dynamic loader cannot execute the
+ // constructor functions concurrently since doing that would not guarantee
+ // thread safety of the loaded program. Therefore we can assume sequential
+ // execution of constructor functions here.
+ if (IsHIP) {
+ llvm::BasicBlock *IfBlock =
+ llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
+ llvm::BasicBlock *ExitBlock =
+ llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
+ // The name, size, and initialization pattern of this variable are part
+ // of the HIP ABI.
+ GpuBinaryHandle = new llvm::GlobalVariable(
+ TheModule, VoidPtrPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::LinkOnceAnyLinkage,
+ /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
+ "__hip_gpubin_handle");
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+ Address GpuBinaryAddr(
+ GpuBinaryHandle,
+ CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
+ {
+ auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+ llvm::Constant *Zero =
+ llvm::Constant::getNullValue(HandleValue->getType());
+ llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
+ CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
+ }
+ {
+ CtorBuilder.SetInsertPoint(IfBlock);
+ // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
+ llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+ RegisterFatbinFunc,
+ CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+ CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
+ CtorBuilder.CreateBr(ExitBlock);
+ }
+ {
+ CtorBuilder.SetInsertPoint(ExitBlock);
+ // Call __hip_register_globals(GpuBinaryHandle);
+ if (RegisterGlobalsFunc) {
+ auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
+ }
+ }
+ } else if (!RelocatableDeviceCode) {
+ // Register the binary with the CUDA runtime. This is substantially
+ // different in default mode vs. separate compilation!
// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
RegisterFatbinFunc,
CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
- llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable(
+ GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+ GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
// Call __cuda_register_globals(GpuBinaryHandle);
if (RegisterGlobalsFunc)
CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+ } else {
+ // Generate a unique module ID.
+ SmallString<64> ModuleID;
+ llvm::raw_svector_ostream OS(ModuleID);
+ OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID());
+ llvm::Constant *ModuleIDConstant =
+ makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
+
+ // Create an alias for the FatbinWrapper that nvcc will look for.
+ llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
+ Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
+
+ // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
+ // void *, void (*)(void **))
+ SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
+ RegisterLinkedBinaryName += ModuleID;
+ llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
+ getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
+
+ assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
+ llvm::Value *Args[] = {RegisterGlobalsFunc,
+ CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
+ ModuleIDConstant,
+ makeDummyFunction(getCallbackFnTy())};
+ CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
+ }
- // Save GpuBinaryHandle so we can unregister it in destructor.
- GpuBinaryHandles.push_back(GpuBinaryHandle);
+ // Create the destructor and register it with atexit() the way NVCC does.
+ // Doing it during the regular destructor phase worked in CUDA before 9.2,
+ // but results in a double free in 9.2.
+ if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
+ // extern "C" int atexit(void (*f)(void));
+ llvm::FunctionType *AtExitTy =
+ llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
+ llvm::Constant *AtExitFunc =
+ CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
+ /*Local=*/true);
+ CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
}
CtorBuilder.CreateRetVoid();
return ModuleCtorFunc;
}
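With the atexit registration above, the CUDA constructor now effectively does (sketch):

    // void __cuda_module_ctor(void *) {
    //   __cuda_gpubin_handle = __cudaRegisterFatBinary(&__cuda_fatbin_wrapper);
    //   __cuda_register_globals(__cuda_gpubin_handle);
    //   atexit(__cuda_module_dtor);  // unregister at exit, not in a static dtor
    // }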
-/// Creates a global destructor function that unregisters all GPU code blobs
+/// Creates a global destructor function that unregisters the GPU code blob
/// registered by constructor.
+///
+/// For CUDA:
/// \code
/// void __cuda_module_dtor(void*) {
-/// __cudaUnregisterFatBinary(Handle0);
-/// ...
-/// __cudaUnregisterFatBinary(HandleN);
+/// __cudaUnregisterFatBinary(Handle);
+/// }
+/// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_dtor(void*) {
+/// if (__hip_gpubin_handle) {
+/// __hipUnregisterFatBinary(__hip_gpubin_handle);
+/// __hip_gpubin_handle = 0;
+/// }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
- // No need for destructor if we don't have handles to unregister.
- if (GpuBinaryHandles.empty())
+ // No need for destructor if we don't have a handle to unregister.
+ if (!GpuBinaryHandle)
return nullptr;
// void __cudaUnregisterFatBinary(void ** handle);
llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
- "__cudaUnregisterFatBinary");
+ addUnderscoredPrefixToName("UnregisterFatBinary"));
llvm::Function *ModuleDtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
+ llvm::GlobalValue::InternalLinkage,
+ addUnderscoredPrefixToName("_module_dtor"), &TheModule);
+
llvm::BasicBlock *DtorEntryBB =
llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
CGBuilderTy DtorBuilder(CGM, Context);
DtorBuilder.SetInsertPoint(DtorEntryBB);
- for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) {
- auto HandleValue =
- DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
+ Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity(
+ GpuBinaryHandle->getAlignment()));
+ auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
+ // There is only one HIP fat binary per linked module; however, there are
+ // multiple destructor functions. Make sure the fat binary is unregistered
+ // only once.
+ if (CGM.getLangOpts().HIP) {
+ llvm::BasicBlock *IfBlock =
+ llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
+ llvm::BasicBlock *ExitBlock =
+ llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
+ llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
+ llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
+ DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
+
+ DtorBuilder.SetInsertPoint(IfBlock);
DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
- }
+ DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
+ DtorBuilder.CreateBr(ExitBlock);
+ DtorBuilder.SetInsertPoint(ExitBlock);
+ } else {
+ DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
+ }
DtorBuilder.CreateRetVoid();
return ModuleDtorFunc;
}
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 5ef4dc45fba1..475f17b77d92 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -109,17 +109,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
D->getType()->getAs<FunctionType>()->getCallConv())
return true;
- return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
- GlobalDecl(BaseD, Dtor_Base));
-}
-
-/// Try to emit a definition as a global alias for another definition.
-/// If \p InEveryTU is true, we know that an equivalent alias can be produced
-/// in every translation unit.
-bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
- GlobalDecl TargetDecl) {
- if (!getCodeGenOpts().CXXCtorDtorAliases)
- return true;
+ GlobalDecl AliasDecl(D, Dtor_Base);
+ GlobalDecl TargetDecl(BaseD, Dtor_Base);
// The alias will use the linkage of the referent. If we can't
// support aliases with that linkage, fail.
@@ -193,6 +184,9 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "",
Aliasee, &getModule());
+ // Destructors are always unnamed_addr.
+ Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
// Switch any previous uses to the alias.
if (Entry) {
assert(Entry->getType() == AliasType &&
@@ -205,7 +199,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
}
// Finally, set up the alias with its proper name and attributes.
- setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
+ SetCommonAttributes(AliasDecl, Alias);
return false;
}
@@ -227,10 +221,9 @@ llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD,
}
setFunctionLinkage(GD, Fn);
- setFunctionDLLStorageClass(GD, Fn);
CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo);
- setFunctionDefinitionAttributes(MD, Fn);
+ setNonAliasAttributes(GD, Fn);
SetLLVMFunctionAttributesForDefinition(MD, Fn);
return Fn;
}
@@ -243,6 +236,11 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor(
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
GD = GlobalDecl(CD, toCXXCtorType(Type));
} else {
+ // Always alias equivalent complete destructors to base destructors in the
+ // MS ABI.
+ if (getTarget().getCXXABI().isMicrosoft() &&
+ Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0)
+ Type = StructorType::Base;
GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type));
}
@@ -263,7 +261,6 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
assert(!CGF.CGM.getTarget().getCXXABI().isMicrosoft() &&
"No kext in Microsoft ABI");
- GD = GD.getCanonicalDecl();
CodeGenModule &CGM = CGF.CGM;
llvm::Value *VTable = CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
Ty = Ty->getPointerTo()->getPointerTo();
@@ -279,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt");
llvm::Value *VFunc =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes);
- CGCallee Callee(GD.getDecl(), VFunc);
+ CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc);
return Callee;
}
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index a27c3e9d27e3..0611749acf17 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -287,6 +287,20 @@ CGCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
return nullptr;
}
+void CGCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
+ const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const {
+ // Assume the base C++ ABI has no special rules for destructor variants.
+ CGM.setDLLImportDLLExport(GV, Dtor);
+}
+
+llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage(
+ GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const {
+ // Delegate back to CGM by default.
+ return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
+ /*isConstantVariable=*/false);
+}
+
bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
return false;
}
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index 83426dc3a03c..65b50e14f436 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -40,7 +40,7 @@ class CodeGenFunction;
class CodeGenModule;
struct CatchTypeInfo;
-/// \brief Implements C++ ABI-specific code generation functions.
+/// Implements C++ ABI-specific code generation functions.
class CGCXXABI {
protected:
CodeGenModule &CGM;
@@ -222,7 +222,7 @@ protected:
/// is required.
llvm::Constant *getMemberPointerAdjustment(const CastExpr *E);
- /// \brief Computes the non-virtual adjustment needed for a member pointer
+ /// Computes the non-virtual adjustment needed for a member pointer
/// conversion along an inheritance path stored in an APValue. Unlike
/// getMemberPointerAdjustment(), the adjustment can be negative if the path
/// is from a derived type to a base type.
@@ -237,7 +237,7 @@ public:
virtual void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) = 0;
virtual llvm::GlobalVariable *getThrowInfo(QualType T) { return nullptr; }
- /// \brief Determine whether it's possible to emit a vtable for \p RD, even
+ /// Determine whether it's possible to emit a vtable for \p RD, even
/// though we do not know that the vtable has been marked as used by semantic
/// analysis.
virtual bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const = 0;
@@ -319,6 +319,14 @@ public:
virtual bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const = 0;
+ virtual void setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
+ const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const;
+
+ virtual llvm::GlobalValue::LinkageTypes
+ getCXXDestructorLinkage(GVALinkage Linkage, const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const;
+
/// Emit destructor variants required by this ABI.
virtual void EmitCXXDestructors(const CXXDestructorDecl *D) = 0;
@@ -414,8 +422,7 @@ public:
/// Build a virtual function pointer in the ABI-specific way.
virtual CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF,
- GlobalDecl GD,
- Address This,
+ GlobalDecl GD, Address This,
llvm::Type *Ty,
SourceLocation Loc) = 0;
@@ -434,6 +441,7 @@ public:
/// base tables.
virtual void emitVirtualInheritanceTables(const CXXRecordDecl *RD) = 0;
+ virtual bool exportThunk() = 0;
virtual void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
GlobalDecl GD, bool ReturnAdjustment) = 0;
@@ -599,6 +607,17 @@ CGCXXABI *CreateItaniumCXXABI(CodeGenModule &CGM);
/// Creates a Microsoft-family ABI.
CGCXXABI *CreateMicrosoftCXXABI(CodeGenModule &CGM);
+struct CatchRetScope final : EHScopeStack::Cleanup {
+ llvm::CatchPadInst *CPI;
+
+ CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {}
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest");
+ CGF.Builder.CreateCatchRet(CPI, BB);
+ CGF.EmitBlock(BB);
+ }
+};
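A sketch of a typical call site for this cleanup (the surrounding catch-handler code is assumed):

    // CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);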
}
}
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 38d7344572d3..f066ce168588 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -29,15 +29,15 @@
#include "clang/CodeGen/SwiftCallingConv.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Intrinsics.h"
using namespace clang;
using namespace CodeGen;
@@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD);
}
+/// Set calling convention for CUDA/HIP kernel.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
+ const FunctionDecl *FD) {
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ const FunctionType *FT = FTy->getAs<FunctionType>();
+ CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
+ FTy = FT->getCanonicalTypeUnqualified();
+ }
+}
+
/// Arrange the argument and result information for a declaration or
/// definition of the given C++ non-static member function. The
/// member function must be an ordinary function, i.e. not a
@@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
- CanQual<FunctionProtoType> prototype = GetFormalType(MD);
+ CanQualType FT = GetFormalType(MD).getAs<Type>();
+ setCUDAKernelCallingConvention(FT, CGM, MD);
+ auto prototype = FT.getAs<FunctionProtoType>();
if (MD->isInstance()) {
// The abstract case is perfectly fine.
@@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
assert(isa<FunctionType>(FTy));
+ setCUDAKernelCallingConvention(FTy, CGM, FD);
// When declaring a function without a prototype, always use a
// non-variadic type.
@@ -513,8 +526,8 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
/// correct type, and the caller will bitcast the function to the correct
/// prototype.
const CGFunctionInfo &
-CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) {
- assert(MD->isVirtual() && "only virtual memptrs have thunks");
+CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
+ assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
@@ -803,6 +816,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
FI->NoCallerSavedRegs = info.getNoCallerSavedRegs();
+ FI->NoCfCheck = info.getNoCfCheck();
FI->Required = required;
FI->HasRegParm = info.getHasRegParm();
FI->RegParm = info.getRegParm();
@@ -904,8 +918,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
CharUnits UnionSize = CharUnits::Zero();
for (const auto *FD : RD->fields()) {
- // Skip zero length bitfields.
- if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ if (FD->isZeroLengthBitField(Context))
continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
@@ -926,8 +939,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
}
for (const auto *FD : RD->fields()) {
- // Skip zero length bitfields.
- if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ if (FD->isZeroLengthBitField(Context))
continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
@@ -1040,42 +1052,49 @@ void CodeGenFunction::ExpandTypeFromArgs(
}
void CodeGenFunction::ExpandTypeToArgs(
- QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy,
+ QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos) {
auto Exp = getTypeExpansion(Ty, getContext());
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
- forConstantArrayExpansion(*this, CAExp, RV.getAggregateAddress(),
- [&](Address EltAddr) {
- RValue EltRV =
- convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation());
- ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos);
- });
+ Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+ : Arg.getKnownRValue().getAggregateAddress();
+ forConstantArrayExpansion(
+ *this, CAExp, Addr, [&](Address EltAddr) {
+ CallArg EltArg = CallArg(
+ convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()),
+ CAExp->EltTy);
+ ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs,
+ IRCallArgPos);
+ });
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
- Address This = RV.getAggregateAddress();
+ Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+ : Arg.getKnownRValue().getAggregateAddress();
for (const CXXBaseSpecifier *BS : RExp->Bases) {
// Perform a single step derived-to-base conversion.
Address Base =
GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
/*NullCheckValue=*/false, SourceLocation());
- RValue BaseRV = RValue::getAggregate(Base);
+ CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType());
// Recurse onto bases.
- ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs,
+ ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
}
LValue LV = MakeAddrLValue(This, Ty);
for (auto FD : RExp->Fields) {
- RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
- ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs,
+ CallArg FldArg =
+ CallArg(EmitRValueForField(LV, FD, SourceLocation()), FD->getType());
+ ExpandTypeToArgs(FD->getType(), FldArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
}
} else if (isa<ComplexExpansion>(Exp.get())) {
- ComplexPairTy CV = RV.getComplexVal();
+ ComplexPairTy CV = Arg.getKnownRValue().getComplexVal();
IRCallArgs[IRCallArgPos++] = CV.first;
IRCallArgs[IRCallArgPos++] = CV.second;
} else {
assert(isa<NoExpansion>(Exp.get()));
+ auto RV = Arg.getKnownRValue();
assert(RV.isScalar() &&
"Unexpected non-scalar rvalue during struct expansion.");
@@ -1479,7 +1498,8 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context,
/***/
bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
- return FI.getReturnInfo().isIndirect();
+ const auto &RI = FI.getReturnInfo();
+ return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet());
}
bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
@@ -1672,7 +1692,7 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
return;
if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) &&
- FPT->isNothrow(Ctx))
+ FPT->isNothrow())
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
@@ -1714,12 +1734,19 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute("less-precise-fpmad",
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
+ if (CodeGenOpts.NullPointerIsValid)
+ FuncAttrs.addAttribute("null-pointer-is-valid", "true");
if (!CodeGenOpts.FPDenormalMode.empty())
FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
FuncAttrs.addAttribute("no-trapping-math",
llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+ // Strict (compliant) code is the default, so only add this attribute to
+ // indicate that we are trying to work around a problem case.
+ if (!CodeGenOpts.StrictFloatCastOverflow)
+ FuncAttrs.addAttribute("strict-float-cast-overflow", "false");
+
// TODO: Are these all needed?
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
FuncAttrs.addAttribute("no-infs-fp-math",
@@ -1738,6 +1765,10 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
"correctly-rounded-divide-sqrt-fp-math",
llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
+ if (getLangOpts().OpenCL)
+ FuncAttrs.addAttribute("denorms-are-zero",
+ llvm::toStringRef(CodeGenOpts.FlushDenorm));
+
// TODO: Reciprocal estimate codegen options should apply to instructions?
const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals;
if (!Recips.empty())
@@ -1769,7 +1800,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// Respect -fcuda-flush-denormals-to-zero.
- if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ if (CodeGenOpts.FlushDenorm)
FuncAttrs.addAttribute("nvptx-f32ftz", "true");
}
}
@@ -1793,7 +1824,7 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
// If we have information about the function prototype, we can learn
- // attributes form there.
+ // attributes from there.
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
@@ -1838,18 +1869,20 @@ void CodeGenModule::ConstructAttributeList(
}
if (TargetDecl->hasAttr<RestrictAttr>())
RetAttrs.addAttribute(llvm::Attribute::NoAlias);
- if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
+ if (TargetDecl->hasAttr<ReturnsNonNullAttr>() &&
+ !CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>())
FuncAttrs.addAttribute("no_caller_saved_registers");
+ if (TargetDecl->hasAttr<AnyX86NoCfCheckAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Optional<unsigned> NumElemsParam;
- // alloc_size args are base-1, 0 means not present.
- if (unsigned N = AllocSize->getNumElemsParam())
- NumElemsParam = N - 1;
- FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam() - 1,
+ if (AllocSize->getNumElemsParam().isValid())
+ NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex();
+ FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),
NumElemsParam);
}
}
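For reference, a hypothetical declaration carrying the attribute whose one-based indices are translated here:

    // void *my_alloc(size_t Size, size_t Count)
    //     __attribute__((alloc_size(1, 2)));  // bytes = arg 1 * arg 2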
@@ -1870,53 +1903,40 @@ void CodeGenModule::ConstructAttributeList(
}
}
- if (!AttrOnCallSite) {
- bool DisableTailCalls =
- CodeGenOpts.DisableTailCalls ||
- (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
- TargetDecl->hasAttr<AnyX86InterruptAttr>()));
- FuncAttrs.addAttribute("disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
-
- // Add target-cpu and target-features attributes to functions. If
- // we have a decl for the function and it has a target attribute then
- // parse that and add it to the feature set.
- StringRef TargetCPU = getTarget().getTargetOpts().CPU;
- std::vector<std::string> Features;
- const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
- if (FD && FD->hasAttr<TargetAttr>()) {
- llvm::StringMap<bool> FeatureMap;
- getFunctionFeatureMap(FeatureMap, FD);
-
- // Produce the canonical string for this set of features.
- for (llvm::StringMap<bool>::const_iterator it = FeatureMap.begin(),
- ie = FeatureMap.end();
- it != ie; ++it)
- Features.push_back((it->second ? "+" : "-") + it->first().str());
-
- // Now add the target-cpu and target-features to the function.
- // While we populated the feature map above, we still need to
- // get and parse the target attribute so we can get the cpu for
- // the function.
- const auto *TD = FD->getAttr<TargetAttr>();
- TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
- if (ParsedAttr.Architecture != "" &&
- getTarget().isValidCPUName(ParsedAttr.Architecture))
- TargetCPU = ParsedAttr.Architecture;
+ if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
+ if (getLangOpts().OpenCLVersion <= 120) {
+ // In OpenCL v1.2, work groups are always uniform.
+ FuncAttrs.addAttribute("uniform-work-group-size", "true");
} else {
- // Otherwise just add the existing target cpu and target features to the
- // function.
- Features = getTarget().getTargetOpts().Features;
+ // In OpenCL v2.0, work groups may or may not be uniform. The
+ // '-cl-uniform-work-group-size' compile option hints to the compiler
+ // that the global work-size is a multiple of the work-group size
+ // specified to clEnqueueNDRangeKernel (i.e. work groups are uniform).
+ FuncAttrs.addAttribute("uniform-work-group-size",
+ llvm::toStringRef(CodeGenOpts.UniformWGSize));
}
+ }
- if (TargetCPU != "")
- FuncAttrs.addAttribute("target-cpu", TargetCPU);
- if (!Features.empty()) {
- std::sort(Features.begin(), Features.end());
- FuncAttrs.addAttribute(
- "target-features",
- llvm::join(Features, ","));
+ if (!AttrOnCallSite) {
+ bool DisableTailCalls = false;
+
+ if (CodeGenOpts.DisableTailCalls)
+ DisableTailCalls = true;
+ else if (TargetDecl) {
+ if (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+ TargetDecl->hasAttr<AnyX86InterruptAttr>())
+ DisableTailCalls = true;
+ else if (CodeGenOpts.NoEscapingBlockTailCalls) {
+ if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl))
+ if (!BD->doesNotEscape())
+ DisableTailCalls = true;
+ }
}
+
+ FuncAttrs.addAttribute("disable-tail-calls",
+ llvm::toStringRef(DisableTailCalls));
+ GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs);
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
@@ -1925,9 +1945,9 @@ void CodeGenModule::ConstructAttributeList(
const ABIArgInfo &RetAI = FI.getReturnInfo();
switch (RetAI.getKind()) {
case ABIArgInfo::Extend:
- if (RetTy->hasSignedIntegerRepresentation())
+ if (RetAI.isSignExt())
RetAttrs.addAttribute(llvm::Attribute::SExt);
- else if (RetTy->hasUnsignedIntegerRepresentation())
+ else
RetAttrs.addAttribute(llvm::Attribute::ZExt);
LLVM_FALLTHROUGH;
case ABIArgInfo::Direct:
@@ -1957,7 +1977,8 @@ void CodeGenModule::ConstructAttributeList(
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
.getQuantity());
- else if (getContext().getTargetAddressSpace(PTy) == 0)
+ else if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ !CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
}
@@ -1967,7 +1988,8 @@ void CodeGenModule::ConstructAttributeList(
// Attach attributes to sret.
if (IRFunctionArgs.hasSRetArg()) {
llvm::AttrBuilder SRETAttrs;
- SRETAttrs.addAttribute(llvm::Attribute::StructRet);
+ if (!RetAI.getSuppressSRet())
+ SRETAttrs.addAttribute(llvm::Attribute::StructRet);
hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
@@ -2006,14 +2028,10 @@ void CodeGenModule::ConstructAttributeList(
// sense to do it here because parameters are so messed up.
switch (AI.getKind()) {
case ABIArgInfo::Extend:
- if (ParamType->isSignedIntegerOrEnumerationType())
+ if (AI.isSignExt())
Attrs.addAttribute(llvm::Attribute::SExt);
- else if (ParamType->isUnsignedIntegerOrEnumerationType()) {
- if (getTypes().getABIInfo().shouldSignExtUnsignedType(ParamType))
- Attrs.addAttribute(llvm::Attribute::SExt);
- else
- Attrs.addAttribute(llvm::Attribute::ZExt);
- }
+ else
+ Attrs.addAttribute(llvm::Attribute::ZExt);
LLVM_FALLTHROUGH;
case ABIArgInfo::Direct:
if (ArgNo == 0 && FI.isChainCall())
@@ -2070,7 +2088,8 @@ void CodeGenModule::ConstructAttributeList(
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
.getQuantity());
- else if (getContext().getTargetAddressSpace(PTy) == 0)
+ else if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ !CodeGenOpts.NullPointerIsValid)
Attrs.addAttribute(llvm::Attribute::NonNull);
}
@@ -2255,11 +2274,16 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
i != e; ++i, ++info_it, ++ArgNo) {
const VarDecl *Arg = *i;
- QualType Ty = info_it->type;
const ABIArgInfo &ArgI = info_it->info;
bool isPromoted =
isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted();
+ // We convert from the ABIArgInfo type to the VarDecl type directly,
+ // unless the parameter is promoted; in that case we convert to the
+ // CGFunctionInfo::ArgInfo type and demote the argument afterwards.
+ QualType Ty = isPromoted ? info_it->type : Arg->getType();
+ assert(hasScalarEvaluationKind(Ty) ==
+ hasScalarEvaluationKind(Arg->getType()));
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
@@ -2325,7 +2349,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
- PVD->getFunctionScopeIndex()))
+ PVD->getFunctionScopeIndex()) &&
+ !CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
QualType OTy = PVD->getOriginalType();
@@ -2344,7 +2369,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
Attrs.addDereferenceableAttr(
getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
AI->addAttrs(Attrs);
- } else if (getContext().getTargetAddressSpace(ETy) == 0) {
+ } else if (getContext().getTargetAddressSpace(ETy) == 0 &&
+ !CGM.getCodeGenOpts().NullPointerIsValid) {
AI->addAttr(llvm::Attribute::NonNull);
}
}
@@ -2354,7 +2380,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// we can't use the dereferenceable attribute, but in addrspace(0)
// we know that it must be nonnull.
if (ArrTy->getSizeModifier() == VariableArrayType::Static &&
- !getContext().getTargetAddressSpace(ArrTy->getElementType()))
+ !getContext().getTargetAddressSpace(ArrTy->getElementType()) &&
+ !CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
}
@@ -3022,7 +3049,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF,
Ty.getQualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
}
void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
@@ -3062,6 +3090,19 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
} else {
args.add(convertTempToRValue(local, type, loc), type);
}
+
+ // Deactivate the cleanup for the callee-destructed param that was pushed.
+ if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk &&
+ type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee() &&
+ type.isDestructedType()) {
+ EHScopeStack::stable_iterator cleanup =
+ CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param));
+ assert(cleanup.isValid() &&
+ "cleanup for callee-destructed param not recorded");
+ // This unreachable is a temporary marker which will be removed later.
+ llvm::Instruction *isActive = Builder.CreateUnreachable();
+ args.addArgCleanupDeactivation(cleanup, isActive);
+ }
}
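Editor's note: background for the new cleanup bookkeeping above. When a record is "param destroyed in callee" (always the case in the Microsoft C++ ABI, and for non-trivial C structs), the caller must not also run the destructor once the call has been made, hence the cleanup pushed eagerly for the exceptional path and deactivated at the call. A hedged source-level sketch of the ownership handoff:

```c++
// Sketch under a callee-destroys ABI (e.g. the Microsoft C++ ABI): the
// argument's destructor runs inside callee() on the normal path; the
// caller keeps only an EH cleanup in case an exception is thrown before
// the call, then deactivates it.
struct S {
  S() {}
  ~S() {}
};

void callee(S s) {}  // 's' destroyed here under such ABIs

void caller() {
  callee(S());  // caller's cleanup is EH-only and deactivated at the call
}
```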
static bool isProvablyNull(llvm::Value *addr) {
@@ -3143,7 +3184,6 @@ static void emitWritebacks(CodeGenFunction &CGF,
static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
const CallArgList &CallArgs) {
- assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee());
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
CallArgs.getCleanupsToDeactivate();
// Iterate in reverse to increase the likelihood of popping the cleanup.
@@ -3430,13 +3470,17 @@ void CodeGenFunction::EmitCallArgs(
assert(InitialArgSize + 1 == Args.size() &&
"The code below depends on only adding one arg per EmitCallArg");
(void)InitialArgSize;
- RValue RVArg = Args.back().RV;
- EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
- ParamsToSkip + Idx);
- // @llvm.objectsize should never have side-effects and shouldn't need
- // destruction/cleanups, so we can safely "emit" it after its arg,
- // regardless of right-to-leftness
- MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
+ // Since pointer arguments are never emitted as LValues, it is safe to
+ // emit the non-null argument check for r-values only.
+ if (!Args.back().hasLValue()) {
+ RValue RVArg = Args.back().getKnownRValue();
+ EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
+ ParamsToSkip + Idx);
+ // @llvm.objectsize should never have side-effects and shouldn't need
+ // destruction/cleanups, so we can safely "emit" it after its arg,
+ // regardless of right-to-leftness
+ MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
+ }
}
if (!LeftToRight) {
@@ -3456,10 +3500,15 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup {
QualType Ty;
void Emit(CodeGenFunction &CGF, Flags flags) override {
- const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
- assert(!Dtor->isTrivial());
- CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
- /*Delegating=*/false, Addr);
+ QualType::DestructionKind DtorKind = Ty.isDestructedType();
+ if (DtorKind == QualType::DK_cxx_destructor) {
+ const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
+ assert(!Dtor->isTrivial());
+ CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
+ /*Delegating=*/false, Addr);
+ } else {
+ CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty));
+ }
}
};
@@ -3478,6 +3527,33 @@ struct DisableDebugLocationUpdates {
} // end anonymous namespace
+RValue CallArg::getRValue(CodeGenFunction &CGF) const {
+ if (!HasLV)
+ return RV;
+ LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty);
+ CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap,
+ LV.isVolatile());
+ IsUsed = true;
+ return RValue::getAggregate(Copy.getAddress());
+}
+
+void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
+ LValue Dst = CGF.MakeAddrLValue(Addr, Ty);
+ if (!HasLV && RV.isScalar())
+ CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true);
+ else if (!HasLV && RV.isComplex())
+ CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true);
+ else {
+ auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress();
+ LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty);
+ // We assume that call args are never copied into subobjects.
+ CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap,
+ HasLV ? LV.isVolatileQualified()
+ : RV.isVolatileQualified());
+ }
+ IsUsed = true;
+}
+
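Editor's note: both new helpers flip the mutable IsUsed flag, making a CallArg single-use; re-reading an argument after it has been emitted would duplicate IR. A compilable miniature of that contract, with an illustrative stand-in type:

```c++
#include <cassert>

// Sketch of the use-once contract enforced by CallArg's IsUsed flag.
class ArgSketch {
  mutable bool IsUsed = false;
public:
  int take() const {
    assert(!IsUsed && "call argument emitted twice");
    IsUsed = true;
    return 42;  // stand-in for the materialized value
  }
};
```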
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
QualType type) {
DisableDebugLocationUpdates Dis(*this, E);
@@ -3501,7 +3577,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
// However, we still have to push an EH-only cleanup in case we unwind before
// we make it to the call.
if (HasAggregateEvalKind &&
- CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+ type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
// If we're using inalloca, use the argument memory. Otherwise, use a
// temporary.
AggValueSlot Slot;
@@ -3510,10 +3586,12 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
else
Slot = CreateAggTemp(type, "agg.tmp");
- const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
- bool DestroyedInCallee =
- RD && RD->hasNonTrivialDestructor() &&
- CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
+ bool DestroyedInCallee = true, NeedsEHCleanup = true;
+ if (const auto *RD = type->getAsCXXRecordDecl())
+ DestroyedInCallee = RD->hasNonTrivialDestructor();
+ else
+ NeedsEHCleanup = needsEHCleanup(type.isDestructedType());
+
if (DestroyedInCallee)
Slot.setExternallyDestructed();
@@ -3521,7 +3599,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
RValue RV = Slot.asRValue();
args.add(RV, type);
- if (DestroyedInCallee) {
+ if (DestroyedInCallee && NeedsEHCleanup) {
// Create a no-op GEP between the placeholder and the cleanup so we can
// RAUW it successfully. It also serves as a marker of the first
// instruction where the cleanup is active.
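Editor's note: the DestroyedInCallee/NeedsEHCleanup generalization matters because a type can be non-trivial to destroy without being a C++ record (e.g. a C struct with an ARC-managed member), so the decision now consults QualType::isDestructedType rather than only the C++ record ABI. A rough sketch of the resulting dual dispatch, with illustrative names only:

```c++
// Hedged mirror of the DestroyUnpassedArg cleanup above: pick a C++
// destructor call when one exists, otherwise the synthesized C-struct
// destructor path.
enum class DtorKind { None, CxxDestructor, NonTrivialCStruct };

void destroyUnpassedArg(DtorKind k) {
  switch (k) {
  case DtorKind::CxxDestructor:
    // EmitCXXDestructorCall(Dtor_Complete, ...)
    break;
  case DtorKind::NonTrivialCStruct:
    // callCStructDestructor(...)
    break;
  case DtorKind::None:
    break;  // trivial: nothing to run
  }
}
```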
@@ -3538,15 +3616,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) {
LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
assert(L.isSimple());
- if (L.getAlignment() >= getContext().getTypeAlignInChars(type)) {
- args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true);
- } else {
- // We can't represent a misaligned lvalue in the CallArgList, so copy
- // to an aligned temporary now.
- Address tmp = CreateMemTemp(type);
- EmitAggregateCopy(tmp, L.getAddress(), type, L.isVolatile());
- args.add(RValue::getAggregate(tmp), type);
- }
+ args.addUncopiedAggregate(L, type);
return;
}
@@ -3608,20 +3678,21 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
// Calls which may throw must have operand bundles indicating which funclet
// they are nested within.
-static void
-getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad,
- SmallVectorImpl<llvm::OperandBundleDef> &BundleList) {
+SmallVector<llvm::OperandBundleDef, 1>
+CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) {
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
// There is no need for a funclet operand bundle if we aren't inside a
// funclet.
if (!CurrentFuncletPad)
- return;
+ return BundleList;
// Skip intrinsics which cannot throw.
auto *CalleeFn = dyn_cast<llvm::Function>(Callee->stripPointerCasts());
if (CalleeFn && CalleeFn->isIntrinsic() && CalleeFn->doesNotThrow())
- return;
+ return BundleList;
BundleList.emplace_back("funclet", CurrentFuncletPad);
+ return BundleList;
}
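Editor's note: returning the SmallVector by value instead of filling an out-parameter relies on NRVO/move semantics, and lets every call site in the follow-up hunks shrink to a single expression. A minimal sketch of the pattern, assuming only the LLVM ADT header:

```c++
#include "llvm/ADT/SmallVector.h"

// Out-parameter style replaced by a returned buffer; the early return
// hands back an empty, cheaply moved vector.
llvm::SmallVector<int, 1> collectBundles(bool insideFunclet) {
  llvm::SmallVector<int, 1> Bundles;
  if (!insideFunclet)
    return Bundles;      // nothing to attach
  Bundles.push_back(1);  // stand-in for the "funclet" operand bundle
  return Bundles;
}
```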
/// Emits a simple call (never an invoke) to the given runtime function.
@@ -3629,10 +3700,8 @@ llvm::CallInst *
CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
ArrayRef<llvm::Value*> args,
const llvm::Twine &name) {
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
-
- llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name);
+ llvm::CallInst *call =
+ Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name);
call->setCallingConv(getRuntimeCC());
return call;
}
@@ -3640,8 +3709,8 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
/// Emits a call or invoke to the given noreturn runtime function.
void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
ArrayRef<llvm::Value*> args) {
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList =
+ getBundlesForFunclet(callee);
if (getInvokeDest()) {
llvm::InvokeInst *invoke =
@@ -3684,8 +3753,8 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
ArrayRef<llvm::Value *> Args,
const Twine &Name) {
llvm::BasicBlock *InvokeDest = getInvokeDest();
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList =
+ getBundlesForFunclet(Callee);
llvm::Instruction *Inst;
if (!InvokeDest)
@@ -3705,16 +3774,6 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
return llvm::CallSite(Inst);
}
-/// \brief Store a non-aggregate value to an address to initialize it. For
-/// initialization, a non-atomic store will be used.
-static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src,
- LValue Dst) {
- if (Src.isScalar())
- CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true);
- else
- CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true);
-}
-
void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
llvm::Value *New) {
DeferredReplacements.push_back(std::make_pair(Old, New));
@@ -3728,7 +3787,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
SourceLocation Loc) {
// FIXME: We no longer need the types from CallArgs; lift up and simplify.
- assert(Callee.isOrdinary());
+ assert(Callee.isOrdinary() || Callee.isVirtual());
// Handle struct-return functions by passing a pointer to the
// location that we would like to return into.
@@ -3775,17 +3834,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
- size_t UnusedReturnSize = 0;
+ Address SRetAlloca = Address::invalid();
+ llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getValue();
} else {
- SRetPtr = CreateMemTemp(RetTy);
+ SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca);
if (HaveInsertPoint() && ReturnValue.isUnused()) {
uint64_t size =
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
- if (EmitLifetimeStart(size, SRetPtr.getPointer()))
- UnusedReturnSize = size;
+ UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer());
}
}
if (IRFunctionArgs.hasSRetArg()) {
@@ -3807,7 +3866,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
I != E; ++I, ++info_it, ++ArgNo) {
const ABIArgInfo &ArgInfo = info_it->info;
- RValue RV = I->RV;
// Insert a padding argument to ensure proper alignment.
if (IRFunctionArgs.hasPaddingArg(ArgNo))
@@ -3821,13 +3879,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::InAlloca: {
assert(NumIRArgs == 0);
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
- if (RV.isAggregate()) {
+ if (I->isAggregate()) {
// Replace the placeholder with the appropriate argument slot GEP.
+ Address Addr = I->hasLValue()
+ ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
llvm::Instruction *Placeholder =
- cast<llvm::Instruction>(RV.getAggregatePointer());
+ cast<llvm::Instruction>(Addr.getPointer());
CGBuilderTy::InsertPoint IP = Builder.saveIP();
Builder.SetInsertPoint(Placeholder);
- Address Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex());
+ Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex());
Builder.restoreIP(IP);
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
} else {
@@ -3840,22 +3901,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// from {}* to (%struct.foo*)*.
if (Addr.getType() != MemType)
Addr = Builder.CreateBitCast(Addr, MemType);
- LValue argLV = MakeAddrLValue(Addr, I->Ty);
- EmitInitStoreOfNonAggregate(*this, RV, argLV);
+ I->copyInto(*this, Addr);
}
break;
}
case ABIArgInfo::Indirect: {
assert(NumIRArgs == 1);
- if (RV.isScalar() || RV.isComplex()) {
+ if (!I->isAggregate()) {
// Make a temporary alloca to pass the argument.
- Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "indirect-arg-temp", false);
+ Address Addr = CreateMemTempWithoutCast(
+ I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
IRCallArgs[FirstIRArg] = Addr.getPointer();
- LValue argLV = MakeAddrLValue(Addr, I->Ty);
- EmitInitStoreOfNonAggregate(*this, RV, argLV);
+ I->copyInto(*this, Addr);
} else {
// We want to avoid creating an unnecessary temporary+copy here;
// however, we need one in three cases:
@@ -3863,30 +3922,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// source. (This case doesn't occur on any common architecture.)
// 2. If the argument is byval, RV is not sufficiently aligned, and
// we cannot force it to be sufficiently aligned.
- // 3. If the argument is byval, but RV is located in an address space
- // different than that of the argument (0).
- Address Addr = RV.getAggregateAddress();
+ // 3. If the argument is byval, but RV is not located in the default
+ // or alloca address space.
+ Address Addr = I->hasLValue()
+ ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
+ llvm::Value *V = Addr.getPointer();
CharUnits Align = ArgInfo.getIndirectAlign();
const llvm::DataLayout *TD = &CGM.getDataLayout();
- const unsigned RVAddrSpace = Addr.getType()->getAddressSpace();
- const unsigned ArgAddrSpace =
- (FirstIRArg < IRFuncTy->getNumParams()
- ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
- : 0);
- if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
- (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align &&
- llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
- Align.getQuantity(), *TD)
- < Align.getQuantity()) ||
- (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
+
+ assert((FirstIRArg >= IRFuncTy->getNumParams() ||
+ IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() ==
+ TD->getAllocaAddrSpace()) &&
+ "indirect argument must be in alloca address space");
+
+ bool NeedCopy = false;
+
+ if (Addr.getAlignment() < Align &&
+ llvm::getOrEnforceKnownAlignment(V, Align.getQuantity(), *TD) <
+ Align.getQuantity()) {
+ NeedCopy = true;
+ } else if (I->hasLValue()) {
+ auto LV = I->getKnownLValue();
+ auto AS = LV.getAddressSpace();
+ if ((!ArgInfo.getIndirectByVal() &&
+ (LV.getAlignment() >=
+ getContext().getTypeAlignInChars(I->Ty))) ||
+ (ArgInfo.getIndirectByVal() &&
+ ((AS != LangAS::Default && AS != LangAS::opencl_private &&
+ AS != CGM.getASTAllocaAddressSpace())))) {
+ NeedCopy = true;
+ }
+ }
+ if (NeedCopy) {
// Create an aligned temporary, and copy to it.
- Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "byval-temp", false);
+ Address AI = CreateMemTempWithoutCast(
+ I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
IRCallArgs[FirstIRArg] = AI.getPointer();
- EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
+ I->copyInto(*this, AI);
} else {
// Skip the extra memcpy call.
- IRCallArgs[FirstIRArg] = Addr.getPointer();
+ auto *T = V->getType()->getPointerElementType()->getPointerTo(
+ CGM.getDataLayout().getAllocaAddrSpace());
+ IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast(
+ *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
+ true);
}
}
break;
@@ -3903,10 +3983,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
ArgInfo.getDirectOffset() == 0) {
assert(NumIRArgs == 1);
llvm::Value *V;
- if (RV.isScalar())
- V = RV.getScalarVal();
+ if (!I->isAggregate())
+ V = I->getKnownRValue().getScalarVal();
else
- V = Builder.CreateLoad(RV.getAggregateAddress());
+ V = Builder.CreateLoad(
+ I->hasLValue() ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress());
// Implement swifterror by copying into a new swifterror argument.
// We'll write back in the normal path out of the call.
@@ -3944,12 +4026,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// FIXME: Avoid the conversion through memory if possible.
Address Src = Address::invalid();
- if (RV.isScalar() || RV.isComplex()) {
+ if (!I->isAggregate()) {
Src = CreateMemTemp(I->Ty, "coerce");
- LValue SrcLV = MakeAddrLValue(Src, I->Ty);
- EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
+ I->copyInto(*this, Src);
} else {
- Src = RV.getAggregateAddress();
+ Src = I->hasLValue() ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
}
// If the value is offset in memory, apply the offset now.
@@ -4003,22 +4085,26 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *tempSize = nullptr;
Address addr = Address::invalid();
- if (RV.isAggregate()) {
- addr = RV.getAggregateAddress();
+ Address AllocaAddr = Address::invalid();
+ if (I->isAggregate()) {
+ addr = I->hasLValue() ? I->getKnownLValue().getAddress()
+ : I->getKnownRValue().getAggregateAddress();
+
} else {
+ RValue RV = I->getKnownRValue();
assert(RV.isScalar()); // complex should always just be direct
llvm::Type *scalarType = RV.getScalarVal()->getType();
auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType);
auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);
- tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize);
-
// Materialize to a temporary.
addr = CreateTempAlloca(RV.getScalarVal()->getType(),
- CharUnits::fromQuantity(std::max(layout->getAlignment(),
- scalarAlign)));
- EmitLifetimeStart(scalarSize, addr.getPointer());
+ CharUnits::fromQuantity(std::max(
+ layout->getAlignment(), scalarAlign)),
+ "tmp",
+ /*ArraySize=*/nullptr, &AllocaAddr);
+ tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
Builder.CreateStore(RV.getScalarVal(), addr);
}
@@ -4036,7 +4122,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(IRArgPos == FirstIRArg + NumIRArgs);
if (tempSize) {
- EmitLifetimeEnd(tempSize, addr.getPointer());
+ EmitLifetimeEnd(tempSize, AllocaAddr.getPointer());
}
break;
@@ -4044,13 +4130,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::Expand:
unsigned IRArgPos = FirstIRArg;
- ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos);
+ ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos);
assert(IRArgPos == FirstIRArg + NumIRArgs);
break;
}
}
- llvm::Value *CalleePtr = Callee.getFunctionPointer();
+ const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
+ llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();
// If we're using inalloca, set up that argument.
if (ArgMemory.isValid()) {
@@ -4191,10 +4278,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
}
+
+ // If we made a temporary, be sure to clean up after ourselves. Note that we
+ // can't depend on being inside of an ExprWithCleanups, so we need to manually
+ // pop this cleanup later on. Being eager about this is OK, since this
+ // temporary is 'invisible' outside of the callee.
+ if (UnusedReturnSizePtr)
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca,
+ UnusedReturnSizePtr);
+
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
- SmallVector<llvm::OperandBundleDef, 1> BundleList;
- getBundlesForFunclet(CalleePtr, CurrentFuncletPad, BundleList);
+ SmallVector<llvm::OperandBundleDef, 1> BundleList =
+ getBundlesForFunclet(CalleePtr);
// Emit the actual call/invoke instruction.
llvm::CallSite CS;
@@ -4244,9 +4340,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// insertion point; this allows the rest of IRGen to discard
// unreachable code.
if (CS.doesNotReturn()) {
- if (UnusedReturnSize)
- EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
- SRetPtr.getPointer());
+ if (UnusedReturnSizePtr)
+ PopCleanupBlock();
// Strip away the noreturn attribute to better diagnose unreachable UB.
if (SanOpts.has(SanitizerKind::Unreachable)) {
@@ -4315,9 +4410,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect: {
RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation());
- if (UnusedReturnSize)
- EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
- SRetPtr.getPointer());
+ if (UnusedReturnSizePtr)
+ PopCleanupBlock();
return ret;
}
@@ -4395,7 +4489,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
llvm::Value *ParamVal =
- CallArgs[AA->getParamIndex() - 1].RV.getScalarVal();
+ CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue(
+ *this).getScalarVal();
EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
}
}
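Editor's note on the getLLVMIndex() change: alloc_align encodes a 1-based parameter index in the attribute, and the new ParamIdx API performs the 1-based-to-0-based translation that the removed hand-written "- 1" used to do. Source-level shape of the attribute, with a hypothetical allocator:

```c++
// The attribute's '2' names the alignment parameter (1-based); codegen
// then reads call-argument index 1 (0-based) to emit the alignment
// assumption on the returned pointer.
void *my_alloc(unsigned long size, unsigned long alignment)
    __attribute__((alloc_align(2)));
```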
@@ -4403,6 +4498,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
return Ret;
}
+CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const {
+ if (isVirtual()) {
+ const CallExpr *CE = getVirtualCallExpr();
+ return CGF.CGM.getCXXABI().getVirtualFunctionPointer(
+ CGF, getVirtualMethodDecl(), getThisAddress(),
+ getFunctionType(), CE ? CE->getLocStart() : SourceLocation());
+ }
+
+ return *this;
+}
+
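Editor's note: prepareConcreteCallee is the late-binding point for the new Virtual callee kind; the vtable load is deferred until the call is actually emitted, so earlier code can reason about the callee abstractly. A rough analogue using a member-function pointer, purely illustrative:

```c++
// Deferred-dispatch sketch: keep (object, method) and resolve the actual
// function only at the moment of the call.
struct Shape {
  virtual ~Shape() = default;
  virtual double area() const = 0;
};

struct Circle final : Shape {
  double r;
  explicit Circle(double radius) : r(radius) {}
  double area() const override { return 3.14159265 * r * r; }
};

struct DeferredCall {
  const Shape *This;
  double (Shape::*Method)() const;
  double invoke() const { return (This->*Method)(); }  // resolved here
};
```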
/* VarArg handling */
Address CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr) {
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 7e10407fc31c..8adbe76fa6c3 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -18,6 +18,7 @@
#include "CGValue.h"
#include "EHScopeStack.h"
#include "clang/AST/CanonicalType.h"
+#include "clang/AST/GlobalDecl.h"
#include "clang/AST/Type.h"
#include "llvm/IR/Value.h"
@@ -42,9 +43,9 @@ namespace CodeGen {
/// Abstract information about a function or function prototype.
class CGCalleeInfo {
- /// \brief The function prototype of the callee.
+ /// The function prototype of the callee.
const FunctionProtoType *CalleeProtoTy;
- /// \brief The function declaration of the callee.
+ /// The function declaration of the callee.
const Decl *CalleeDecl;
public:
@@ -68,8 +69,9 @@ public:
Invalid,
Builtin,
PseudoDestructor,
+ Virtual,
- Last = PseudoDestructor
+ Last = Virtual
};
struct BuiltinInfoStorage {
@@ -79,12 +81,19 @@ public:
struct PseudoDestructorInfoStorage {
const CXXPseudoDestructorExpr *Expr;
};
+ struct VirtualInfoStorage {
+ const CallExpr *CE;
+ GlobalDecl MD;
+ Address Addr;
+ llvm::FunctionType *FTy;
+ };
SpecialKind KindOrFunctionPointer;
union {
CGCalleeInfo AbstractInfo;
BuiltinInfoStorage BuiltinInfo;
PseudoDestructorInfoStorage PseudoDestructorInfo;
+ VirtualInfoStorage VirtualInfo;
};
explicit CGCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {}
@@ -127,6 +136,16 @@ public:
return CGCallee(abstractInfo, functionPtr);
}
+ static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr,
+ llvm::FunctionType *FTy) {
+ CGCallee result(SpecialKind::Virtual);
+ result.VirtualInfo.CE = CE;
+ result.VirtualInfo.MD = MD;
+ result.VirtualInfo.Addr = Addr;
+ result.VirtualInfo.FTy = FTy;
+ return result;
+ }
+
bool isBuiltin() const {
return KindOrFunctionPointer == SpecialKind::Builtin;
}
@@ -150,7 +169,9 @@ public:
bool isOrdinary() const {
return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last);
}
- const CGCalleeInfo &getAbstractInfo() const {
+ CGCalleeInfo getAbstractInfo() const {
+ if (isVirtual())
+ return VirtualInfo.MD.getDecl();
assert(isOrdinary());
return AbstractInfo;
}
@@ -158,29 +179,86 @@ public:
assert(isOrdinary());
return reinterpret_cast<llvm::Value*>(uintptr_t(KindOrFunctionPointer));
}
- llvm::FunctionType *getFunctionType() const {
- return cast<llvm::FunctionType>(
- getFunctionPointer()->getType()->getPointerElementType());
- }
void setFunctionPointer(llvm::Value *functionPtr) {
assert(isOrdinary());
KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr));
}
+
+ bool isVirtual() const {
+ return KindOrFunctionPointer == SpecialKind::Virtual;
+ }
+ const CallExpr *getVirtualCallExpr() const {
+ assert(isVirtual());
+ return VirtualInfo.CE;
+ }
+ GlobalDecl getVirtualMethodDecl() const {
+ assert(isVirtual());
+ return VirtualInfo.MD;
+ }
+ Address getThisAddress() const {
+ assert(isVirtual());
+ return VirtualInfo.Addr;
+ }
+
+ llvm::FunctionType *getFunctionType() const {
+ if (isVirtual())
+ return VirtualInfo.FTy;
+ return cast<llvm::FunctionType>(
+ getFunctionPointer()->getType()->getPointerElementType());
+ }
+
+ /// If this is a delayed callee computation of some sort, prepare
+ /// a concrete callee.
+ CGCallee prepareConcreteCallee(CodeGenFunction &CGF) const;
};
struct CallArg {
- RValue RV;
+ private:
+ union {
+ RValue RV;
+ LValue LV; /// The argument is semantically a load from this l-value.
+ };
+ bool HasLV;
+
+ /// A data-flow flag to make sure getRValue and/or copyInto are not
+ /// called twice, which would duplicate the emitted IR.
+ mutable bool IsUsed;
+
+ public:
QualType Ty;
- bool NeedsCopy;
- CallArg(RValue rv, QualType ty, bool needscopy)
- : RV(rv), Ty(ty), NeedsCopy(needscopy)
- { }
+ CallArg(RValue rv, QualType ty)
+ : RV(rv), HasLV(false), IsUsed(false), Ty(ty) {}
+ CallArg(LValue lv, QualType ty)
+ : LV(lv), HasLV(true), IsUsed(false), Ty(ty) {}
+ bool hasLValue() const { return HasLV; }
+ QualType getType() const { return Ty; }
+
+ /// \returns an independent RValue. If the CallArg contains an LValue,
+ /// a temporary copy is returned.
+ RValue getRValue(CodeGenFunction &CGF) const;
+
+ LValue getKnownLValue() const {
+ assert(HasLV && !IsUsed);
+ return LV;
+ }
+ RValue getKnownRValue() const {
+ assert(!HasLV && !IsUsed);
+ return RV;
+ }
+ void setRValue(RValue _RV) {
+ assert(!HasLV);
+ RV = _RV;
+ }
+
+ bool isAggregate() const { return HasLV || RV.isAggregate(); }
+
+ void copyInto(CodeGenFunction &CGF, Address A) const;
};
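Editor's note: the reworked CallArg is essentially a tagged union plus the use-once flag, with the invariant that exactly one of RV/LV is live, selected by HasLV. A compilable miniature of the same shape, using stand-in value types:

```c++
#include <cassert>

struct RVal { int v; };   // stand-ins for RValue / LValue
struct LVal { int *p; };

class MiniCallArg {
  union { RVal RV; LVal LV; };  // exactly one member is live
  bool HasLV;
public:
  explicit MiniCallArg(RVal rv) : RV(rv), HasLV(false) {}
  explicit MiniCallArg(LVal lv) : LV(lv), HasLV(true) {}
  bool hasLValue() const { return HasLV; }
  RVal knownRValue() const { assert(!HasLV); return RV; }
  LVal knownLValue() const { assert(HasLV); return LV; }
};
```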
/// CallArgList - Type for representing both the value and type of
/// arguments in a call.
class CallArgList :
- public SmallVector<CallArg, 16> {
+ public SmallVector<CallArg, 8> {
public:
CallArgList() : StackBase(nullptr) {}
@@ -204,8 +282,10 @@ public:
llvm::Instruction *IsActiveIP;
};
- void add(RValue rvalue, QualType type, bool needscopy = false) {
- push_back(CallArg(rvalue, type, needscopy));
+ void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); }
+
+ void addUncopiedAggregate(LValue LV, QualType type) {
+ push_back(CallArg(LV, type));
}
/// Add all the arguments from another CallArgList to this one. After doing
@@ -254,7 +334,7 @@ public:
llvm::Instruction *getStackBase() const { return StackBase; }
void freeArgumentMemory(CodeGenFunction &CGF) const;
- /// \brief Returns if we're using an inalloca struct to pass arguments in
+ /// Returns whether we're using an inalloca struct to pass arguments in
/// memory.
bool isUsingInAlloca() const { return StackBase; }
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index a6915071ec17..0b9311f7771c 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -406,8 +406,8 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr,
// Apply the offset.
llvm::Value *Value = Builder.CreateBitCast(BaseAddr.getPointer(), Int8PtrTy);
- Value = Builder.CreateGEP(Value, Builder.CreateNeg(NonVirtualOffset),
- "sub.ptr");
+ Value = Builder.CreateInBoundsGEP(Value, Builder.CreateNeg(NonVirtualOffset),
+ "sub.ptr");
// Just cast.
Value = Builder.CreateBitCast(Value, DerivedPtrTy);
@@ -555,10 +555,12 @@ static void EmitBaseInitializer(CodeGenFunction &CGF,
BaseClassDecl,
isBaseVirtual);
AggValueSlot AggSlot =
- AggValueSlot::forAddr(V, Qualifiers(),
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::forAddr(
+ V, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual));
CGF.EmitAggExpr(BaseInit->getInit(), AggSlot);
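Editor's note: the new Overlap_t argument records whether the destination may share storage with another object. For base-subobject initialization that is a real concern: under the Itanium C++ ABI a derived class may place its own fields in a non-POD base's tail padding, so the base copy must be limited to the base's data size. Illustration:

```c++
// B is non-POD (user-provided ctor), so Itanium-ABI targets may lay out
// D::d inside B's 3 bytes of tail padding. Copy-constructing the B
// subobject of a D must therefore not write sizeof(B) bytes, or it would
// clobber 'd' -- hence the overlap flag for base initialization.
struct B {
  B() {}
  int i;
  char c;  // followed by tail padding
};
struct D : B { char d; };
// Holds on Itanium-ABI targets (not under the Microsoft ABI):
static_assert(sizeof(D) == sizeof(B), "d reuses B's tail padding");
```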
@@ -615,7 +617,14 @@ static void EmitMemberInitializer(CodeGenFunction &CGF,
llvm::Value *ThisPtr = CGF.LoadCXXThis();
QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
- LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+ LValue LHS;
+
+ // If a base constructor is being emitted, create an LValue that has the
+ // non-virtual alignment.
+ if (CGF.CurGD.getCtorType() == Ctor_Base)
+ LHS = CGF.MakeNaturalAlignPointeeAddrLValue(ThisPtr, RecordTy);
+ else
+ LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
EmitLValueForAnyFieldInitialization(CGF, MemberInit, LHS);
@@ -640,7 +649,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF,
LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field);
// Copy the aggregate.
- CGF.EmitAggregateCopy(LHS.getAddress(), Src.getAddress(), FieldType,
+ CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field),
LHS.isVolatileQualified());
// Ensure that we destroy the objects if an exception is thrown later in
// the constructor.
@@ -671,10 +680,12 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS,
break;
case TEK_Aggregate: {
AggValueSlot Slot =
- AggValueSlot::forLValue(LHS,
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::forLValue(
+ LHS,
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ overlapForFieldInit(Field));
EmitAggExpr(Init, Slot);
break;
}
@@ -905,15 +916,15 @@ namespace {
}
CharUnits getMemcpySize(uint64_t FirstByteOffset) const {
+ ASTContext &Ctx = CGF.getContext();
unsigned LastFieldSize =
- LastField->isBitField() ?
- LastField->getBitWidthValue(CGF.getContext()) :
- CGF.getContext().getTypeSize(LastField->getType());
- uint64_t MemcpySizeBits =
- LastFieldOffset + LastFieldSize - FirstByteOffset +
- CGF.getContext().getCharWidth() - 1;
- CharUnits MemcpySize =
- CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+ LastField->isBitField()
+ ? LastField->getBitWidthValue(Ctx)
+ : Ctx.toBits(
+ Ctx.getTypeInfoDataSizeInChars(LastField->getType()).first);
+ uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize -
+ FirstByteOffset + Ctx.getCharWidth() - 1;
+ CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits);
return MemcpySize;
}
@@ -1265,7 +1276,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isInitializerOfDynamicClass(*B))
- CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis());
+ CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis());
EmitBaseInitializer(*this, ClassDecl, *B, CtorType);
}
@@ -1282,7 +1293,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isInitializerOfDynamicClass(*B))
- CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis());
+ CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis());
EmitBaseInitializer(*this, ClassDecl, *B, CtorType);
}
@@ -1466,11 +1477,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
// Initialize the vtable pointers before entering the body.
if (!CanSkipVTablePointerInitialization(*this, Dtor)) {
- // Insert the llvm.invariant.group.barrier intrinsic before initializing
+ // Insert the llvm.launder.invariant.group intrinsic before initializing
// the vptrs to cancel any previous assumptions we might have made.
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0)
- CXXThisValue = Builder.CreateInvariantGroupBarrier(LoadCXXThis());
+ CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis());
InitializeVTablePointers(Dtor->getParent());
}
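Editor's note: these renames track LLVM's switch from llvm.invariant.group.barrier to llvm.launder.invariant.group. The situation the intrinsic exists for: under -fstrict-vtable-pointers the optimizer may reuse vptr loads within an invariant group, so a pointer whose dynamic type changed in place must be laundered first. The library-level analogue is std::launder:

```c++
#include <new>

struct A { virtual int f() { return 1; } };
struct B final : A { int f() override { return 2; } };

int replaceInPlace(A *a) {
  a->~A();
  ::new (static_cast<void *>(a)) B();  // same storage, new dynamic type
  return std::launder(a)->f();         // blocks reuse of the stale vptr
}
```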
@@ -1728,7 +1739,7 @@ namespace {
};
} // end anonymous namespace
-/// \brief Emit all code that comes at the end of class's
+/// Emit all code that comes at the end of a class's
/// destructor. This is to call destructors on members and base classes
/// in reverse order of their construction.
///
@@ -1954,7 +1965,8 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
}
EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false,
- /*Delegating=*/false, curAddr, E);
+ /*Delegating=*/false, curAddr, E,
+ AggValueSlot::DoesNotOverlap);
}
// Go to the next element.
@@ -1989,7 +2001,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CXXCtorType Type,
bool ForVirtualBase,
bool Delegating, Address This,
- const CXXConstructExpr *E) {
+ const CXXConstructExpr *E,
+ AggValueSlot::Overlap_t Overlap) {
CallArgList Args;
// Push the this ptr.
@@ -2002,10 +2015,10 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
const Expr *Arg = E->getArg(0);
- QualType SrcTy = Arg->getType();
- Address Src = EmitLValue(Arg).getAddress();
+ LValue Src = EmitLValue(Arg);
QualType DestTy = getContext().getTypeDeclType(D->getParent());
- EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+ LValue Dest = MakeAddrLValue(This, DestTy);
+ EmitAggregateCopyCtor(Dest, Src, Overlap);
return;
}
@@ -2017,7 +2030,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(),
/*ParamsToSkip*/ 0, Order);
- EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
+ EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args,
+ Overlap, E->getExprLoc());
}
static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
@@ -2049,14 +2063,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
bool ForVirtualBase,
bool Delegating,
Address This,
- CallArgList &Args) {
+ CallArgList &Args,
+ AggValueSlot::Overlap_t Overlap,
+ SourceLocation Loc) {
const CXXRecordDecl *ClassDecl = D->getParent();
// C++11 [class.mfct.non-static]p2:
// If a non-static member function of a class X is called for an object that
// is not of type X, or of a type derived from X, the behavior is undefined.
- // FIXME: Provide a source location here.
- EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(),
+ EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, Loc,
This.getPointer(), getContext().getRecordType(ClassDecl));
if (D->isTrivial() && D->isDefaultConstructor()) {
@@ -2071,9 +2086,12 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
assert(Args.size() == 2 && "unexpected argcount for trivial ctor");
QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType();
- Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy));
+ Address Src(Args[1].getRValue(*this).getScalarVal(),
+ getNaturalTypeAlignment(SrcTy));
+ LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
QualType DestTy = getContext().getTypeDeclType(ClassDecl);
- EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+ LValue DestLVal = MakeAddrLValue(This, DestTy);
+ EmitAggregateCopyCtor(DestLVal, SrcLVal, Overlap);
return;
}
@@ -2123,8 +2141,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
const CXXConstructorDecl *D, bool ForVirtualBase, Address This,
bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) {
CallArgList Args;
- CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()),
- /*NeedsCopy=*/false);
+ CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()));
// Forward the parameters.
if (InheritedFromVBase &&
@@ -2163,7 +2180,8 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
}
EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false,
- This, Args);
+ This, Args, AggValueSlot::MayOverlap,
+ E->getLocation());
}
void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
@@ -2188,7 +2206,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
assert(Args.size() >= Params.size() && "too few arguments for call");
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) {
- const RValue &RV = Args[I].RV;
+ const RValue &RV = Args[I].getRValue(*this);
assert(!RV.isComplex() && "complex indirect params not supported");
ParamValue Val = RV.isScalar()
? ParamValue::forDirect(RV.getScalarVal())
@@ -2259,7 +2277,8 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(),
/*ParamsToSkip*/ 1);
- EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args);
+ EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args,
+ AggValueSlot::MayOverlap, E->getExprLoc());
}
void
@@ -2294,7 +2313,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor,
}
EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false,
- /*Delegating=*/true, This, DelegateArgs);
+ /*Delegating=*/true, This, DelegateArgs,
+ AggValueSlot::MayOverlap, Loc);
}
namespace {
@@ -2325,7 +2345,8 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor
AggValueSlot::forAddr(ThisPtr, Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::MayOverlap);
EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot);
@@ -2667,7 +2688,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
SSK = llvm::SanStat_CFI_UnrelatedCast;
break;
case CFITCK_ICall:
- llvm_unreachable("not expecting CFITCK_ICall");
+ case CFITCK_NVMFCall:
+ case CFITCK_VMFCall:
+ llvm_unreachable("unexpected sanitizer kind");
}
std::string TypeName = RD->getQualifiedNameAsString();
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 22055b2cb902..cfd230997ed0 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -281,10 +281,10 @@ void EHScopeStack::popNullFixups() {
BranchFixups.pop_back();
}
-void CodeGenFunction::initFullExprCleanup() {
+Address CodeGenFunction::createCleanupActiveFlag() {
// Create a variable to decide whether the cleanup needs to be run.
- Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(),
- "cleanup.cond");
+ Address active = CreateTempAllocaWithoutCast(
+ Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond");
// Initialize it to false at a site that's guaranteed to be run
// before each evaluation.
@@ -293,10 +293,14 @@ void CodeGenFunction::initFullExprCleanup() {
// Initialize it to true at the current location.
Builder.CreateStore(Builder.getTrue(), active);
+ return active;
+}
+
+void CodeGenFunction::initFullExprCleanupWithFlag(Address ActiveFlag) {
// Set that as the active flag in the cleanup.
EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin());
assert(!cleanup.hasActiveFlag() && "cleanup already has active flag?");
- cleanup.setActiveFlag(active);
+ cleanup.setActiveFlag(ActiveFlag);
if (cleanup.isNormalCleanup()) cleanup.setTestFlagInNormalCleanup();
if (cleanup.isEHCleanup()) cleanup.setTestFlagInEHCleanup();
@@ -494,6 +498,13 @@ void CodeGenFunction::PopCleanupBlocks(
&LifetimeExtendedCleanupStack[I],
Header.getSize());
I += Header.getSize();
+
+ if (Header.isConditional()) {
+ Address ActiveFlag =
+ reinterpret_cast<Address &>(LifetimeExtendedCleanupStack[I]);
+ initFullExprCleanupWithFlag(ActiveFlag);
+ I += sizeof(ActiveFlag);
+ }
}
LifetimeExtendedCleanupStack.resize(OldLifetimeExtendedSize);
}
@@ -624,7 +635,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF,
si->eraseFromParent();
// Destroy the load.
- assert(condition->getOperand(0) == CGF.NormalCleanupDest);
+ assert(condition->getOperand(0) == CGF.NormalCleanupDest.getPointer());
assert(condition->use_empty());
condition->eraseFromParent();
}
@@ -833,7 +844,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
if (NormalCleanupDestSlot->hasOneUse()) {
NormalCleanupDestSlot->user_back()->eraseFromParent();
NormalCleanupDestSlot->eraseFromParent();
- NormalCleanupDest = nullptr;
+ NormalCleanupDest = Address::invalid();
}
llvm::BasicBlock *BranchAfter = Scope.getBranchAfterBlock(0);
@@ -971,16 +982,21 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
CurrentFuncletPad);
llvm::CleanupPadInst *CPI = nullptr;
- if (!EHPersonality::get(*this).usesFuncletPads()) {
- EHStack.pushTerminate();
- PushedTerminate = true;
- } else {
+
+ const EHPersonality &Personality = EHPersonality::get(*this);
+ if (Personality.usesFuncletPads()) {
llvm::Value *ParentPad = CurrentFuncletPad;
if (!ParentPad)
ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
CurrentFuncletPad = CPI = Builder.CreateCleanupPad(ParentPad);
}
+ // Non-MSVC personalities need to terminate when an EH cleanup throws.
+ if (!Personality.isMSVCPersonality()) {
+ EHStack.pushTerminate();
+ PushedTerminate = true;
+ }
+
// We only actually emit the cleanup code if the cleanup is either
// active or was used before it was deactivated.
if (EHActiveFlag.isValid() || IsActive) {
@@ -1233,8 +1249,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C,
EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.find(C));
assert(Scope.isActive() && "double deactivation");
- // If it's the top of the stack, just pop it.
- if (C == EHStack.stable_begin()) {
+ // If it's the top of the stack, just pop it, but do so only if it belongs
+ // to the current RunCleanupsScope.
+ if (C == EHStack.stable_begin() &&
+ CurrentCleanupScopeDepth.strictlyEncloses(C)) {
// If it's a normal cleanup, we need to pretend that the
// fallthrough is unreachable.
CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
@@ -1250,10 +1268,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C,
}
Address CodeGenFunction::getNormalCleanupDestSlot() {
- if (!NormalCleanupDest)
+ if (!NormalCleanupDest.isValid())
NormalCleanupDest =
- CreateTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot");
- return Address(NormalCleanupDest, CharUnits::fromQuantity(4));
+ CreateDefaultAlignTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot");
+ return NormalCleanupDest;
}
/// Emits all the code to cause the given temporary to be cleaned up.
diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h
index 105c5629d50c..93be3e6c1502 100644
--- a/lib/CodeGen/CGCleanup.h
+++ b/lib/CodeGen/CGCleanup.h
@@ -230,7 +230,7 @@ public:
};
/// A cleanup scope which generates the cleanup blocks lazily.
-class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope {
+class alignas(8) EHCleanupScope : public EHScope {
/// The nearest normal cleanup scope enclosing this one.
EHScopeStack::stable_iterator EnclosingNormal;
@@ -627,16 +627,21 @@ struct EHPersonality {
static const EHPersonality MSVC_except_handler;
static const EHPersonality MSVC_C_specific_handler;
static const EHPersonality MSVC_CxxFrameHandler3;
+ static const EHPersonality GNU_Wasm_CPlusPlus;
/// Does this personality use landingpads or the family of pad instructions
/// designed to form funclets?
- bool usesFuncletPads() const { return isMSVCPersonality(); }
+ bool usesFuncletPads() const {
+ return isMSVCPersonality() || isWasmPersonality();
+ }
bool isMSVCPersonality() const {
return this == &MSVC_except_handler || this == &MSVC_C_specific_handler ||
this == &MSVC_CxxFrameHandler3;
}
+ bool isWasmPersonality() const { return this == &GNU_Wasm_CPlusPlus; }
+
bool isMSVCXXPersonality() const { return this == &MSVC_CxxFrameHandler3; }
};
}
diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp
index 5842e7b3ff93..4f525c8aac85 100644
--- a/lib/CodeGen/CGCoroutine.cpp
+++ b/lib/CodeGen/CGCoroutine.cpp
@@ -44,6 +44,15 @@ struct clang::CodeGen::CGCoroData {
// A branch to this block is emitted when coroutine needs to suspend.
llvm::BasicBlock *SuspendBB = nullptr;
+ // The promise type's 'unhandled_exception' handler, if it defines one.
+ Stmt *ExceptionHandler = nullptr;
+
+ // A temporary i1 alloca that stores whether 'await_resume' threw an
+ // exception. If it did, 'true' is stored in this variable, and the coroutine
+ // body must be skipped. If the promise type does not define an exception
+ // handler, this is null.
+ llvm::Value *ResumeEHVar = nullptr;
+
// Stores the jump destination just before the coroutine memory is freed.
// This is the destination that every suspend point jumps to for the cleanup
// branch.
@@ -121,6 +130,16 @@ static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) {
return Prefix;
}
+static bool memberCallExpressionCanThrow(const Expr *E) {
+ if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
+ if (const auto *Proto =
+ CE->getMethodDecl()->getType()->getAs<FunctionProtoType>())
+ if (isNoexceptExceptionSpec(Proto->getExceptionSpecType()) &&
+ Proto->canThrow() == CT_Cannot)
+ return false;
+ return true;
+}
+
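Editor's note: memberCallExpressionCanThrow is what lets the coroutine lowering skip the resume.eh machinery entirely; if await_resume is declared noexcept (and the prototype reports CT_Cannot), no i1 flag or try/catch wrapper is emitted around the initial suspend. A sketch of an awaiter that takes the cheap path, written against the C++20 header for brevity:

```c++
#include <coroutine>

struct Awaiter {
  bool await_ready() const noexcept { return true; }
  void await_suspend(std::coroutine_handle<>) noexcept {}
  // noexcept here means no resume.eh bookkeeping is generated.
  int await_resume() noexcept { return 0; }
};
```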
// Emit suspend expression which roughly looks like:
//
// auto && x = CommonExpr();
@@ -208,11 +227,36 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
// Emit await_resume expression.
CGF.EmitBlock(ReadyBlock);
+
+ // Exception handling requires additional IR. If the 'await_resume' function
+ // is marked as 'noexcept', we avoid generating this additional IR.
+ CXXTryStmt *TryStmt = nullptr;
+ if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
+ memberCallExpressionCanThrow(S.getResumeExpr())) {
+ Coro.ResumeEHVar =
+ CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
+ Builder.CreateFlagStore(true, Coro.ResumeEHVar);
+
+ auto Loc = S.getResumeExpr()->getExprLoc();
+ auto *Catch = new (CGF.getContext())
+ CXXCatchStmt(Loc, /*exDecl=*/nullptr, Coro.ExceptionHandler);
+ auto *TryBody =
+ CompoundStmt::Create(CGF.getContext(), S.getResumeExpr(), Loc, Loc);
+ TryStmt = CXXTryStmt::Create(CGF.getContext(), Loc, TryBody, Catch);
+ CGF.EnterCXXTryStmt(*TryStmt);
+ }
+
LValueOrRValue Res;
if (forLValue)
Res.LV = CGF.EmitLValue(S.getResumeExpr());
else
Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
+
+ if (TryStmt) {
+ Builder.CreateFlagStore(false, Coro.ResumeEHVar);
+ CGF.ExitCXXTryStmt(*TryStmt);
+ }
+
return Res;
}
@@ -315,7 +359,7 @@ namespace {
GetParamRef Visitor;
Visitor.Visit(const_cast<Expr*>(InitExpr));
assert(Visitor.Expr);
- auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr);
+ DeclRefExpr *DREOrig = Visitor.Expr;
auto *PD = DREOrig->getDecl();
auto it = LocalDeclMap.find(PD);
@@ -588,19 +632,40 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
EHStack.pushCleanup<CallCoroEnd>(EHCleanup);
CurCoro.Data->CurrentAwaitKind = AwaitKind::Init;
+ CurCoro.Data->ExceptionHandler = S.getExceptionHandler();
EmitStmt(S.getInitSuspendStmt());
CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB);
CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal;
- if (auto *OnException = S.getExceptionHandler()) {
+ if (CurCoro.Data->ExceptionHandler) {
+ // If we generated IR to record whether an exception was thrown from
+ // 'await_resume', then use that IR to determine whether the coroutine
+ // body should be skipped.
+ // If we didn't generate the IR (perhaps because 'await_resume' was marked
+ // as 'noexcept'), then we skip this check.
+ BasicBlock *ContBB = nullptr;
+ if (CurCoro.Data->ResumeEHVar) {
+ BasicBlock *BodyBB = createBasicBlock("coro.resumed.body");
+ ContBB = createBasicBlock("coro.resumed.cont");
+ Value *SkipBody = Builder.CreateFlagLoad(CurCoro.Data->ResumeEHVar,
+ "coro.resumed.eh");
+ Builder.CreateCondBr(SkipBody, ContBB, BodyBB);
+ EmitBlock(BodyBB);
+ }
+
auto Loc = S.getLocStart();
- CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException);
- auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch);
+ CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr,
+ CurCoro.Data->ExceptionHandler);
+ auto *TryStmt =
+ CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch);
EnterCXXTryStmt(*TryStmt);
emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock());
ExitCXXTryStmt(*TryStmt);
+
+ if (ContBB)
+ EmitBlock(ContBB);
}
else {
emitBodyAndFallthrough(*this, S, S.getBody());
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index aeed4d658a4e..097a1e043047 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -289,8 +289,7 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) {
<< OC->getIdentifier()->getNameStart() << ')';
}
} else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
- OS << OCD->getClassInterface()->getName() << '('
- << OCD->getName() << ')';
+ OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')';
} else if (isa<ObjCProtocolDecl>(DC)) {
// We can extract the type of the class from the self pointer.
if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) {
@@ -361,18 +360,19 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
return StringRef();
}
-llvm::DIFile::ChecksumKind
+Optional<llvm::DIFile::ChecksumKind>
CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
Checksum.clear();
- if (!CGM.getCodeGenOpts().EmitCodeView)
- return llvm::DIFile::CSK_None;
+ if (!CGM.getCodeGenOpts().EmitCodeView &&
+ CGM.getCodeGenOpts().DwarfVersion < 5)
+ return None;
SourceManager &SM = CGM.getContext().getSourceManager();
bool Invalid;
llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid);
if (Invalid)
- return llvm::DIFile::CSK_None;
+ return None;
llvm::MD5 Hash;
llvm::MD5::MD5Result Result;
@@ -384,51 +384,62 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
return llvm::DIFile::CSK_MD5;
}
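Editor's note: computeChecksum now returns Optional so "no checksum" is representable (the old CSK_None enumerator is gone), and a checksum is produced only for CodeView or DWARF v5 consumers. The hashing itself remains an MD5 of the whole file buffer, roughly:

```c++
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"

// Standalone sketch of the MD5 path used above.
llvm::SmallString<32> md5OfBuffer(llvm::StringRef Buffer) {
  llvm::MD5 Hash;
  llvm::MD5::MD5Result Result;
  Hash.update(Buffer);
  Hash.final(Result);
  llvm::SmallString<32> Checksum;
  llvm::MD5::stringifyResult(Result, Checksum);
  return Checksum;
}
```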
+Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
+ FileID FID) {
+ if (!CGM.getCodeGenOpts().EmbedSource)
+ return None;
+
+ bool SourceInvalid = false;
+ StringRef Source = SM.getBufferData(FID, &SourceInvalid);
+
+ if (SourceInvalid)
+ return None;
+
+ return Source;
+}
+
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (!Loc.isValid())
// If Location is not valid then use main input file.
- return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
- remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksumKind(),
- TheCU->getFile()->getChecksum());
+ return getOrCreateMainFile();
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
// If the location is not valid then use main input file.
- return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
- remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksumKind(),
- TheCU->getFile()->getChecksum());
+ return getOrCreateMainFile();
// Cache the results.
const char *fname = PLoc.getFilename();
- auto it = DIFileCache.find(fname);
+ auto It = DIFileCache.find(fname);
- if (it != DIFileCache.end()) {
+ if (It != DIFileCache.end()) {
// Verify that the information still exists.
- if (llvm::Metadata *V = it->second)
+ if (llvm::Metadata *V = It->second)
return cast<llvm::DIFile>(V);
}
SmallString<32> Checksum;
- llvm::DIFile::ChecksumKind CSKind =
+ Optional<llvm::DIFile::ChecksumKind> CSKind =
computeChecksum(SM.getFileID(Loc), Checksum);
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
+ if (CSKind)
+ CSInfo.emplace(*CSKind, Checksum);
- llvm::DIFile *F = DBuilder.createFile(remapDIPath(PLoc.getFilename()),
- remapDIPath(getCurrentDirname()),
- CSKind, Checksum);
+ llvm::DIFile *F = DBuilder.createFile(
+ remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getFileID(Loc)));
DIFileCache[fname].reset(F);
return F;
}
llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
- return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
- remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksumKind(),
- TheCU->getFile()->getChecksum());
+ return DBuilder.createFile(
+ remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()),
+ TheCU->getFile()->getChecksum(),
+ CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None);
}
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
@@ -472,7 +483,8 @@ StringRef CGDebugInfo::getCurrentDirname() {
void CGDebugInfo::CreateCompileUnit() {
SmallString<32> Checksum;
- llvm::DIFile::ChecksumKind CSKind = llvm::DIFile::CSK_None;
+ Optional<llvm::DIFile::ChecksumKind> CSKind;
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
// Should we be asking the SourceManager for the main file name, instead of
// accepting it as an argument? This just causes the main file name to
@@ -551,14 +563,19 @@ void CGDebugInfo::CreateCompileUnit() {
break;
}
+ if (CSKind)
+ CSInfo.emplace(*CSKind, Checksum);
+
// Create new compile unit.
// FIXME - Eliminate TheCU.
auto &CGOpts = CGM.getCodeGenOpts();
TheCU = DBuilder.createCompileUnit(
LangTag,
DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSKind, Checksum),
- Producer, LO.Optimize || CGOpts.PrepareForLTO || CGOpts.EmitSummaryIndex,
+ remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getMainFileID())),
+ CGOpts.EmitVersionIdentMetadata ? Producer : "",
+ LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO,
CGOpts.DwarfDebugFlags, RuntimeVers,
CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind,
0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling,
@@ -620,14 +637,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return SelTy;
}
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
- case BuiltinType::Id: \
- return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \
SingletonId);
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
- return getOrCreateStructPtrType("opencl_sampler_t",
- OCLSamplerDITy);
+ return getOrCreateStructPtrType("opencl_sampler_t", OCLSamplerDITy);
case BuiltinType::OCLEvent:
return getOrCreateStructPtrType("opencl_event_t", OCLEventDITy);
case BuiltinType::OCLClkEvent:
@@ -645,6 +661,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::SChar:
Encoding = llvm::dwarf::DW_ATE_signed_char;
break;
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
Encoding = llvm::dwarf::DW_ATE_UTF;
@@ -681,6 +698,34 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
// floating point types of the same size.
Encoding = llvm::dwarf::DW_ATE_float;
break;
+ case BuiltinType::ShortAccum:
+ case BuiltinType::Accum:
+ case BuiltinType::LongAccum:
+ case BuiltinType::ShortFract:
+ case BuiltinType::Fract:
+ case BuiltinType::LongFract:
+ case BuiltinType::SatShortFract:
+ case BuiltinType::SatFract:
+ case BuiltinType::SatLongFract:
+ case BuiltinType::SatShortAccum:
+ case BuiltinType::SatAccum:
+ case BuiltinType::SatLongAccum:
+ Encoding = llvm::dwarf::DW_ATE_signed_fixed;
+ break;
+ case BuiltinType::UShortAccum:
+ case BuiltinType::UAccum:
+ case BuiltinType::ULongAccum:
+ case BuiltinType::UShortFract:
+ case BuiltinType::UFract:
+ case BuiltinType::ULongFract:
+ case BuiltinType::SatUShortAccum:
+ case BuiltinType::SatUAccum:
+ case BuiltinType::SatULongAccum:
+ case BuiltinType::SatUShortFract:
+ case BuiltinType::SatUFract:
+ case BuiltinType::SatULongFract:
+ Encoding = llvm::dwarf::DW_ATE_unsigned_fixed;
+ break;
}
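
These new cases cover the Embedded-C fixed-point types. A source-level sketch of declarations that reach them (C, assuming clang's -ffixed-point):

    _Accum a = 0.5k;              /* DW_ATE_signed_fixed   */
    unsigned _Fract uf = 0.25ur;  /* DW_ATE_unsigned_fixed */
    _Sat short _Accum s = 1.0hk;  /* saturating variants encode the same way */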
switch (BT->getKind()) {
@@ -780,27 +825,49 @@ static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
}
}
-/// In C++ mode, types have linkage, so we can rely on the ODR and
-/// on their mangled names, if they're external.
-static SmallString<256> getUniqueTagTypeName(const TagType *Ty,
- CodeGenModule &CGM,
- llvm::DICompileUnit *TheCU) {
- SmallString<256> FullName;
+// Determines if the tag declaration will require a type identifier.
+static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM,
+ llvm::DICompileUnit *TheCU) {
+ // We only add a type identifier for types with C++ name mangling.
+ if (!hasCXXMangling(TD, TheCU))
+ return false;
+
+ // CodeView types with C++ mangling need a type identifier.
+ if (CGM.getCodeGenOpts().EmitCodeView)
+ return true;
+
+ // Externally visible types with C++ mangling need a type identifier.
+ if (TD->isExternallyVisible())
+ return true;
+
+ return false;
+}
+
+// When emitting CodeView debug information, we need to produce a type
+// identifier for all types which have a C++ mangling. Until a GUID is added
+// to the identifier (not currently implemented), the result will not be
+// unique across compilation units.
+// When emitting DWARF debug information, we need to produce a type identifier
+// for all externally visible types with C++ name mangling. This identifier
+// should be unique across ODR-compliant compilation units.
+static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM,
+ llvm::DICompileUnit *TheCU) {
+ SmallString<256> Identifier;
const TagDecl *TD = Ty->getDecl();
- if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible())
- return FullName;
+ if (!needsTypeIdentifier(TD, CGM, TheCU))
+ return Identifier;
// TODO: This is using the RTTI name. Is there a better way to get
// a unique string for a type?
- llvm::raw_svector_ostream Out(FullName);
+ llvm::raw_svector_ostream Out(Identifier);
CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(QualType(Ty, 0), Out);
- return FullName;
+ return Identifier;
}
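
A sketch of what getTypeIdentifier() yields in practice (Itanium mangling assumed; the exact string comes from mangleCXXRTTIName):

    struct S { virtual void f(); };  // externally visible, C++ mangling
    // The RTTI name "_ZTS1S" becomes the DICompositeType 'identifier:'
    // field, so ODR-compliant units can share one type description.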
-/// \return the approproate DWARF tag for a composite type.
+/// \return the appropriate DWARF tag for a composite type.
static llvm::dwarf::Tag getTagForRecord(const RecordDecl *RD) {
- llvm::dwarf::Tag Tag;
+ llvm::dwarf::Tag Tag;
if (RD->isStruct() || RD->isInterface())
Tag = llvm::dwarf::DW_TAG_structure_type;
else if (RD->isUnion())
@@ -828,10 +895,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
uint32_t Align = 0;
// Create the type.
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
- llvm::DINode::FlagFwdDecl, FullName);
+ llvm::DINode::FlagFwdDecl, Identifier);
if (CGM.getCodeGenOpts().DebugFwdTemplateParams)
if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD))
DBuilder.replaceArrays(RetTy, llvm::DINodeArray(),
@@ -926,9 +993,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
// DW_AT_APPLE_BLOCK attribute and are an implementation detail only
// the debugger needs to know about. To allow type uniquing, emit
// them without a name or a location.
- EltTy =
- DBuilder.createStructType(Unit, "", nullptr, LineNo,
- FieldOffset, 0, Flags, nullptr, Elements);
+ EltTy = DBuilder.createStructType(Unit, "", nullptr, LineNo, FieldOffset, 0,
+ Flags, nullptr, Elements);
return DBuilder.createPointerType(EltTy, Size);
}
@@ -943,8 +1009,9 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false);
printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy());
- auto *AliasDecl = cast<TypeAliasTemplateDecl>(
- Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl();
+ auto *AliasDecl =
+ cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl())
+ ->getTemplatedDecl();
SourceLocation Loc = AliasDecl->getLocation();
return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc),
@@ -981,20 +1048,28 @@ static unsigned getDwarfCC(CallingConv CC) {
return llvm::dwarf::DW_CC_LLVM_vectorcall;
case CC_X86Pascal:
return llvm::dwarf::DW_CC_BORLAND_pascal;
-
- // FIXME: Create new DW_CC_ codes for these calling conventions.
case CC_Win64:
+ return llvm::dwarf::DW_CC_LLVM_Win64;
case CC_X86_64SysV:
+ return llvm::dwarf::DW_CC_LLVM_X86_64SysV;
case CC_AAPCS:
+ return llvm::dwarf::DW_CC_LLVM_AAPCS;
case CC_AAPCS_VFP:
+ return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP;
case CC_IntelOclBicc:
+ return llvm::dwarf::DW_CC_LLVM_IntelOclBicc;
case CC_SpirFunction:
+ return llvm::dwarf::DW_CC_LLVM_SpirFunction;
case CC_OpenCLKernel:
+ return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
case CC_Swift:
+ return llvm::dwarf::DW_CC_LLVM_Swift;
case CC_PreserveMost:
+ return llvm::dwarf::DW_CC_LLVM_PreserveMost;
case CC_PreserveAll:
+ return llvm::dwarf::DW_CC_LLVM_PreserveAll;
case CC_X86RegCall:
- return 0;
+ return llvm::dwarf::DW_CC_LLVM_X86RegCall;
}
return 0;
}
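
Each of these conventions previously collapsed to 0, the "normal" DWARF value. A source-level sketch of one affected case (attribute availability varies by target):

    __attribute__((preserve_most)) void callee(void);
    // Now described with DW_AT_calling_convention =
    // DW_CC_LLVM_PreserveMost instead of the default normal convention.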
@@ -1102,8 +1177,8 @@ CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc,
}
llvm::DINode::DIFlags flags = getAccessFlag(AS, RD);
- return DBuilder.createMemberType(scope, name, file, line, SizeInBits,
- Align, offsetInBits, flags, debugType);
+ return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align,
+ offsetInBits, flags, debugType);
}
void CGDebugInfo::CollectRecordLambdaFields(
@@ -1223,10 +1298,6 @@ void CGDebugInfo::CollectRecordFields(
else {
const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record);
- // Debug info for nested types is included in the member list only for
- // CodeView.
- bool IncludeNestedTypes = CGM.getCodeGenOpts().EmitCodeView;
-
// Field number for non-static fields.
unsigned fieldNo = 0;
@@ -1236,6 +1307,13 @@ void CGDebugInfo::CollectRecordFields(
if (const auto *V = dyn_cast<VarDecl>(I)) {
if (V->hasAttr<NoDebugAttr>())
continue;
+
+ // Skip variable template specializations when emitting CodeView. MSVC
+ // doesn't emit them.
+ if (CGM.getCodeGenOpts().EmitCodeView &&
+ isa<VarTemplateSpecializationDecl>(V))
+ continue;
+
// Reuse the existing static member declaration if one exists
auto MI = StaticDataMemberCache.find(V->getCanonicalDecl());
if (MI != StaticDataMemberCache.end()) {
@@ -1252,7 +1330,9 @@ void CGDebugInfo::CollectRecordFields(
// Bump field number for next field.
++fieldNo;
- } else if (IncludeNestedTypes) {
+ } else if (CGM.getCodeGenOpts().EmitCodeView) {
+ // Debug info for nested types is included in the member list only for
+ // CodeView.
if (const auto *nestedType = dyn_cast<TypeDecl>(I))
if (!nestedType->isImplicit() &&
nestedType->getDeclContext() == record)
@@ -1386,7 +1466,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
// deleting dtor.
const auto *DD = dyn_cast<CXXDestructorDecl>(Method);
GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method);
- MicrosoftVTableContext::MethodVFTableLocation ML =
+ MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
VIndex = ML.Index;
@@ -1507,6 +1587,7 @@ void CGDebugInfo::CollectCXXBasesAux(
auto *BaseTy = getOrCreateType(BI.getType(), Unit);
llvm::DINode::DIFlags BFlags = StartingFlags;
uint64_t BaseOffset;
+ uint32_t VBPtrOffset = 0;
if (BI.isVirtual()) {
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
@@ -1520,6 +1601,10 @@ void CGDebugInfo::CollectCXXBasesAux(
// vbase offset offset in Itanium.
BaseOffset =
4 * CGM.getMicrosoftVTableContext().getVBTableIndex(RD, Base);
+ VBPtrOffset = CGM.getContext()
+ .getASTRecordLayout(RD)
+ .getVBPtrOffset()
+ .getQuantity();
}
BFlags |= llvm::DINode::FlagVirtual;
} else
@@ -1528,8 +1613,8 @@ void CGDebugInfo::CollectCXXBasesAux(
// BI->isVirtual() and bits when not.
BFlags |= getAccessFlag(BI.getAccessSpecifier(), RD);
- llvm::DIType *DTy =
- DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset, BFlags);
+ llvm::DIType *DTy = DBuilder.createInheritance(RecordTy, BaseTy, BaseOffset,
+ VBPtrOffset, BFlags);
EltTys.push_back(DTy);
}
}
@@ -1603,8 +1688,8 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
V = CGM.getCXXABI().EmitNullMemberPointer(MPT);
if (!V)
V = llvm::ConstantInt::get(CGM.Int8Ty, 0);
- TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy, V));
+ TemplateParams.push_back(
+ DBuilder.createTemplateValueParameter(TheCU, Name, TTy, V));
} break;
case TemplateArgument::Template:
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
@@ -1676,9 +1761,8 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
- llvm::DIType *vtbl_ptr_type =
- DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace,
- "__vtbl_ptr_type");
+ llvm::DIType *vtbl_ptr_type = DBuilder.createPointerType(
+ SubTy, Size, 0, DWARFAddressSpace, "__vtbl_ptr_type");
VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size);
return VTablePtrType;
}
@@ -1722,9 +1806,8 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
// Create a very wide void* type and insert it directly in the element list.
- llvm::DIType *VTableType =
- DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace,
- "__vtbl_ptr_type");
+ llvm::DIType *VTableType = DBuilder.createPointerType(
+ nullptr, VTableWidth, 0, DWARFAddressSpace, "__vtbl_ptr_type");
EltTys.push_back(VTableType);
// The vptr is a pointer to this special vtable type.
@@ -1739,9 +1822,9 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
VPtrTy = getOrCreateVTablePtrType(Unit);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
- llvm::DIType *VPtrMember = DBuilder.createMemberType(
- Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
- llvm::DINode::FlagArtificial, VPtrTy);
+ llvm::DIType *VPtrMember =
+ DBuilder.createMemberType(Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
+ llvm::DINode::FlagArtificial, VPtrTy);
EltTys.push_back(VPtrMember);
}
@@ -2079,7 +2162,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
llvm::raw_svector_ostream OS(ConfigMacros);
const auto &PPOpts = CGM.getPreprocessorOpts();
unsigned I = 0;
- // Translate the macro definitions back into a commmand line.
+ // Translate the macro definitions back into a command line.
for (auto &M : PPOpts.Macros) {
if (++I > 1)
OS << " ";
@@ -2088,9 +2171,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
OS << "\"-" << (Undef ? 'U' : 'D');
for (char c : Macro)
switch (c) {
- case '\\' : OS << "\\\\"; break;
- case '"' : OS << "\\\""; break;
- default: OS << c;
+ case '\\':
+ OS << "\\\\";
+ break;
+ case '"':
+ OS << "\\\"";
+ break;
+ default:
+ OS << c;
}
OS << '\"';
}
@@ -2107,6 +2195,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
: ~1ULL;
llvm::DIBuilder DIB(CGM.getModule());
DIB.createCompileUnit(TheCU->getSourceLanguage(),
+ // TODO: Support "Source" from external AST providers?
DIB.createFile(Mod.getModuleName(), Mod.getPath()),
TheCU->getProducer(), true, StringRef(), 0,
Mod.getASTFile(), llvm::DICompileUnit::FullDebug,
@@ -2162,7 +2251,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
if (!SClassTy)
return nullptr;
- llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0,
+ llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0,
llvm::DINode::FlagZero);
EltTys.push_back(InhTag);
}
@@ -2184,7 +2273,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
EltTys.push_back(PropertyNode);
};
{
- llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet;
+ llvm::SmallPtrSet<const IdentifierInfo *, 16> PropertySet;
for (const ObjCCategoryDecl *ClassExt : ID->known_extensions())
for (auto *PD : ClassExt->properties()) {
PropertySet.insert(PD->getIdentifier());
@@ -2265,10 +2354,12 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
ObjCMethodDecl *Setter = PD->getSetterMethodDecl();
PropertyNode = DBuilder.createObjCProperty(
PD->getName(), PUnit, PLine,
- hasDefaultGetterName(PD, Getter) ? "" : getSelectorName(
- PD->getGetterName()),
- hasDefaultSetterName(PD, Setter) ? "" : getSelectorName(
- PD->getSetterName()),
+ hasDefaultGetterName(PD, Getter)
+ ? ""
+ : getSelectorName(PD->getGetterName()),
+ hasDefaultSetterName(PD, Setter)
+ ? ""
+ : getSelectorName(PD->getSetterName()),
PD->getPropertyAttributes(),
getOrCreateType(PD->getType(), PUnit));
}
@@ -2291,12 +2382,14 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
llvm::DIFile *Unit) {
llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit);
int64_t Count = Ty->getNumElements();
- if (Count == 0)
- // If number of elements are not known then this is an unbounded array.
- // Use Count == -1 to express such arrays.
- Count = -1;
- llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(0, Count);
+ llvm::Metadata *Subscript;
+ QualType QTy(Ty, 0);
+ auto SizeExpr = SizeExprCache.find(QTy);
+ if (SizeExpr != SizeExprCache.end())
+ Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond());
+ else
+ Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1);
llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -2353,8 +2446,12 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
}
}
- // FIXME: Verify this is right for VLAs.
- Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
+ auto SizeNode = SizeExprCache.find(EltTy);
+ if (SizeNode != SizeExprCache.end())
+ Subscripts.push_back(
+ DBuilder.getOrCreateSubrange(0, SizeNode->getSecond()));
+ else
+ Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
EltTy = Ty->getElementType();
}
@@ -2422,8 +2519,7 @@ llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) {
return DBuilder.createQualifiedType(llvm::dwarf::DW_TAG_atomic_type, FromTy);
}
-llvm::DIType* CGDebugInfo::CreateType(const PipeType *Ty,
- llvm::DIFile *U) {
+llvm::DIType *CGDebugInfo::CreateType(const PipeType *Ty, llvm::DIFile *U) {
return getOrCreateType(Ty->getElementType(), U);
}
@@ -2437,7 +2533,7 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
Align = getDeclAlignIfRequired(ED, CGM.getContext());
}
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
bool isImportedFromModule =
DebugTypeExtRefs && ED->isFromASTFile() && ED->getDefinition();
@@ -2460,7 +2556,7 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
StringRef EDName = ED->getName();
llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType(
llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
- 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName);
+ 0, Size, Align, llvm::DINode::FlagFwdDecl, Identifier);
ReplaceMap.emplace_back(
std::piecewise_construct, std::make_tuple(Ty),
@@ -2480,14 +2576,17 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
Align = getDeclAlignIfRequired(ED, CGM.getContext());
}
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
// Create elements for each enumerator.
SmallVector<llvm::Metadata *, 16> Enumerators;
ED = ED->getDefinition();
+ bool IsSigned = ED->getIntegerType()->isSignedIntegerType();
for (const auto *Enum : ED->enumerators()) {
- Enumerators.push_back(DBuilder.createEnumerator(
- Enum->getName(), Enum->getInitVal().getSExtValue()));
+ const auto &InitVal = Enum->getInitVal();
+ auto Value = IsSigned ? InitVal.getSExtValue() : InitVal.getZExtValue();
+ Enumerators.push_back(
+ DBuilder.createEnumerator(Enum->getName(), Value, !IsSigned));
}
// Return a CompositeType for the enum itself.
@@ -2496,11 +2595,10 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
unsigned Line = getLineNumber(ED->getLocation());
llvm::DIScope *EnumContext = getDeclContextDescriptor(ED);
- llvm::DIType *ClassTy =
- ED->isFixed() ? getOrCreateType(ED->getIntegerType(), DefUnit) : nullptr;
+ llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
Line, Size, Align, EltArray, ClassTy,
- FullName);
+ Identifier, ED->isFixed());
}
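
A sketch of the enums whose debug info changes here (C++11 fixed underlying type assumed):

    enum E : unsigned { Big = 0x80000000u };
    // 'Big' is now emitted as the unsigned value 2147483648 rather than
    // being sign-extended to a negative number, and the underlying type
    // is always attached, with ED->isFixed() recorded alongside it.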
llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
@@ -2585,10 +2683,10 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) {
// Unwrap the type as needed for debug information.
Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
- auto it = TypeCache.find(Ty.getAsOpaquePtr());
- if (it != TypeCache.end()) {
+ auto It = TypeCache.find(Ty.getAsOpaquePtr());
+ if (It != TypeCache.end()) {
// Verify that the debug info still exists.
- if (llvm::Metadata *V = it->second)
+ if (llvm::Metadata *V = It->second)
return cast<llvm::DIType>(V);
}
@@ -2623,7 +2721,7 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
return T;
llvm::DIType *Res = CreateTypeNode(Ty, Unit);
- void* TyPtr = Ty.getAsOpaquePtr();
+ void *TyPtr = Ty.getAsOpaquePtr();
// And update the type cache.
TypeCache[TyPtr].reset(Res);
@@ -2801,11 +2899,24 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
- SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+ SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
+
+ // Explicitly record the calling convention for C++ records.
+ auto Flags = llvm::DINode::FlagZero;
+ if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect)
+ Flags |= llvm::DINode::FlagTypePassByReference;
+ else
+ Flags |= llvm::DINode::FlagTypePassByValue;
+
+ // Record if a C++ record is trivial type.
+ if (CXXRD->isTrivial())
+ Flags |= llvm::DINode::FlagTrivial;
+ }
llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align,
- llvm::DINode::FlagZero, FullName);
+ Flags, Identifier);
  // Elements of composite types usually refer back to the type, creating
// uniquing cycles. Distinct nodes are more efficient.
@@ -2819,14 +2930,14 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
// so they don't tend to be involved in uniquing cycles and there is some
// chance of merging them when linking together two modules. Only make
// them distinct if they are ODR-uniqued.
- if (FullName.empty())
+ if (Identifier.empty())
break;
LLVM_FALLTHROUGH;
case llvm::dwarf::DW_TAG_structure_type:
case llvm::dwarf::DW_TAG_union_type:
case llvm::dwarf::DW_TAG_class_type:
- // Immediatley resolve to a distinct node.
+ // Immediately resolve to a distinct node.
RealDecl =
llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl));
break;
@@ -2901,10 +3012,10 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
if (DebugKind >= codegenoptions::LimitedDebugInfo) {
if (const NamespaceDecl *NSDecl =
- dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
+ dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
FDContext = getOrCreateNamespace(NSDecl);
else if (const RecordDecl *RDecl =
- dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
+ dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
llvm::DIScope *Mod = getParentModuleOrNull(RDecl);
FDContext = getContextDescriptor(RDecl, Mod ? Mod : TheCU);
}
@@ -2931,8 +3042,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
llvm::APInt ConstVal(32, 1);
QualType ET = CGM.getContext().getAsArrayType(T)->getElementType();
- T = CGM.getContext().getConstantArrayType(ET, ConstVal,
- ArrayType::Normal, 0);
+ T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal,
+ 0);
}
Name = VD->getName();
@@ -2959,8 +3070,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
if (DC->isRecord())
DC = CGM.getContext().getTranslationUnitDecl();
- llvm::DIScope *Mod = getParentModuleOrNull(VD);
- VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
+ llvm::DIScope *Mod = getParentModuleOrNull(VD);
+ VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
}
llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
@@ -2972,8 +3083,8 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
- collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext,
- TParamsArray, Flags);
+ collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray,
+ Flags);
auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
// Build function type.
@@ -2999,20 +3110,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
!FD->isExternallyVisible(),
/* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
TParamsArray.get(), getFunctionDeclaration(FD));
- const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
+ const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
std::make_tuple(CanonDecl),
std::make_tuple(SP));
return SP;
}
-llvm::DISubprogram *
-CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
+llvm::DISubprogram *CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
return getFunctionFwdDeclOrStub(GD, /* Stub = */ false);
}
-llvm::DISubprogram *
-CGDebugInfo::getFunctionStub(GlobalDecl GD) {
+llvm::DISubprogram *CGDebugInfo::getFunctionStub(GlobalDecl GD) {
return getFunctionFwdDeclOrStub(GD, /* Stub = */ true);
}
@@ -3136,7 +3245,8 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
if (FPT->getNumParams() > 1)
SelfDeclTy = FPT->getParamType(0);
if (!SelfDeclTy.isNull())
- Elts.push_back(CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F)));
+ Elts.push_back(
+ CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F)));
// "_cmd" pointer is always second argument.
Elts.push_back(DBuilder.createArtificialType(
getOrCreateType(CGM.getContext().getObjCSelType(), F)));
@@ -3172,7 +3282,8 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
SourceLocation ScopeLoc, QualType FnType,
- llvm::Function *Fn, CGBuilderTy &Builder) {
+ llvm::Function *Fn, bool CurFuncIsThunk,
+ CGBuilderTy &Builder) {
StringRef Name;
StringRef LinkageName;
@@ -3213,11 +3324,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (Name.startswith("\01"))
Name = Name.substr(1);
- if (!HasDecl || D->isImplicit()) {
+ if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>()) {
Flags |= llvm::DINode::FlagArtificial;
// Artificial functions should not silently reuse CurLoc.
CurLoc = SourceLocation();
}
+
+ if (CurFuncIsThunk)
+ Flags |= llvm::DINode::FlagThunk;
+
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = getLineNumber(ScopeLoc);
@@ -3238,6 +3353,27 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (HasDecl && isa<FunctionDecl>(D))
DeclCache[D->getCanonicalDecl()].reset(SP);
+ if (CGM.getCodeGenOpts().DwarfVersion >= 5) {
+ // Starting with DWARF V5 method declarations are emitted as children of
+ // the interface type.
+ if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) {
+ const ObjCInterfaceDecl *ID = OMD->getClassInterface();
+ QualType QTy(ID->getTypeForDecl(), 0);
+ auto It = TypeCache.find(QTy.getAsOpaquePtr());
+ if (It != TypeCache.end()) {
+ llvm::DICompositeType *InterfaceDecl =
+ cast<llvm::DICompositeType>(It->second);
+ llvm::DISubprogram *FD = DBuilder.createFunction(
+ InterfaceDecl, Name, LinkageName, Unit, LineNo,
+ getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
+ false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ TParamsArray.get());
+ DBuilder.finalizeSubprogram(FD);
+ ObjCMethodCache[ID].push_back(FD);
+ }
+ }
+ }
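+  // Sketch of the consumer-visible effect (DWARF v5, Objective-C assumed):
+  // methods of an interface are now also emitted as declaration children
+  // of its DW_TAG_structure_type, e.g. for
+  //     @interface Foo
+  //     - (void)bar;
+  //     @end
+  // a DISubprogram for -[Foo bar] hangs off Foo's composite type in
+  // addition to the usual CU-level definition.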
+
// Push the function onto the lexical block stack.
LexicalBlockStack.emplace_back(SP);
@@ -3330,8 +3466,7 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
}
void CGDebugInfo::AppendAddressSpaceXDeref(
- unsigned AddressSpace,
- SmallVectorImpl<int64_t> &Expr) const {
+ unsigned AddressSpace, SmallVectorImpl<int64_t> &Expr) const {
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (!DWARFAddressSpace)
@@ -3463,13 +3598,14 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
nullptr, Elements);
}
-void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
- llvm::Optional<unsigned> ArgNo,
- CGBuilderTy &Builder) {
+llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
+ llvm::Value *Storage,
+ llvm::Optional<unsigned> ArgNo,
+ CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
if (VD->hasAttr<NoDebugAttr>())
- return;
+ return nullptr;
bool Unwritten =
VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) &&
@@ -3487,7 +3623,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
// If there is no debug info for this type then do not emit debug info
// for this variable.
if (!Ty)
- return;
+ return nullptr;
// Get location information.
unsigned Line = 0;
@@ -3538,15 +3674,15 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
} else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
// If VD is an anonymous union then Storage represents value for
// all union fields.
- const auto *RD = cast<RecordDecl>(RT->getDecl());
+ const RecordDecl *RD = RT->getDecl();
if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
// GDB has trouble finding local variables in anonymous unions, so we emit
- // artifical local variables for each of the members.
+ // artificial local variables for each of the members.
//
// FIXME: Remove this code as soon as GDB supports this.
// The debug info verifier in LLVM operates based on the assumption that a
- // variable has the same size as its storage and we had to disable the check
- // for artificial variables.
+ // variable has the same size as its storage and we had to disable the
+ // check for artificial variables.
for (const auto *Field : RD->fields()) {
llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
StringRef FieldName = Field->getName();
@@ -3571,25 +3707,26 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
}
// Create the descriptor for the variable.
- auto *D = ArgNo
- ? DBuilder.createParameterVariable(
- Scope, Name, *ArgNo, Unit, Line, Ty,
- CGM.getLangOpts().Optimize, Flags)
- : DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty,
- CGM.getLangOpts().Optimize, Flags,
- Align);
+ auto *D = ArgNo ? DBuilder.createParameterVariable(
+ Scope, Name, *ArgNo, Unit, Line, Ty,
+ CGM.getLangOpts().Optimize, Flags)
+ : DBuilder.createAutoVariable(Scope, Name, Unit, Line, Ty,
+ CGM.getLangOpts().Optimize,
+ Flags, Align);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
Builder.GetInsertBlock());
+
+ return D;
}
-void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD,
- llvm::Value *Storage,
- CGBuilderTy &Builder) {
+llvm::DILocalVariable *
+CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
+ CGBuilderTy &Builder) {
assert(DebugKind >= codegenoptions::LimitedDebugInfo);
- EmitDeclare(VD, Storage, llvm::None, Builder);
+ return EmitDeclare(VD, Storage, llvm::None, Builder);
}
llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy,
@@ -3686,7 +3823,7 @@ struct BlockLayoutChunk {
bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) {
return l.OffsetInBits < r.OffsetInBits;
}
-}
+} // namespace
void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
StringRef Name,
@@ -3725,9 +3862,10 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
blockLayout->getElementOffsetInBits(3),
tunit, tunit));
fields.push_back(createFieldType(
- "__descriptor", C.getPointerType(block.NeedsCopyDispose
- ? C.getBlockDescriptorExtendedType()
- : C.getBlockDescriptorType()),
+ "__descriptor",
+ C.getPointerType(block.NeedsCopyDispose
+ ? C.getBlockDescriptorExtendedType()
+ : C.getBlockDescriptorType()),
loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit));
// We want to sort the captures by offset, not because DWARF
@@ -3806,8 +3944,8 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
}
SmallString<36> typeName;
- llvm::raw_svector_ostream(typeName) << "__block_literal_"
- << CGM.getUniqueBlockCount();
+ llvm::raw_svector_ostream(typeName)
+ << "__block_literal_" << CGM.getUniqueBlockCount();
llvm::DINodeArray fieldsArray = DBuilder.getOrCreateArray(fields);
@@ -3823,8 +3961,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
// Create the descriptor for the parameter.
auto *debugVar = DBuilder.createParameterVariable(
- scope, Name, ArgNo, tunit, line, type,
- CGM.getLangOpts().Optimize, flags);
+ scope, Name, ArgNo, tunit, line, type, CGM.getLangOpts().Optimize, flags);
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(),
@@ -3863,7 +4000,7 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls(
if (FieldName.empty()) {
if (const auto *RT = dyn_cast<RecordType>(Field->getType()))
GVE = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName,
- Var, DContext);
+ Var, DContext);
continue;
}
// Use VarDecl's Tag, Scope and Line number.
@@ -4090,7 +4227,6 @@ void CGDebugInfo::setDwoId(uint64_t Signature) {
TheCU->setDWOId(Signature);
}
-
void CGDebugInfo::finalize() {
// Creating types might create further types - invalidating the current
// element and the size(), so don't cache/reference them.
@@ -4102,32 +4238,55 @@ void CGDebugInfo::finalize() {
DBuilder.replaceTemporary(llvm::TempDIType(E.Decl), Ty);
}
- for (auto p : ReplaceMap) {
- assert(p.second);
- auto *Ty = cast<llvm::DIType>(p.second);
+ if (CGM.getCodeGenOpts().DwarfVersion >= 5) {
+ // Add methods to interface.
+ for (const auto &P : ObjCMethodCache) {
+ if (P.second.empty())
+ continue;
+
+ QualType QTy(P.first->getTypeForDecl(), 0);
+ auto It = TypeCache.find(QTy.getAsOpaquePtr());
+ assert(It != TypeCache.end());
+
+ llvm::DICompositeType *InterfaceDecl =
+ cast<llvm::DICompositeType>(It->second);
+
+ SmallVector<llvm::Metadata *, 16> EltTys;
+      auto CurrentElts = InterfaceDecl->getElements();
+      EltTys.append(CurrentElts.begin(), CurrentElts.end());
+ for (auto &MD : P.second)
+ EltTys.push_back(MD);
+ llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
+ DBuilder.replaceArrays(InterfaceDecl, Elements);
+ }
+ }
+
+ for (const auto &P : ReplaceMap) {
+ assert(P.second);
+ auto *Ty = cast<llvm::DIType>(P.second);
assert(Ty->isForwardDecl());
- auto it = TypeCache.find(p.first);
- assert(it != TypeCache.end());
- assert(it->second);
+ auto It = TypeCache.find(P.first);
+ assert(It != TypeCache.end());
+ assert(It->second);
DBuilder.replaceTemporary(llvm::TempDIType(Ty),
- cast<llvm::DIType>(it->second));
+ cast<llvm::DIType>(It->second));
}
- for (const auto &p : FwdDeclReplaceMap) {
- assert(p.second);
- llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(p.second));
+ for (const auto &P : FwdDeclReplaceMap) {
+ assert(P.second);
+ llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(P.second));
llvm::Metadata *Repl;
- auto it = DeclCache.find(p.first);
+ auto It = DeclCache.find(P.first);
// If there has been no definition for the declaration, call RAUW
    // with ourselves; that will destroy the temporary MDNode and
// replace it with a standard one, avoiding leaking memory.
- if (it == DeclCache.end())
- Repl = p.second;
+ if (It == DeclCache.end())
+ Repl = P.second;
else
- Repl = it->second;
+ Repl = It->second;
if (auto *GVE = dyn_cast_or_null<llvm::DIGlobalVariableExpression>(Repl))
Repl = GVE->getVariable();
@@ -4157,6 +4316,5 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) {
return llvm::DebugLoc();
llvm::MDNode *Scope = LexicalBlockStack.back();
- return llvm::DebugLoc::get(
- getLineNumber(Loc), getColumnNumber(Loc), Scope);
+ return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope);
}
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 4f7b7f2a0d9c..e632806138f0 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -19,6 +19,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Type.h"
+#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/DenseMap.h"
@@ -66,7 +67,7 @@ class CGDebugInfo {
llvm::DIType *ClassTy = nullptr;
llvm::DICompositeType *ObjTy = nullptr;
llvm::DIType *SelTy = nullptr;
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
llvm::DIType *SingletonId = nullptr;
#include "clang/Basic/OpenCLImageTypes.def"
llvm::DIType *OCLSamplerDITy = nullptr;
@@ -81,6 +82,10 @@ class CGDebugInfo {
llvm::SmallDenseMap<llvm::StringRef, llvm::StringRef> DebugPrefixMap;
+  /// Cache that maps each VLA type to its size expression,
+  /// represented by an instantiated Metadata node.
+ llvm::SmallDenseMap<QualType, llvm::Metadata *> SizeExprCache;
+
struct ObjCInterfaceCacheEntry {
const ObjCInterfaceType *Type;
llvm::DIType *Decl;
@@ -93,6 +98,10 @@ class CGDebugInfo {
/// Cache of previously constructed interfaces which may change.
llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache;
+ /// Cache of forward declarations for methods belonging to the interface.
+ llvm::DenseMap<const ObjCInterfaceDecl *, std::vector<llvm::DISubprogram *>>
+ ObjCMethodCache;
+
/// Cache of references to clang modules and precompiled headers.
llvm::DenseMap<const Module *, llvm::TrackingMDRef> ModuleCache;
@@ -223,12 +232,12 @@ class CGDebugInfo {
/// Helper function for CollectCXXBases.
/// Adds debug info entries for types in Bases that are not in SeenTypes.
- void CollectCXXBasesAux(const CXXRecordDecl *RD, llvm::DIFile *Unit,
- SmallVectorImpl<llvm::Metadata *> &EltTys,
- llvm::DIType *RecordTy,
- const CXXRecordDecl::base_class_const_range &Bases,
- llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
- llvm::DINode::DIFlags StartingFlags);
+ void CollectCXXBasesAux(
+ const CXXRecordDecl *RD, llvm::DIFile *Unit,
+ SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType *RecordTy,
+ const CXXRecordDecl::base_class_const_range &Bases,
+ llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
+ llvm::DINode::DIFlags StartingFlags);
/// A helper function to collect template parameters.
llvm::DINodeArray CollectTemplateParams(const TemplateParameterList *TPList,
@@ -247,8 +256,7 @@ class CGDebugInfo {
llvm::DIType *createFieldType(StringRef name, QualType type,
SourceLocation loc, AccessSpecifier AS,
- uint64_t offsetInBits,
- uint32_t AlignInBits,
+ uint64_t offsetInBits, uint32_t AlignInBits,
llvm::DIFile *tunit, llvm::DIScope *scope,
const RecordDecl *RD = nullptr);
@@ -309,6 +317,11 @@ public:
void finalize();
+ /// Register VLA size expression debug node with the qualified type.
+ void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) {
+ SizeExprCache[Ty] = SizeExpr;
+ }
+
/// Module debugging: Support for building PCMs.
/// @{
/// Set the main CU's DwoId field to \p Signature.
@@ -356,7 +369,8 @@ public:
/// \param ScopeLoc The location of the function body.
void EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
SourceLocation ScopeLoc, QualType FnType,
- llvm::Function *Fn, CGBuilderTy &Builder);
+ llvm::Function *Fn, bool CurFnIsThunk,
+ CGBuilderTy &Builder);
/// Start a new scope for an inlined function.
void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD);
@@ -379,16 +393,17 @@ public:
/// Emit call to \c llvm.dbg.declare for an automatic variable
/// declaration.
- void EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI,
- CGBuilderTy &Builder);
+ /// Returns a pointer to the DILocalVariable associated with the
+ /// llvm.dbg.declare, or nullptr otherwise.
+ llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl,
+ llvm::Value *AI,
+ CGBuilderTy &Builder);
/// Emit call to \c llvm.dbg.declare for an imported variable
/// declaration in a block.
- void EmitDeclareOfBlockDeclRefVariable(const VarDecl *variable,
- llvm::Value *storage,
- CGBuilderTy &Builder,
- const CGBlockInfo &blockInfo,
- llvm::Instruction *InsertPoint = nullptr);
+ void EmitDeclareOfBlockDeclRefVariable(
+ const VarDecl *variable, llvm::Value *storage, CGBuilderTy &Builder,
+ const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint = nullptr);
/// Emit call to \c llvm.dbg.declare for an argument variable
/// declaration.
@@ -451,10 +466,14 @@ public:
llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent,
SourceLocation LineLoc,
SourceLocation FileLoc);
+
private:
/// Emit call to llvm.dbg.declare for a variable declaration.
- void EmitDeclare(const VarDecl *decl, llvm::Value *AI,
- llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder);
+ /// Returns a pointer to the DILocalVariable associated with the
+ /// llvm.dbg.declare, or nullptr otherwise.
+ llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI,
+ llvm::Optional<unsigned> ArgNo,
+ CGBuilderTy &Builder);
/// Build up structure info for the byref. See \a BuildByRefType.
llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
@@ -482,8 +501,11 @@ private:
std::string remapDIPath(StringRef) const;
/// Compute the file checksum debug info for input file ID.
- llvm::DIFile::ChecksumKind computeChecksum(FileID FID,
- SmallString<32> &Checksum) const;
+ Optional<llvm::DIFile::ChecksumKind>
+ computeChecksum(FileID FID, SmallString<32> &Checksum) const;
+
+ /// Get the source of the given file ID.
+ Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
/// Get the file debug info descriptor for the input location.
llvm::DIFile *getOrCreateFile(SourceLocation Loc);
@@ -637,7 +659,7 @@ public:
~ApplyDebugLocation();
- /// \brief Apply TemporaryLocation if it is valid. Otherwise switch
+ /// Apply TemporaryLocation if it is valid. Otherwise switch
/// to an artificial debug location that has a valid scope, but no
/// line information.
///
@@ -651,7 +673,7 @@ public:
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF) {
return ApplyDebugLocation(CGF, false, SourceLocation());
}
- /// \brief Apply TemporaryLocation if it is valid. Otherwise switch
+ /// Apply TemporaryLocation if it is valid. Otherwise switch
/// to an artificial debug location that has a valid scope, but no
/// line information.
static ApplyDebugLocation
@@ -668,7 +690,6 @@ public:
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF) {
return ApplyDebugLocation(CGF, true, SourceLocation());
}
-
};
/// A scoped helper to set the current debug location to an inlined location.
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 04585a8afbb6..57b2fbadbeec 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -229,18 +229,19 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
LangAS AS = GetGlobalVarAddressSpace(&D);
unsigned TargetAS = getContext().getTargetAddressSpace(AS);
- // Local address space cannot have an initializer.
+ // OpenCL variables in local address space and CUDA shared
+ // variables cannot have an initializer.
llvm::Constant *Init = nullptr;
- if (Ty.getAddressSpace() != LangAS::opencl_local)
- Init = EmitNullConstant(Ty);
- else
+ if (Ty.getAddressSpace() == LangAS::opencl_local ||
+ D.hasAttr<CUDASharedAttr>())
Init = llvm::UndefValue::get(LTy);
+ else
+ Init = EmitNullConstant(Ty);
llvm::GlobalVariable *GV = new llvm::GlobalVariable(
getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
- setGlobalVisibility(GV, &D, ForDefinition);
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
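
A sketch of the declarations the initializer change affects (OpenCL and CUDA dialects assumed):

    // OpenCL:
    kernel void k1(void) { __local int scratch[64]; }  /* undef, not zeroinit */
    // CUDA device code:
    __global__ void k2() { __shared__ float tile[32];  /* likewise undef */ }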
@@ -248,12 +249,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
if (D.getTLSKind())
setTLSMode(GV, D);
- if (D.isExternallyVisible()) {
- if (D.hasAttr<DLLImportAttr>())
- GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
- else if (D.hasAttr<DLLExportAttr>())
- GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
- }
+ setGVProperties(GV, &D);
// Make sure the result is of the correct type.
LangAS ExpectedAS = Ty.getAddressSpace();
@@ -291,8 +287,11 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
// never defer them.
assert(isa<ObjCMethodDecl>(DC) && "unexpected parent code decl");
}
- if (GD.getDecl())
+ if (GD.getDecl()) {
+ // Disable emission of the parent function for the OpenMP device codegen.
+ CGOpenMPRuntime::DisableAutoDeclareTargetRAII NoDeclTarget(*this);
(void)GetAddrOfGlobal(GD);
+ }
return Addr;
}
@@ -344,6 +343,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
OldGV->getThreadLocalMode(),
CGM.getContext().getTargetAddressSpace(D.getType()));
GV->setVisibility(OldGV->getVisibility());
+ GV->setDSOLocal(OldGV->isDSOLocal());
GV->setComdat(OldGV->getComdat());
// Steal the name of the old global
@@ -469,13 +469,11 @@ namespace {
}
};
- struct DestroyNRVOVariable final : EHScopeStack::Cleanup {
- DestroyNRVOVariable(Address addr,
- const CXXDestructorDecl *Dtor,
- llvm::Value *NRVOFlag)
- : Dtor(Dtor), NRVOFlag(NRVOFlag), Loc(addr) {}
+ template <class Derived>
+ struct DestroyNRVOVariable : EHScopeStack::Cleanup {
+ DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag)
+ : NRVOFlag(NRVOFlag), Loc(addr) {}
- const CXXDestructorDecl *Dtor;
llvm::Value *NRVOFlag;
Address Loc;
@@ -494,12 +492,39 @@ namespace {
CGF.EmitBlock(RunDtorBB);
}
+ static_cast<Derived *>(this)->emitDestructorCall(CGF);
+
+ if (NRVO) CGF.EmitBlock(SkipDtorBB);
+ }
+
+ virtual ~DestroyNRVOVariable() = default;
+ };
+
+ struct DestroyNRVOVariableCXX final
+ : DestroyNRVOVariable<DestroyNRVOVariableCXX> {
+ DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor,
+ llvm::Value *NRVOFlag)
+ : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag),
+ Dtor(Dtor) {}
+
+ const CXXDestructorDecl *Dtor;
+
+ void emitDestructorCall(CodeGenFunction &CGF) {
CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
/*ForVirtualBase=*/false,
- /*Delegating=*/false,
- Loc);
+ /*Delegating=*/false, Loc);
+ }
+ };
- if (NRVO) CGF.EmitBlock(SkipDtorBB);
+ struct DestroyNRVOVariableC final
+ : DestroyNRVOVariable<DestroyNRVOVariableC> {
+ DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty)
+ : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {}
+
+ QualType Ty;
+
+ void emitDestructorCall(CodeGenFunction &CGF) {
+ CGF.destroyNonTrivialCStruct(CGF, Loc, Ty);
}
};
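
DestroyNRVOVariableC covers C structs that are non-trivial to destroy, which in practice means ARC-qualified fields; a sketch (Objective-C ARC assumed):

    struct Wrapper { __strong id obj; };
    struct Wrapper make(void);
    struct Wrapper use(void) {
      struct Wrapper w = make();  // cleanup via destroyNonTrivialCStruct,
      return w;                   // guarded by the same NRVO flag as C++
    }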
@@ -821,11 +846,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
}
-/// canEmitInitWithFewStoresAfterMemset - Decide whether we can emit the
-/// non-zero parts of the specified initializer with equal or fewer than
-/// NumStores scalar stores.
-static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
- unsigned &NumStores) {
+/// Decide whether we can emit the non-zero parts of the specified initializer
+/// with equal or fewer than NumStores scalar stores.
+static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
+ unsigned &NumStores) {
// Zero and Undef never requires any extra stores.
if (isa<llvm::ConstantAggregateZero>(Init) ||
isa<llvm::ConstantPointerNull>(Init) ||
@@ -840,7 +864,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) {
for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));
- if (!canEmitInitWithFewStoresAfterMemset(Elt, NumStores))
+ if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores))
return false;
}
return true;
@@ -850,7 +874,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
dyn_cast<llvm::ConstantDataSequential>(Init)) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
llvm::Constant *Elt = CDS->getElementAsConstant(i);
- if (!canEmitInitWithFewStoresAfterMemset(Elt, NumStores))
+ if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores))
return false;
}
return true;
@@ -860,18 +884,18 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
return false;
}
-/// emitStoresForInitAfterMemset - For inits that
-/// canEmitInitWithFewStoresAfterMemset returned true for, emit the scalar
-/// stores that would be required.
-static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
- bool isVolatile, CGBuilderTy &Builder) {
+/// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
+/// the scalar stores that would be required.
+static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
+ llvm::Constant *Init, Address Loc,
+ bool isVolatile, CGBuilderTy &Builder) {
assert(!Init->isNullValue() && !isa<llvm::UndefValue>(Init) &&
- "called emitStoresForInitAfterMemset for zero or undef value.");
+ "called emitStoresForInitAfterBZero for zero or undef value.");
if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
isa<llvm::ConstantExpr>(Init)) {
- Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile);
+ Builder.CreateStore(Init, Loc, isVolatile);
return;
}
@@ -882,8 +906,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
// If necessary, get a pointer to the element and emit it.
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
- emitStoresForInitAfterMemset(
- Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i),
+ emitStoresForInitAfterBZero(
+ CGM, Elt,
+ Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()),
isVolatile, Builder);
}
return;
@@ -897,19 +922,19 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
// If necessary, get a pointer to the element and emit it.
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
- emitStoresForInitAfterMemset(
- Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i),
+ emitStoresForInitAfterBZero(
+ CGM, Elt,
+ Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()),
isVolatile, Builder);
}
}
-/// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset
-/// plus some stores to initialize a local variable instead of using a memcpy
-/// from a constant global. It is beneficial to use memset if the global is all
-/// zeros, or mostly zeros and large.
-static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init,
- uint64_t GlobalSize) {
- // If a global is all zeros, always use a memset.
+/// Decide whether we should use bzero plus some stores to initialize a local
+/// variable instead of using a memcpy from a constant global. It is beneficial
+/// to use bzero if the global is all zeros, or mostly zeros and large.
+static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init,
+ uint64_t GlobalSize) {
+ // If a global is all zeros, always use a bzero.
if (isa<llvm::ConstantAggregateZero>(Init)) return true;
// If a non-zero global is <= 32 bytes, always use a memcpy. If it is large,
@@ -920,7 +945,114 @@ static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init,
uint64_t SizeLimit = 32;
return GlobalSize > SizeLimit &&
- canEmitInitWithFewStoresAfterMemset(Init, StoreBudget);
+ canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
+}
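
A sketch of an initializer the bzero path is meant for (C99 designated initializer):

    // 4 KiB of mostly zeros: memset(0) plus a single scalar store beats
    // memcpy'ing a same-sized constant global.
    int table[1024] = { [7] = 42 };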
+
+/// A byte pattern.
+///
+/// Can be "any" pattern if the value was padding or known to be undef.
+/// Can be "none" pattern if a sequence doesn't exist.
+class BytePattern {
+ uint8_t Val;
+ enum class ValueType : uint8_t { Specific, Any, None } Type;
+ BytePattern(ValueType Type) : Type(Type) {}
+
+public:
+ BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {}
+ static BytePattern Any() { return BytePattern(ValueType::Any); }
+ static BytePattern None() { return BytePattern(ValueType::None); }
+ bool isAny() const { return Type == ValueType::Any; }
+ bool isNone() const { return Type == ValueType::None; }
+ bool isValued() const { return Type == ValueType::Specific; }
+ uint8_t getValue() const {
+ assert(isValued());
+ return Val;
+ }
+ BytePattern merge(const BytePattern Other) const {
+ if (isNone() || Other.isNone())
+ return None();
+ if (isAny())
+ return Other;
+ if (Other.isAny())
+ return *this;
+ if (getValue() == Other.getValue())
+ return *this;
+ return None();
+ }
+};
+
+/// Figures out whether the constant can be initialized with memset.
+static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) {
+ if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C))
+ return BytePattern(0x00);
+ if (isa<llvm::UndefValue>(C))
+ return BytePattern::Any();
+
+ if (isa<llvm::ConstantInt>(C)) {
+ auto *Int = cast<llvm::ConstantInt>(C);
+ if (Int->getBitWidth() % 8 != 0)
+ return BytePattern::None();
+ const llvm::APInt &Value = Int->getValue();
+ if (Value.isSplat(8))
+ return BytePattern(Value.getLoBits(8).getLimitedValue());
+ return BytePattern::None();
+ }
+
+ if (isa<llvm::ConstantFP>(C)) {
+ auto *FP = cast<llvm::ConstantFP>(C);
+ llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt();
+ if (Bits.getBitWidth() % 8 != 0)
+ return BytePattern::None();
+ if (!Bits.isSplat(8))
+ return BytePattern::None();
+ return BytePattern(Bits.getLimitedValue() & 0xFF);
+ }
+
+ if (isa<llvm::ConstantVector>(C)) {
+ llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue();
+ if (Splat)
+ return constantIsRepeatedBytePattern(Splat);
+ return BytePattern::None();
+ }
+
+ if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) {
+ BytePattern Pattern(BytePattern::Any());
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+ llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I));
+ Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+ if (Pattern.isNone())
+ return Pattern;
+ }
+ return Pattern;
+ }
+
+ if (llvm::ConstantDataSequential *CDS =
+ dyn_cast<llvm::ConstantDataSequential>(C)) {
+ BytePattern Pattern(BytePattern::Any());
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ llvm::Constant *Elt = CDS->getElementAsConstant(I);
+ Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+ if (Pattern.isNone())
+ return Pattern;
+ }
+ return Pattern;
+ }
+
+ // BlockAddress, ConstantExpr, and everything else is scary.
+ return BytePattern::None();
+}
+
+/// Decide whether we should use memset to initialize a local variable instead
+/// of using a memcpy from a constant global. Assumes we've already decided to
+/// not use bzero.
+/// FIXME: We could be more clever, as we are for bzero above, and generate
+/// memset followed by stores. It's unclear whether that's worth the effort.
+static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init,
+ uint64_t GlobalSize) {
+ uint64_t SizeLimit = 32;
+ if (GlobalSize <= SizeLimit)
+ return BytePattern::None();
+ return constantIsRepeatedBytePattern(Init);
}
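
A sketch of an initializer the new memset path catches (the range designator is a GNU extension):

    // Every byte of the 4 KiB initializer is 0xFF, so this can become one
    // memset(ptr, 0xff, 4096) instead of a copy from a constant global.
    int filled[1024] = { [0 ... 1023] = -1 };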
/// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
@@ -940,6 +1072,9 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
if (!ShouldEmitLifetimeMarkers)
return nullptr;
+ assert(Addr->getType()->getPointerAddressSpace() ==
+ CGM.getDataLayout().getAllocaAddrSpace() &&
+ "Pointer should be in alloca address space");
llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
@@ -949,12 +1084,68 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
}
void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
+ assert(Addr->getType()->getPointerAddressSpace() ==
+ CGM.getDataLayout().getAllocaAddrSpace() &&
+ "Pointer should be in alloca address space");
Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
C->setDoesNotThrow();
}
+void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
+ CGDebugInfo *DI, const VarDecl &D, bool EmitDebugInfo) {
+  // For each dimension, store its QualType and the corresponding
+  // size-expression Value.
+ SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions;
+
+ // Break down the array into individual dimensions.
+ QualType Type1D = D.getType();
+ while (getContext().getAsVariableArrayType(Type1D)) {
+ auto VlaSize = getVLAElements1D(Type1D);
+ if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
+ Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
+ else {
+ auto SizeExprAddr = CreateDefaultAlignTempAlloca(
+ VlaSize.NumElts->getType(), "__vla_expr");
+ Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
+ Dimensions.emplace_back(SizeExprAddr.getPointer(),
+ Type1D.getUnqualifiedType());
+ }
+ Type1D = VlaSize.Type;
+ }
+
+ if (!EmitDebugInfo)
+ return;
+
+ // Register each dimension's size-expression with a DILocalVariable,
+ // so that it can be used by CGDebugInfo when instantiating a DISubrange
+ // to describe this array.
+ for (auto &VlaSize : Dimensions) {
+ llvm::Metadata *MD;
+ if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
+ MD = llvm::ConstantAsMetadata::get(C);
+ else {
+ // Create an artificial VarDecl to generate debug info for.
+ IdentifierInfo &NameIdent = getContext().Idents.getOwn(
+ cast<llvm::AllocaInst>(VlaSize.NumElts)->getName());
+ auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
+ auto QT = getContext().getIntTypeForBitwidth(
+ VlaExprTy->getScalarSizeInBits(), false);
+ auto *ArtificialDecl = VarDecl::Create(
+ getContext(), const_cast<DeclContext *>(D.getDeclContext()),
+ D.getLocation(), D.getLocation(), &NameIdent, QT,
+ getContext().CreateTypeSourceInfo(QT), SC_Auto);
+ ArtificialDecl->setImplicit();
+
+ MD = DI->EmitDeclareOfAutoVariable(ArtificialDecl, VlaSize.NumElts,
+ Builder);
+ }
+ assert(MD && "No Size expression debug node created");
+ DI->registerVLASizeExpression(VlaSize.Type, MD);
+ }
+}
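A sketch of the construct this serves (C99 VLAs, which Clang also accepts in C++ as an extension); with debug info enabled, each non-constant bound gets a "__vla_expr" alloca and an artificial variable that a DISubrange can reference:

    void Fill(int N, int M) {
      double A[N][M]; // two dimensions -> two registered size expressions
      A[0][0] = 1.0;
    }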
+
/// EmitAutoVarAlloca - Emit the alloca and debug information for a
/// local variable. Does not emit initialization or destruction.
CodeGenFunction::AutoVarEmission
@@ -975,7 +1166,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if (Ty->isVariablyModifiedType())
EmitVariablyModifiedType(Ty);
+ auto *DI = getDebugInfo();
+ bool EmitDebugInfo = DI && CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::LimitedDebugInfo;
+
Address address = Address::invalid();
+ Address AllocaAddr = Address::invalid();
if (Ty->isConstantSizeType()) {
bool NRVO = getLangOpts().ElideConstructors &&
D.isNRVOVariable();
@@ -1016,16 +1212,27 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
}
// A normal fixed sized variable becomes an alloca in the entry block,
- // unless it's an NRVO variable.
-
- if (NRVO) {
+ // unless:
+ // - it's an NRVO variable.
+ // - we are compiling OpenMP and it's an OpenMP local variable.
+
+ Address OpenMPLocalAddr =
+ getLangOpts().OpenMP
+ ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+ : Address::invalid();
+ if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
+ address = OpenMPLocalAddr;
+ } else if (NRVO) {
// The named return value optimization: allocate this variable in the
// return slot, so that we can elide the copy when returning this
// variable (C++0x [class.copy]p34).
address = ReturnValue;
if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
- if (!cast<CXXRecordDecl>(RecordTy->getDecl())->hasTrivialDestructor()) {
+ const auto *RD = RecordTy->getDecl();
+ const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
+ if ((CXXRD && !CXXRD->hasTrivialDestructor()) ||
+ RD->isNonTrivialToPrimitiveDestroy()) {
// Create a flag that is used to indicate when the NRVO was applied
// to this variable. Set it to zero to indicate that NRVO was not
// applied.
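A sketch of the case the flag guards; 'Widget' is invented for this example, and its user-provided destructor is what makes the flag necessary:

    struct Widget { ~Widget() {} int V = 0; };
    Widget Make() {
      Widget W; // constructed directly in the return slot
      return W; // copy elided; the flag records that NRVO applied, so the
                // normal destructor cleanup is skipped on this path
    }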
@@ -1055,7 +1262,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Create the alloca. Note that we set the name separately from
// building the instruction so that it's there even in no-asserts
// builds.
- address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
+ address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(),
+ /*ArraySize=*/nullptr, &AllocaAddr);
// Don't emit lifetime markers for MSVC catch parameters. The lifetime of
// the catch parameter starts in the catchpad instruction, and we can't
@@ -1083,7 +1291,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
!(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) {
uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy);
emission.SizeForLifetimeMarkers =
- EmitLifetimeStart(size, address.getPointer());
+ EmitLifetimeStart(size, AllocaAddr.getPointer());
}
} else {
assert(!emission.useLifetimeMarkers());
@@ -1108,28 +1316,28 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
pushStackRestore(NormalCleanup, Stack);
}
- llvm::Value *elementCount;
- QualType elementType;
- std::tie(elementCount, elementType) = getVLASize(Ty);
-
- llvm::Type *llvmTy = ConvertTypeForMem(elementType);
+ auto VlaSize = getVLASize(Ty);
+ llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type);
// Allocate memory for the array.
- address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
+ address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts,
+ &AllocaAddr);
+
+ // If we have debug info enabled, properly describe the VLA dimensions for
+ // this type by registering the vla size expression for each of the
+ // dimensions.
+ EmitAndRegisterVariableArrayDimensions(DI, D, EmitDebugInfo);
}
setAddrOfLocalVar(&D, address);
emission.Addr = address;
+ emission.AllocaAddr = AllocaAddr;
// Emit debug info for local var declaration.
- if (HaveInsertPoint())
- if (CGDebugInfo *DI = getDebugInfo()) {
- if (CGM.getCodeGenOpts().getDebugInfo() >=
- codegenoptions::LimitedDebugInfo) {
- DI->setLocation(D.getLocation());
- DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
- }
- }
+ if (EmitDebugInfo && HaveInsertPoint()) {
+ DI->setLocation(D.getLocation());
+ (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
+ }
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, address.getPointer());
@@ -1137,23 +1345,36 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Make sure we call @llvm.lifetime.end.
if (emission.useLifetimeMarkers())
EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
- emission.getAllocatedAddress(),
+ emission.getOriginalAllocatedAddress(),
emission.getSizeForLifetimeMarkers());
return emission;
}
+static bool isCapturedBy(const VarDecl &, const Expr *);
+
+/// Determines whether the given __block variable is potentially
+/// captured by the given statement.
+static bool isCapturedBy(const VarDecl &Var, const Stmt *S) {
+ if (const Expr *E = dyn_cast<Expr>(S))
+ return isCapturedBy(Var, E);
+ for (const Stmt *SubStmt : S->children())
+ if (isCapturedBy(Var, SubStmt))
+ return true;
+ return false;
+}
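A sketch of what the walk detects (requires -fblocks; names are illustrative):

    void Run() {
      __block int X = 0;
      void (^Inc)(void) = ^{ X++; }; // 'X' is captured by this block literal,
      Inc();                         // which is what isCapturedBy reports
    }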
+
/// Determines whether the given __block variable is potentially
/// captured by the given expression.
-static bool isCapturedBy(const VarDecl &var, const Expr *e) {
+static bool isCapturedBy(const VarDecl &Var, const Expr *E) {
// Skip the most common kinds of expressions that make
// hierarchy-walking expensive.
- e = e->IgnoreParenCasts();
+ E = E->IgnoreParenCasts();
- if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) {
- const BlockDecl *block = be->getBlockDecl();
- for (const auto &I : block->captures()) {
- if (I.getVariable() == &var)
+ if (const BlockExpr *BE = dyn_cast<BlockExpr>(E)) {
+ const BlockDecl *Block = BE->getBlockDecl();
+ for (const auto &I : Block->captures()) {
+ if (I.getVariable() == &Var)
return true;
}
@@ -1161,19 +1382,19 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) {
return false;
}
- if (const StmtExpr *SE = dyn_cast<StmtExpr>(e)) {
+ if (const StmtExpr *SE = dyn_cast<StmtExpr>(E)) {
const CompoundStmt *CS = SE->getSubStmt();
for (const auto *BI : CS->body())
- if (const auto *E = dyn_cast<Expr>(BI)) {
- if (isCapturedBy(var, E))
- return true;
+ if (const auto *BIE = dyn_cast<Expr>(BI)) {
+ if (isCapturedBy(Var, BIE))
+ return true;
}
else if (const auto *DS = dyn_cast<DeclStmt>(BI)) {
// Special-case declarations.
for (const auto *I : DS->decls()) {
if (const auto *VD = dyn_cast<VarDecl>((I))) {
const Expr *Init = VD->getInit();
- if (Init && isCapturedBy(var, Init))
+ if (Init && isCapturedBy(Var, Init))
return true;
}
}
@@ -1185,14 +1406,14 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) {
return false;
}
- for (const Stmt *SubStmt : e->children())
- if (isCapturedBy(var, cast<Expr>(SubStmt)))
+ for (const Stmt *SubStmt : E->children())
+ if (isCapturedBy(Var, SubStmt))
return true;
return false;
}
-/// \brief Determine whether the given initializer is trivial in the sense
+/// Determine whether the given initializer is trivial in the sense
/// that it requires no code to be generated.
bool CodeGenFunction::isTrivialInitializer(const Expr *Init) {
if (!Init)
@@ -1232,6 +1453,19 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
if (emission.IsByRef)
emitByrefStructureInit(emission);
+ // Initialize the variable here if it doesn't have an initializer and it is a
+ // C struct that is non-trivial to initialize or an array containing such a
+ // struct.
+ if (!Init &&
+ type.isNonTrivialToPrimitiveDefaultInitialize() ==
+ QualType::PDIK_Struct) {
+ LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type);
+ if (emission.IsByRef)
+ drillIntoBlockVariable(*this, Dst, &D);
+ defaultInitNonTrivialCStructVar(Dst);
+ return;
+ }
+
if (isTrivialInitializer(Init))
return;
@@ -1270,58 +1504,66 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
llvm::ConstantInt::get(IntPtrTy,
getContext().getTypeSizeInChars(type).getQuantity());
- llvm::Type *BP = AllocaInt8PtrTy;
+ llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace());
if (Loc.getType() != BP)
Loc = Builder.CreateBitCast(Loc, BP);
- // If the initializer is all or mostly zeros, codegen with memset then do
- // a few stores afterward.
- if (shouldUseMemSetPlusStoresToInitialize(constant,
- CGM.getDataLayout().getTypeAllocSize(constant->getType()))) {
+ // If the initializer is all or mostly the same, codegen with bzero / memset
+ // then do a few stores afterward.
+ uint64_t ConstantSize =
+ CGM.getDataLayout().getTypeAllocSize(constant->getType());
+ if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
isVolatile);
// Zero and undef don't require any stores.
if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) {
- Loc = Builder.CreateBitCast(Loc, constant->getType()->getPointerTo());
- emitStoresForInitAfterMemset(constant, Loc.getPointer(),
- isVolatile, Builder);
- }
- } else {
- // Otherwise, create a temporary global with the initializer then
- // memcpy from the global to the alloca.
- std::string Name = getStaticDeclName(CGM, D);
- unsigned AS = 0;
- if (getLangOpts().OpenCL) {
- AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
- BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+ Loc = Builder.CreateBitCast(Loc,
+ constant->getType()->getPointerTo(Loc.getAddressSpace()));
+ emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
}
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true,
- llvm::GlobalValue::PrivateLinkage,
- constant, Name, nullptr,
- llvm::GlobalValue::NotThreadLocal, AS);
- GV->setAlignment(Loc.getAlignment().getQuantity());
- GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- Address SrcPtr = Address(GV, Loc.getAlignment());
- if (SrcPtr.getType() != BP)
- SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+ return;
+ }
- Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+ BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
+ if (!Pattern.isNone()) {
+ uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue();
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
+ isVolatile);
+ return;
}
+
+ // Otherwise, create a temporary global with the initializer then
+ // memcpy from the global to the alloca.
+ std::string Name = getStaticDeclName(CGM, D);
+ unsigned AS = CGM.getContext().getTargetAddressSpace(
+ CGM.getStringLiteralAddressSpace());
+ BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ CGM.getModule(), constant->getType(), true,
+ llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
+ llvm::GlobalValue::NotThreadLocal, AS);
+ GV->setAlignment(Loc.getAlignment().getQuantity());
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ Address SrcPtr = Address(GV, Loc.getAlignment());
+ if (SrcPtr.getType() != BP)
+ SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+
+ Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
}
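Taken together, the rewritten path above picks among three lowerings. A sketch, with sizes chosen to clear the 32-byte threshold (exact codegen may vary by target):

    void InitForms() {
      int Zeros[16] = {}; // all zero -> memset of 0
      unsigned Splat[10] = {0x55555555, 0x55555555, 0x55555555, 0x55555555,
                            0x55555555, 0x55555555, 0x55555555, 0x55555555,
                            0x55555555, 0x55555555}; // every byte 0x55 -> memset of 0x55
      int Mixed[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // -> memcpy from a private global
    }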
-/// Emit an expression as an initializer for a variable at the given
-/// location. The expression is not necessarily the normal
-/// initializer for the variable, and the address is not necessarily
+/// Emit an expression as an initializer for an object (variable, field, etc.)
+/// at the given location. The expression is not necessarily the normal
+/// initializer for the object, and the address is not necessarily
/// its normal location.
///
/// \param init the initializing expression
-/// \param var the variable to act as if we're initializing
+/// \param D the object to act as if we're initializing
/// \param loc the address to initialize; its type is a pointer
-/// to the LLVM mapping of the variable's type
+/// to the LLVM mapping of the object's type
/// \param alignment the alignment of the address
-/// \param capturedByInit true if the variable is a __block variable
+/// \param capturedByInit true if \p D is a __block variable
/// whose address is potentially changed by the initializer
void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
LValue lvalue, bool capturedByInit) {
@@ -1349,11 +1591,17 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
if (type->isAtomicType()) {
EmitAtomicInit(const_cast<Expr*>(init), lvalue);
} else {
+ AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap;
+ if (isa<VarDecl>(D))
+ Overlap = AggValueSlot::DoesNotOverlap;
+ else if (auto *FD = dyn_cast<FieldDecl>(D))
+ Overlap = overlapForFieldInit(FD);
// TODO: how can we delay here if D is captured by its initializer?
EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ Overlap));
}
return;
}
@@ -1386,8 +1634,8 @@ void CodeGenFunction::emitAutoVarTypeCleanup(
if (emission.NRVOFlag) {
assert(!type->isArrayType());
CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor();
- EHStack.pushCleanup<DestroyNRVOVariable>(cleanupKind, addr,
- dtor, emission.NRVOFlag);
+ EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor,
+ emission.NRVOFlag);
return;
}
break;
@@ -1406,6 +1654,16 @@ void CodeGenFunction::emitAutoVarTypeCleanup(
case QualType::DK_objc_weak_lifetime:
break;
+
+ case QualType::DK_nontrivial_c_struct:
+ destroyer = CodeGenFunction::destroyNonTrivialCStruct;
+ if (emission.NRVOFlag) {
+ assert(!type->isArrayType());
+ EHStack.pushCleanup<DestroyNRVOVariableC>(cleanupKind, addr,
+ emission.NRVOFlag, type);
+ return;
+ }
+ break;
}
// If we haven't chosen a more specific destroyer, use the default.
@@ -1452,9 +1710,15 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
}
// If this is a block variable, call _Block_object_destroy
- // (on the unforwarded address).
- if (emission.IsByRef)
- enterByrefCleanup(emission);
+ // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC
+ // mode.
+ if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+ BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
+ if (emission.Variable->getType().isObjCGCWeak())
+ Flags |= BLOCK_FIELD_IS_WEAK;
+ enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags,
+ /*LoadBlockVarAddr*/ false);
+ }
}
CodeGenFunction::Destroyer *
@@ -1467,6 +1731,8 @@ CodeGenFunction::getDestroyer(QualType::DestructionKind kind) {
return destroyARCStrongPrecise;
case QualType::DK_objc_weak_lifetime:
return destroyARCWeak;
+ case QualType::DK_nontrivial_c_struct:
+ return destroyNonTrivialCStruct;
}
llvm_unreachable("Unknown DestructionKind");
}
@@ -1506,9 +1772,6 @@ void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) {
void CodeGenFunction::pushLifetimeExtendedDestroy(
CleanupKind cleanupKind, Address addr, QualType type,
Destroyer *destroyer, bool useEHCleanupForArray) {
- assert(!isInConditionalBranch() &&
- "performing lifetime extension from within conditional");
-
// Push an EH-only cleanup for the object now.
// FIXME: When popping normal cleanups, we need to keep this EH cleanup
// around in case a temporary's destructor throws an exception.
@@ -1791,9 +2054,12 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// Use better IR generation for certain implicit parameters.
if (auto IPD = dyn_cast<ImplicitParamDecl>(&D)) {
// The only implicit argument a block has is its literal.
- // We assume this is always passed directly.
+ // This may be passed as an inalloca'ed value on Windows x86.
if (BlockInfo) {
- setBlockContextParameter(IPD, ArgNo, Arg.getDirectValue());
+ llvm::Value *V = Arg.isIndirect()
+ ? Builder.CreateLoad(Arg.getIndirectAddress())
+ : Arg.getDirectValue();
+ setBlockContextParameter(IPD, ArgNo, V);
return;
}
}
@@ -1809,20 +2075,50 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
if (DeclPtr.getType() != IRTy)
DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName());
+ // Indirect argument is in alloca address space, which may be different
+ // from the default address space.
+ auto AllocaAS = CGM.getASTAllocaAddressSpace();
+ auto *V = DeclPtr.getPointer();
+ auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
+ auto DestLangAS =
+ getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
+ if (SrcLangAS != DestLangAS) {
+ assert(getContext().getTargetAddressSpace(SrcLangAS) ==
+ CGM.getDataLayout().getAllocaAddrSpace());
+ auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
+ auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS);
+ DeclPtr = Address(getTargetHooks().performAddrSpaceCast(
+ *this, V, SrcLangAS, DestLangAS, T, true),
+ DeclPtr.getAlignment());
+ }
// Push a destructor cleanup for this parameter if the ABI requires it.
// Don't push a cleanup in a thunk for a method that will also emit a
// cleanup.
- if (!IsScalar && !CurFuncIsThunk &&
- getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
- const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
- if (RD && RD->hasNonTrivialDestructor())
- pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty);
+ if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk &&
+ Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
+ if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) {
+ assert((DtorKind == QualType::DK_cxx_destructor ||
+ DtorKind == QualType::DK_nontrivial_c_struct) &&
+ "unexpected destructor type");
+ pushDestroy(DtorKind, DeclPtr, Ty);
+ CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
+ EHStack.stable_begin();
+ }
}
} else {
- // Otherwise, create a temporary to hold the value.
- DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
- D.getName() + ".addr");
+ // Check if the parameter address is controlled by the OpenMP runtime.
+ Address OpenMPLocalAddr =
+ getLangOpts().OpenMP
+ ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+ : Address::invalid();
+ if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
+ DeclPtr = OpenMPLocalAddr;
+ } else {
+ // Otherwise, create a temporary to hold the value.
+ DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
+ D.getName() + ".addr");
+ }
DoStore = true;
}
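A sketch of the parameter cleanup this registers, assuming an ABI where arguments are destroyed in the callee (e.g. the Microsoft C++ ABI); 'S' is invented for the example:

    struct S { ~S() {} int V; };
    void Consume(S Arg) {
      // With isParamDestroyedInCallee(), ~S() for 'Arg' is emitted here, and
      // CalleeDestructedParamCleanups remembers the cleanup's stack depth.
    }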
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 042997831702..5e237d7e0b69 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -53,7 +53,8 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
case TEK_Aggregate:
CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
return;
}
llvm_unreachable("bad evaluation kind");
@@ -79,6 +80,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
case QualType::DK_objc_strong_lifetime:
case QualType::DK_objc_weak_lifetime:
+ case QualType::DK_nontrivial_c_struct:
// We don't care about releasing objects during process teardown.
assert(!D.getTLSKind() && "should have rejected this");
return;
@@ -173,10 +175,12 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
ConstantAddress DeclAddr(DeclPtr, getContext().getDeclAlign(&D));
if (!T->isReferenceType()) {
- if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>())
+ if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
+ D.hasAttr<OMPThreadPrivateDeclAttr>()) {
(void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition(
&D, DeclAddr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(),
PerformInit, this);
+ }
if (PerformInit)
EmitDeclInit(*this, D, DeclAddr);
if (CGM.isTypeConstant(D.getType(), true))
@@ -232,7 +236,10 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
llvm::Constant *addr) {
// Create a function which calls the destructor.
llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr);
+ registerGlobalDtorWithAtExit(dtorStub);
+}
+void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
// extern "C" int atexit(void (*f)(void));
llvm::FunctionType *atexitTy =
llvm::FunctionType::get(IntTy, dtorStub->getType(), false);
@@ -309,7 +316,7 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
Fn->setSection(Section);
}
- SetInternalFunctionAttributes(nullptr, Fn, FI);
+ SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
Fn->setCallingConv(getRuntimeCC());
@@ -328,6 +335,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::HWAddress, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+ if (getLangOpts().Sanitize.has(SanitizerKind::KernelHWAddress) &&
+ !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc))
+ Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+
if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
!isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
@@ -340,6 +351,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SafeStack);
+ if (getLangOpts().Sanitize.has(SanitizerKind::ShadowCallStack) &&
+ !isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc))
+ Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
+
return Fn;
}
@@ -376,6 +391,10 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
D->hasAttr<CUDASharedAttr>()))
return;
+ if (getLangOpts().OpenMP &&
+ getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
+ return;
+
// Check if we've already initialized this decl.
auto I = DelayedCXXInitPosition.find(D);
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 1ec084ff3f5b..c9820c242554 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -65,7 +65,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() {
if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
name = "__std_terminate";
else
- name = "\01?terminate@@YAXXZ";
+ name = "?terminate@@YAXXZ";
} else if (getLangOpts().ObjC1 &&
getLangOpts().ObjCRuntime.hasTerminate())
name = "objc_terminate";
@@ -111,21 +111,32 @@ const EHPersonality
EHPersonality::MSVC_C_specific_handler = { "__C_specific_handler", nullptr };
const EHPersonality
EHPersonality::MSVC_CxxFrameHandler3 = { "__CxxFrameHandler3", nullptr };
+const EHPersonality
+EHPersonality::GNU_Wasm_CPlusPlus = { "__gxx_wasm_personality_v0", nullptr };
-static const EHPersonality &getCPersonality(const llvm::Triple &T,
+static const EHPersonality &getCPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ const llvm::Triple &T = Target.getTriple();
+ if (T.isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
if (L.SjLjExceptions)
return EHPersonality::GNU_C_SJLJ;
+ if (L.DWARFExceptions)
+ return EHPersonality::GNU_C;
if (L.SEHExceptions)
return EHPersonality::GNU_C_SEH;
return EHPersonality::GNU_C;
}
-static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
+static const EHPersonality &getObjCPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ const llvm::Triple &T = Target.getTriple();
+ if (T.isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
+
switch (L.ObjCRuntime.getKind()) {
case ObjCRuntime::FragileMacOSX:
- return getCPersonality(T, L);
+ return getCPersonality(Target, L);
case ObjCRuntime::MacOSX:
case ObjCRuntime::iOS:
case ObjCRuntime::WatchOS:
@@ -145,24 +156,37 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
llvm_unreachable("bad runtime kind");
}
-static const EHPersonality &getCXXPersonality(const llvm::Triple &T,
+static const EHPersonality &getCXXPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ const llvm::Triple &T = Target.getTriple();
+ if (T.isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
if (L.SjLjExceptions)
return EHPersonality::GNU_CPlusPlus_SJLJ;
+ if (L.DWARFExceptions)
+ return EHPersonality::GNU_CPlusPlus;
if (L.SEHExceptions)
return EHPersonality::GNU_CPlusPlus_SEH;
+ // Wasm EH is a non-MVP feature for now.
+ if (Target.hasFeature("exception-handling") &&
+ (T.getArch() == llvm::Triple::wasm32 ||
+ T.getArch() == llvm::Triple::wasm64))
+ return EHPersonality::GNU_Wasm_CPlusPlus;
return EHPersonality::GNU_CPlusPlus;
}
/// Determines the personality function to use when both C++
/// and Objective-C exceptions are being caught.
-static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
+static const EHPersonality &getObjCXXPersonality(const TargetInfo &Target,
const LangOptions &L) {
+ if (Target.getTriple().isWindowsMSVCEnvironment())
+ return EHPersonality::MSVC_CxxFrameHandler3;
+
switch (L.ObjCRuntime.getKind()) {
// In the fragile ABI, just use C++ exception handling and hope
// they're not doing crazy exception mixing.
case ObjCRuntime::FragileMacOSX:
- return getCXXPersonality(T, L);
+ return getCXXPersonality(Target, L);
// The ObjC personality defers to the C++ personality for non-ObjC
// handlers. Unlike the C++ case, we use the same personality
@@ -170,7 +194,7 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
case ObjCRuntime::MacOSX:
case ObjCRuntime::iOS:
case ObjCRuntime::WatchOS:
- return getObjCPersonality(T, L);
+ return getObjCPersonality(Target, L);
case ObjCRuntime::GNUstep:
return EHPersonality::GNU_ObjCXX;
@@ -179,7 +203,7 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
// mixed EH. Use the ObjC personality just to avoid returning null.
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
- return getObjCPersonality(T, L);
+ return getObjCPersonality(Target, L);
}
llvm_unreachable("bad runtime kind");
}
@@ -194,30 +218,17 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM,
const FunctionDecl *FD) {
const llvm::Triple &T = CGM.getTarget().getTriple();
const LangOptions &L = CGM.getLangOpts();
+ const TargetInfo &Target = CGM.getTarget();
// Functions using SEH get an SEH personality.
if (FD && FD->usesSEHTry())
return getSEHPersonalityMSVC(T);
- // Try to pick a personality function that is compatible with MSVC if we're
- // not compiling Obj-C. Obj-C users better have an Obj-C runtime that supports
- // the GCC-style personality function.
- if (T.isWindowsMSVCEnvironment() && !L.ObjC1) {
- if (L.SjLjExceptions)
- return EHPersonality::GNU_CPlusPlus_SJLJ;
- if (L.DWARFExceptions)
- return EHPersonality::GNU_CPlusPlus;
- return EHPersonality::MSVC_CxxFrameHandler3;
- }
-
- if (L.CPlusPlus && L.ObjC1)
- return getObjCXXPersonality(T, L);
- else if (L.CPlusPlus)
- return getCXXPersonality(T, L);
- else if (L.ObjC1)
- return getObjCPersonality(T, L);
- else
- return getCPersonality(T, L);
+ if (L.ObjC1)
+ return L.CPlusPlus ? getObjCXXPersonality(Target, L)
+ : getObjCPersonality(Target, L);
+ return L.CPlusPlus ? getCXXPersonality(Target, L)
+ : getCPersonality(Target, L);
}
const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) {
@@ -313,8 +324,7 @@ void CodeGenModule::SimplifyPersonality() {
return;
const EHPersonality &ObjCXX = EHPersonality::get(*this, /*FD=*/nullptr);
- const EHPersonality &CXX =
- getCXXPersonality(getTarget().getTriple(), LangOpts);
+ const EHPersonality &CXX = getCXXPersonality(getTarget(), LangOpts);
if (&ObjCXX == &CXX)
return;
@@ -448,11 +458,9 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) {
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
- if (isNoexceptExceptionSpec(EST)) {
- if (Proto->getNoexceptSpec(getContext()) == FunctionProtoType::NR_Nothrow) {
- // noexcept functions are simple terminate scopes.
- EHStack.pushTerminate();
- }
+ if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) {
+ // noexcept functions are simple terminate scopes.
+ EHStack.pushTerminate();
} else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
// TODO: Revisit exception specifications for the MS ABI. There is a way to
// encode these in an object file but MSVC doesn't do anything with it.
@@ -527,10 +535,8 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) {
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
- if (isNoexceptExceptionSpec(EST)) {
- if (Proto->getNoexceptSpec(getContext()) == FunctionProtoType::NR_Nothrow) {
- EHStack.popTerminate();
- }
+ if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) {
+ EHStack.popTerminate();
} else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
// TODO: Revisit exception specifications for the MS ABI. There is a way to
// encode these in an object file but MSVC doesn't do anything with it.
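A sketch of the behavior the canThrow() check preserves; the throw is deliberately ill-advised, to make the terminate scope visible:

    void F() noexcept { // spec resolves to CT_Cannot -> terminate scope
      throw 1;          // reaches std::terminate instead of unwinding past F
    }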
@@ -584,7 +590,7 @@ void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
llvm::BasicBlock *
CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) {
if (EHPersonality::get(*this).usesFuncletPads())
- return getMSVCDispatchBlock(si);
+ return getFuncletEHDispatchBlock(si);
// The dispatch block for the end of the scope chain is a block that
// just resumes unwinding.
@@ -632,7 +638,7 @@ CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) {
}
llvm::BasicBlock *
-CodeGenFunction::getMSVCDispatchBlock(EHScopeStack::stable_iterator SI) {
+CodeGenFunction::getFuncletEHDispatchBlock(EHScopeStack::stable_iterator SI) {
// Returning nullptr indicates that the previous dispatch block should unwind
// to caller.
if (SI == EHStack.stable_end())
@@ -646,7 +652,7 @@ CodeGenFunction::getMSVCDispatchBlock(EHScopeStack::stable_iterator SI) {
return DispatchBlock;
if (EHS.getKind() == EHScope::Terminate)
- DispatchBlock = getTerminateHandler();
+ DispatchBlock = getTerminateFunclet();
else
DispatchBlock = createBasicBlock();
CGBuilderTy Builder(*this, DispatchBlock);
@@ -926,10 +932,121 @@ static void emitCatchPadBlock(CodeGenFunction &CGF, EHCatchScope &CatchScope) {
CGF.Builder.restoreIP(SavedIP);
}
+// Wasm uses Windows-style EH instructions, but it merges all catch clauses into
+// one big catchpad, within which we use Itanium's landingpad-style selector
+// comparison instructions.
+static void emitWasmCatchPadBlock(CodeGenFunction &CGF,
+ EHCatchScope &CatchScope) {
+ llvm::BasicBlock *DispatchBlock = CatchScope.getCachedEHDispatchBlock();
+ assert(DispatchBlock);
+
+ CGBuilderTy::InsertPoint SavedIP = CGF.Builder.saveIP();
+ CGF.EmitBlockAfterUses(DispatchBlock);
+
+ llvm::Value *ParentPad = CGF.CurrentFuncletPad;
+ if (!ParentPad)
+ ParentPad = llvm::ConstantTokenNone::get(CGF.getLLVMContext());
+ llvm::BasicBlock *UnwindBB =
+ CGF.getEHDispatchBlock(CatchScope.getEnclosingEHScope());
+
+ unsigned NumHandlers = CatchScope.getNumHandlers();
+ llvm::CatchSwitchInst *CatchSwitch =
+ CGF.Builder.CreateCatchSwitch(ParentPad, UnwindBB, NumHandlers);
+
+ // We don't use a landingpad instruction, so generate intrinsic calls to
+ // provide exception and selector values.
+ llvm::BasicBlock *WasmCatchStartBlock = CGF.createBasicBlock("catch.start");
+ CatchSwitch->addHandler(WasmCatchStartBlock);
+ CGF.EmitBlockAfterUses(WasmCatchStartBlock);
+
+ // Create a catchpad instruction.
+ SmallVector<llvm::Value *, 4> CatchTypes;
+ for (unsigned I = 0, E = NumHandlers; I < E; ++I) {
+ const EHCatchScope::Handler &Handler = CatchScope.getHandler(I);
+ CatchTypeInfo TypeInfo = Handler.Type;
+ if (!TypeInfo.RTTI)
+ TypeInfo.RTTI = llvm::Constant::getNullValue(CGF.VoidPtrTy);
+ CatchTypes.push_back(TypeInfo.RTTI);
+ }
+ auto *CPI = CGF.Builder.CreateCatchPad(CatchSwitch, CatchTypes);
+
+ // Create calls to wasm.get.exception and wasm.get.ehselector intrinsics.
+ // Until they are properly lowered later, they provide the values for the
+ // exception and the selector.
+ llvm::Value *GetExnFn =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception);
+ llvm::Value *GetSelectorFn =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_ehselector);
+ llvm::CallInst *Exn = CGF.Builder.CreateCall(GetExnFn, CPI);
+ CGF.Builder.CreateStore(Exn, CGF.getExceptionSlot());
+ llvm::CallInst *Selector = CGF.Builder.CreateCall(GetSelectorFn, CPI);
+
+ llvm::Value *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for);
+
+ // If there's only a single catch-all, branch directly to its handler.
+ if (CatchScope.getNumHandlers() == 1 &&
+ CatchScope.getHandler(0).isCatchAll()) {
+ CGF.Builder.CreateBr(CatchScope.getHandler(0).Block);
+ CGF.Builder.restoreIP(SavedIP);
+ return;
+ }
+
+ // Test against each of the exception types we claim to catch.
+ for (unsigned I = 0, E = NumHandlers;; ++I) {
+ assert(I < E && "ran off end of handlers!");
+ const EHCatchScope::Handler &Handler = CatchScope.getHandler(I);
+ CatchTypeInfo TypeInfo = Handler.Type;
+ if (!TypeInfo.RTTI)
+ TypeInfo.RTTI = llvm::Constant::getNullValue(CGF.VoidPtrTy);
+
+ // Figure out the next block.
+ llvm::BasicBlock *NextBlock;
+
+ bool EmitNextBlock = false, NextIsEnd = false;
+
+ // If this is the last handler, we're at the end, and the next block is a
+ // block that contains a call to the rethrow function, so we can unwind to
+ // the enclosing EH scope. The call itself will be generated later.
+ if (I + 1 == E) {
+ NextBlock = CGF.createBasicBlock("rethrow");
+ EmitNextBlock = true;
+ NextIsEnd = true;
+
+ // If the next handler is a catch-all, we're at the end, and the
+ // next block is that handler.
+ } else if (CatchScope.getHandler(I + 1).isCatchAll()) {
+ NextBlock = CatchScope.getHandler(I + 1).Block;
+ NextIsEnd = true;
+
+ // Otherwise, we're not at the end and we need a new block.
+ } else {
+ NextBlock = CGF.createBasicBlock("catch.fallthrough");
+ EmitNextBlock = true;
+ }
+
+ // Figure out the catch type's index in the LSDA's type table.
+ llvm::CallInst *TypeIndex = CGF.Builder.CreateCall(TypeIDFn, TypeInfo.RTTI);
+ TypeIndex->setDoesNotThrow();
+
+ llvm::Value *MatchesTypeIndex =
+ CGF.Builder.CreateICmpEQ(Selector, TypeIndex, "matches");
+ CGF.Builder.CreateCondBr(MatchesTypeIndex, Handler.Block, NextBlock);
+
+ if (EmitNextBlock)
+ CGF.EmitBlock(NextBlock);
+ if (NextIsEnd)
+ break;
+ }
+
+ CGF.Builder.restoreIP(SavedIP);
+}
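A sketch of the source shape this lowers and the blocks it produces; 'MayThrow' is a hypothetical callee, and the block names come from the code above:

    void MayThrow();
    void Demo() {
      try {
        MayThrow();
      } catch (int) {          // first selector comparison, in catch.start
      } catch (const char *) { // reached via a catch.fallthrough block
      }
      // With no catch-all, an unmatched type branches to the "rethrow" block.
    }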
+
/// Emit the structure of the dispatch block for the given catch scope.
/// It is an invariant that the dispatch block already exists.
static void emitCatchDispatchBlock(CodeGenFunction &CGF,
EHCatchScope &catchScope) {
+ if (EHPersonality::get(CGF).isWasmPersonality())
+ return emitWasmCatchPadBlock(CGF, catchScope);
if (EHPersonality::get(CGF).usesFuncletPads())
return emitCatchPadBlock(CGF, catchScope);
@@ -1017,6 +1134,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
unsigned NumHandlers = S.getNumHandlers();
EHCatchScope &CatchScope = cast<EHCatchScope>(*EHStack.begin());
assert(CatchScope.getNumHandlers() == NumHandlers);
+ llvm::BasicBlock *DispatchBlock = CatchScope.getCachedEHDispatchBlock();
// If the catch was not required, bail out now.
if (!CatchScope.hasEHBranches()) {
@@ -1049,6 +1167,22 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
doImplicitRethrow = isa<CXXDestructorDecl>(CurCodeDecl) ||
isa<CXXConstructorDecl>(CurCodeDecl);
+ // Wasm uses Windows-style EH instructions, but merges all catch clauses into
+ // one big catchpad. So we save the old funclet pad here before we traverse
+ // each catch handler.
+ SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
+ CurrentFuncletPad);
+ llvm::BasicBlock *WasmCatchStartBlock = nullptr;
+ if (EHPersonality::get(*this).isWasmPersonality()) {
+ auto *CatchSwitch =
+ cast<llvm::CatchSwitchInst>(DispatchBlock->getFirstNonPHI());
+ WasmCatchStartBlock = CatchSwitch->hasUnwindDest()
+ ? CatchSwitch->getSuccessor(1)
+ : CatchSwitch->getSuccessor(0);
+ auto *CPI = cast<llvm::CatchPadInst>(WasmCatchStartBlock->getFirstNonPHI());
+ CurrentFuncletPad = CPI;
+ }
+
// Perversely, we emit the handlers backwards precisely because we
// want them to appear in source order. In all of these cases, the
// catch block will have exactly one predecessor, which will be a
@@ -1056,7 +1190,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// a catch-all, one of the dispatch blocks will branch to two
// different handlers, and EmitBlockAfterUses will cause the second
// handler to be moved before the first.
+ bool HasCatchAll = false;
for (unsigned I = NumHandlers; I != 0; --I) {
+ HasCatchAll |= Handlers[I - 1].isCatchAll();
llvm::BasicBlock *CatchBlock = Handlers[I-1].Block;
EmitBlockAfterUses(CatchBlock);
@@ -1101,6 +1237,27 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
Builder.CreateBr(ContBB);
}
+ // Because in wasm we merge all catch clauses into one big catchpad, if
+ // none of the types in the catch handlers matches after we test against
+ // each of them, we should unwind to the next enclosing EH scope. We
+ // generate a call to the rethrow function here to do that.
+ if (EHPersonality::get(*this).isWasmPersonality() && !HasCatchAll) {
+ assert(WasmCatchStartBlock);
+ // Navigate to the "rethrow" block we created in emitWasmCatchPadBlock().
+ // Wasm uses landingpad-style conditional branches to compare selectors, so
+ // we follow the false destination for each of the cond branches to reach
+ // the rethrow block.
+ llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock;
+ while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) {
+ auto *BI = cast<llvm::BranchInst>(TI);
+ assert(BI->isConditional());
+ RethrowBlock = BI->getSuccessor(1);
+ }
+ assert(RethrowBlock != WasmCatchStartBlock && RethrowBlock->empty());
+ Builder.SetInsertPoint(RethrowBlock);
+ CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
+ }
+
EmitBlock(ContBB);
incrementProfileCounter(&S);
}
@@ -1334,23 +1491,59 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() {
if (TerminateHandler)
return TerminateHandler;
- CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
-
// Set up the terminate handler. This block is inserted at the very
// end of the function by FinishFunction.
TerminateHandler = createBasicBlock("terminate.handler");
+ CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
Builder.SetInsertPoint(TerminateHandler);
+
llvm::Value *Exn = nullptr;
+ if (getLangOpts().CPlusPlus)
+ Exn = getExceptionFromSlot();
+ llvm::CallInst *terminateCall =
+ CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
+ terminateCall->setDoesNotReturn();
+ Builder.CreateUnreachable();
+
+ // Restore the saved insertion state.
+ Builder.restoreIP(SavedIP);
+
+ return TerminateHandler;
+}
+
+llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() {
+ assert(EHPersonality::get(*this).usesFuncletPads() &&
+ "use getTerminateLandingPad for non-funclet EH");
+
+ llvm::BasicBlock *&TerminateFunclet = TerminateFunclets[CurrentFuncletPad];
+ if (TerminateFunclet)
+ return TerminateFunclet;
+
+ CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
+
+ // Set up the terminate handler. This block is inserted at the very
+ // end of the function by FinishFunction.
+ TerminateFunclet = createBasicBlock("terminate.handler");
+ Builder.SetInsertPoint(TerminateFunclet);
+
+ // Create the cleanuppad using the current parent pad as its token. Use 'none'
+ // if this is a top-level terminate scope, which is the common case.
SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
CurrentFuncletPad);
- if (EHPersonality::get(*this).usesFuncletPads()) {
- llvm::Value *ParentPad = CurrentFuncletPad;
- if (!ParentPad)
- ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
- CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
- } else {
- if (getLangOpts().CPlusPlus)
- Exn = getExceptionFromSlot();
+ llvm::Value *ParentPad = CurrentFuncletPad;
+ if (!ParentPad)
+ ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
+ CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
+
+ // Emit the __std_terminate call.
+ llvm::Value *Exn = nullptr;
+ // In case of the wasm personality, we need to pass the exception value to
+ // the __clang_call_terminate function.
+ if (getLangOpts().CPlusPlus &&
+ EHPersonality::get(*this).isWasmPersonality()) {
+ llvm::Value *GetExnFn =
+ CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception);
+ Exn = Builder.CreateCall(GetExnFn, CurrentFuncletPad);
}
llvm::CallInst *terminateCall =
CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
@@ -1360,7 +1553,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() {
// Restore the saved insertion state.
Builder.restoreIP(SavedIP);
- return TerminateHandler;
+ return TerminateFunclet;
}
llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) {
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index c7dc8337e19e..3097caacb31c 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -61,18 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
/// CreateTempAlloca - This creates an alloca and inserts it into the entry
/// block.
+Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty,
+ CharUnits Align,
+ const Twine &Name,
+ llvm::Value *ArraySize) {
+ auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
+ Alloca->setAlignment(Align.getQuantity());
+ return Address(Alloca, Align);
+}
+
+/// CreateTempAlloca - This creates an alloca and inserts it into the entry
+/// block. The alloca is cast to the default address space if necessary.
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
const Twine &Name,
llvm::Value *ArraySize,
- bool CastToDefaultAddrSpace) {
- auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
- Alloca->setAlignment(Align.getQuantity());
- llvm::Value *V = Alloca;
+ Address *AllocaAddr) {
+ auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
+ if (AllocaAddr)
+ *AllocaAddr = Alloca;
+ llvm::Value *V = Alloca.getPointer();
// Alloca always returns a pointer in alloca address space, which may
// be different from the type defined by the language. For example,
// in C++ the auto variables are in the default address space. Therefore
// cast alloca to the default address space when necessary.
- if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
+ if (getASTAllocaAddressSpace() != LangAS::Default) {
auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
// When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
@@ -125,17 +137,26 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
}
Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
- bool CastToDefaultAddrSpace) {
+ Address *Alloca) {
// FIXME: Should we prefer the preferred type alignment here?
- return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name,
- CastToDefaultAddrSpace);
+ return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca);
}
Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
- const Twine &Name,
- bool CastToDefaultAddrSpace) {
- return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr,
- CastToDefaultAddrSpace);
+ const Twine &Name, Address *Alloca) {
+ return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name,
+ /*ArraySize=*/nullptr, Alloca);
+}
+
+Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty, CharUnits Align,
+ const Twine &Name) {
+ return CreateTempAllocaWithoutCast(ConvertTypeForMem(Ty), Align, Name);
+}
+
+Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty,
+ const Twine &Name) {
+ return CreateMemTempWithoutCast(Ty, getContext().getTypeAlignInChars(Ty),
+ Name);
}
/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
@@ -187,7 +208,7 @@ RValue CodeGenFunction::EmitAnyExpr(const Expr *E,
llvm_unreachable("bad evaluation kind");
}
-/// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will
+/// EmitAnyExprToTemp - Similar to EmitAnyExpr(), however, the result will
/// always be accessible even if no aggregate location is provided.
RValue CodeGenFunction::EmitAnyExprToTemp(const Expr *E) {
AggValueSlot AggSlot = AggValueSlot::ignored();
@@ -214,7 +235,8 @@ void CodeGenFunction::EmitAnyExprToMem(const Expr *E,
EmitAggExpr(E, AggValueSlot::forAddr(Location, Quals,
AggValueSlot::IsDestructed_t(IsInit),
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsAliased_t(!IsInit)));
+ AggValueSlot::IsAliased_t(!IsInit),
+ AggValueSlot::MayOverlap));
return;
}
@@ -347,7 +369,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M,
static Address createReferenceTemporary(CodeGenFunction &CGF,
const MaterializeTemporaryExpr *M,
- const Expr *Inner) {
+ const Expr *Inner,
+ Address *Alloca = nullptr) {
auto &TCG = CGF.getTargetHooks();
switch (M->getStorageDuration()) {
case SD_FullExpression:
@@ -380,7 +403,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF,
return Address(C, alignment);
}
}
- return CGF.CreateMemTemp(Ty, "ref.tmp");
+ return CGF.CreateMemTemp(Ty, "ref.tmp", Alloca);
}
case SD_Thread:
case SD_Static:
@@ -432,7 +455,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
E->getType().getQualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
break;
}
}
@@ -456,7 +480,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
}
// Create and initialize the reference temporary.
- Address Object = createReferenceTemporary(*this, M, E);
+ Address Alloca = Address::invalid();
+ Address Object = createReferenceTemporary(*this, M, E, &Alloca);
if (auto *Var = dyn_cast<llvm::GlobalVariable>(
Object.getPointer()->stripPointerCasts())) {
Object = Address(llvm::ConstantExpr::getBitCast(
@@ -475,13 +500,13 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
case SD_Automatic:
case SD_FullExpression:
if (auto *Size = EmitLifetimeStart(
- CGM.getDataLayout().getTypeAllocSize(Object.getElementType()),
- Object.getPointer())) {
+ CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()),
+ Alloca.getPointer())) {
if (M->getStorageDuration() == SD_Automatic)
pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
- Object, Size);
+ Alloca, Size);
else
- pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object,
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca,
Size);
}
break;
@@ -873,7 +898,7 @@ static llvm::Value *getArrayIndexingBound(
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
return CGF.Builder.getInt(CAT->getSize());
else if (const auto *VAT = dyn_cast<VariableArrayType>(AT))
- return CGF.getVLASize(VAT).first;
+ return CGF.getVLASize(VAT).NumElts;
// Ignore pass_object_size here. It's not applicable on decayed pointers.
}
}
@@ -1034,8 +1059,12 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
// Derived-to-base conversions.
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
- Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo,
- TBAAInfo);
+ // TODO: Support accesses to members of base classes in TBAA. For now, we
+ // conservatively pretend that the complete object is of the base class
+ // type.
+ if (TBAAInfo)
+ *TBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+ Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo);
auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl();
return GetAddressOfBaseClass(Addr, Derived,
CE->path_begin(), CE->path_end(),
@@ -1785,7 +1814,7 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
return RValue::get(Vec);
}
-/// @brief Generates lvalue for partial ext_vector access.
+/// Generates lvalue for partial ext_vector access.
Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) {
Address VectorAddress = LV.getExtVectorAddress();
const VectorType *ExprVT = LV.getType()->getAs<VectorType>();
@@ -1807,7 +1836,7 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) {
return VectorBasePtrPlusIx;
}
-/// @brief Load of global gamed gegisters are always calls to intrinsics.
+/// Loads of global named registers are always calls to intrinsics.
RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) {
assert((LV.getType()->isIntegerType() || LV.getType()->isPointerType()) &&
"Bad type for register variable");
@@ -2067,7 +2096,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
Dst.isVolatileQualified());
}
-/// @brief Store of global named registers are always calls to intrinsics.
+/// Stores of global named registers are always calls to intrinsics.
void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) {
assert((Dst.getType()->isIntegerType() || Dst.getType()->isPointerType()) &&
"Bad type for register variable");
@@ -2206,6 +2235,22 @@ static LValue EmitThreadPrivateVarDeclLValue(
return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
}
+static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF,
+ const VarDecl *VD, QualType T) {
+ for (const auto *D : VD->redecls()) {
+ if (!D->hasAttrs())
+ continue;
+ if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
+ if (Attr->getMapType() == OMPDeclareTargetDeclAttr::MT_Link) {
+ QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
+ Address Addr =
+ CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
+ }
+ }
+ return Address::invalid();
+}
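A sketch of the construct this serves (OpenMP 4.5 'declare target' with a 'link' clause, compiled for a device):

    int BigTable[1024];
    #pragma omp declare target link(BigTable)
    // In device code, accesses to BigTable load the runtime-managed pointer
    // returned by getAddrOfDeclareTargetLink and dereference it.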
+
Address
CodeGenFunction::EmitLoadOfReference(LValue RefLVal,
LValueBaseInfo *PointeeBaseInfo,
@@ -2255,6 +2300,13 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
CGF.CGM.getCXXABI().usesThreadWrapperFunction())
return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
+ // Check if the variable is marked as declare target with link clause in
+ // device codegen.
+ if (CGF.getLangOpts().OpenMPIsDevice) {
+ Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T);
+ if (Addr.isValid())
+ return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
+ }
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
@@ -2263,9 +2315,11 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
Address Addr(V, Alignment);
// Emit reference to the private copy of the variable if it is an OpenMP
// threadprivate variable.
- if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>())
+ if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd &&
+ VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy,
E->getExprLoc());
+ }
LValue LV = VD->getType()->isReferenceType() ?
CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
AlignmentSource::Decl) :
@@ -2446,7 +2500,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// Check for OpenMP threadprivate variables.
- if (getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
+ if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
+ VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
return EmitThreadPrivateVarDeclLValue(
*this, VD, T, addr, getTypes().ConvertTypeForMem(VD->getType()),
E->getExprLoc());
@@ -2579,7 +2634,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
StringRef NameItems[] = {
PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
- if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) {
+ if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) {
std::string Name = SL->getString();
if (!Name.empty()) {
unsigned Discriminator =
@@ -2678,7 +2733,7 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) {
return Builder.CreatePtrToInt(V, TargetTy);
}
-/// \brief Emit a representation of a SourceLocation for passing to a handler
+/// Emit a representation of a SourceLocation for passing to a handler
/// in a sanitizer runtime library. The format for this data is:
/// \code
/// struct SourceLocation {
@@ -2737,7 +2792,7 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) {
}
namespace {
-/// \brief Specify under what conditions this check can be recovered
+/// Specify under what conditions this check can be recovered
enum class CheckRecoverableKind {
/// Always terminate program execution if this check fails.
Unrecoverable,
@@ -2945,6 +3000,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind);
llvm::CallInst *CheckCall;
+ llvm::Constant *SlowPathFn;
if (WithDiag) {
llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
auto *InfoPtr =
@@ -2953,20 +3009,20 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
- llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction(
+ SlowPathFn = CGM.getModule().getOrInsertFunction(
"__cfi_slowpath_diag",
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy},
false));
CheckCall = Builder.CreateCall(
- SlowPathDiagFn,
- {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)});
+ SlowPathFn, {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)});
} else {
- llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction(
+ SlowPathFn = CGM.getModule().getOrInsertFunction(
"__cfi_slowpath",
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false));
CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr});
}
+ CGM.setDSOLocal(cast<llvm::GlobalValue>(SlowPathFn->stripPointerCasts()));
CheckCall->setDoesNotThrow();
EmitBlock(Cont);
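For reference, the two runtime entry points selected above have these shapes, reconstructed from the FunctionType::get calls (the canonical declarations live in compiler-rt):

    #include <cstdint>
    extern "C" void __cfi_slowpath(uint64_t TypeId, void *Ptr);
    extern "C" void __cfi_slowpath_diag(uint64_t TypeId, void *Ptr,
                                        void *DiagData); // static check data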
@@ -2980,6 +3036,7 @@ void CodeGenFunction::EmitCfiCheckStub() {
llvm::Function *F = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false),
llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M);
+ CGM.setDSOLocal(F);
llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
// FIXME: consider emitting an intrinsic call like
// call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2)
@@ -3018,6 +3075,11 @@ void CodeGenFunction::EmitCfiCheckFail() {
StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args,
SourceLocation());
+ // This function should not be affected by the blacklist. It does not have
+ // a source location, but "src:*" would still apply. Revert any changes to
+ // SanOpts made in StartFunction.
+ SanOpts = CGM.getLangOpts().Sanitize;
+
llvm::Value *Data =
EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, ArgData.getLocation());
@@ -3306,7 +3368,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
// The element count here is the total number of non-VLA elements.
- llvm::Value *numElements = getVLASize(vla).first;
+ llvm::Value *numElements = getVLASize(vla).NumElts;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
@@ -3540,7 +3602,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, TBAAInfo,
BaseTy, VLA->getElementType(), IsLowerBound);
// The element count here is the total number of non-VLA elements.
- llvm::Value *NumElements = getVLASize(VLA).first;
+ llvm::Value *NumElements = getVLASize(VLA).NumElts;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
@@ -3808,6 +3870,18 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
}
Address addr = base.getAddress();
+ if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) {
+ if (CGM.getCodeGenOpts().StrictVTablePointers &&
+ ClassDef->isDynamicClass()) {
+ // Getting to any field of a dynamic object requires stripping the
+ // dynamic information provided by invariant.group. This is because
+ // accessing fields may leak the real address of the dynamic object,
+ // which could result in miscompilation when the leaked pointer is
+ // compared.
+ auto *stripped = Builder.CreateStripInvariantGroup(addr.getPointer());
+ addr = Address(stripped, addr.getAlignment());
+ }
+ }
+
unsigned RecordCVR = base.getVRQualifiers();
if (rec->isUnion()) {
// For unions, there is no pointer adjustment.
@@ -3816,7 +3890,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
hasAnyVptr(FieldType, getContext()))
// Because unions can easily skip invariant.barriers, we need to add
// a barrier every time CXXRecord field with vptr is referenced.
- addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()),
+ addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()),
addr.getAlignment());
} else {
// For structs, we GEP to the field that the record layout suggests.
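
A contrived sketch of the problem this avoids under -fstrict-vtable-pointers: a field address derived from a pointer carrying invariant.group information may later be compared against a pointer into the same storage with different dynamic information, so the group data has to be stripped before the GEP.

    #include <new>

    struct Dyn {
      virtual ~Dyn() {}
      int field;
    };

    int *leak(Dyn *d) {
      // Derived from a pointer that carries invariant.group information;
      // codegen strips that information before taking the field address.
      return &d->field;
    }

    bool same_storage(Dyn *d) {
      int *before = leak(d);
      d->~Dyn();
      Dyn *fresh = new (d) Dyn;     // same bytes, new dynamic information
      return before == leak(fresh); // must be a plain address comparison
    }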
@@ -4160,7 +4234,35 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) {
assert(OpaqueValueMappingData::shouldBindAsLValue(e));
- return getOpaqueLValueMapping(e);
+ return getOrCreateOpaqueLValueMapping(e);
+}
+
+LValue
+CodeGenFunction::getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e) {
+ assert(OpaqueValueMapping::shouldBindAsLValue(e));
+
+ llvm::DenseMap<const OpaqueValueExpr*,LValue>::iterator
+ it = OpaqueLValues.find(e);
+
+ if (it != OpaqueLValues.end())
+ return it->second;
+
+ assert(e->isUnique() && "LValue for a nonunique OVE hasn't been emitted");
+ return EmitLValue(e->getSourceExpr());
+}
+
+RValue
+CodeGenFunction::getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e) {
+ assert(!OpaqueValueMapping::shouldBindAsLValue(e));
+
+ llvm::DenseMap<const OpaqueValueExpr*,RValue>::iterator
+ it = OpaqueRValues.find(e);
+
+ if (it != OpaqueRValues.end())
+ return it->second;
+
+ assert(e->isUnique() && "RValue for a nonunique OVE hasn't been emitted");
+ return EmitAnyExpr(e->getSourceExpr());
}
RValue CodeGenFunction::EmitRValueForField(LValue LV,
@@ -4476,8 +4578,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
CalleeType = getContext().getCanonicalType(CalleeType);
- const auto *FnType =
- cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType());
+ auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType();
CGCallee Callee = OrigCallee;
@@ -4486,8 +4587,12 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
if (llvm::Constant *PrefixSig =
CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
SanitizerScope SanScope(this);
+ // Remove any (C++17) exception specifications, to allow calling e.g. a
+ // noexcept function through a non-noexcept pointer.
+ auto ProtoTy =
+ getContext().getFunctionTypeWithExceptionSpec(PointeeType, EST_None);
llvm::Constant *FTRTTIConst =
- CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true);
+ CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty};
llvm::StructType *PrefixStructTy = llvm::StructType::get(
CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true);
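
The C++17 case this addresses, as a small sketch: -fsanitize=function compares the RTTI stored in the callee's prologue with the RTTI expected at the call site, so the exception specification has to be dropped from both sides.

    void callee() noexcept {}

    void caller() {
      // C++17 allows a noexcept function to convert to a non-noexcept
      // function pointer; without the getFunctionTypeWithExceptionSpec
      // adjustment above, this well-formed call would be flagged.
      void (*fp)() = callee;
      fp();
    }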
@@ -4527,6 +4632,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
}
}
+ const auto *FnType = cast<FunctionType>(PointeeType);
+
// If we are checking indirect calls and this call is indirect, check that the
// function pointer is a member of the bit set for the function type.
if (SanOpts.has(SanitizerKind::CFIICall) &&
@@ -4707,6 +4814,12 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF,
// If this semantic expression is an opaque value, bind it
// to the result of its source expression.
if (const auto *ov = dyn_cast<OpaqueValueExpr>(semantic)) {
+ // Skip unique OVEs.
+ if (ov->isUnique()) {
+ assert(ov != resultExpr &&
+ "A unique OVE cannot be used as the result expression");
+ continue;
+ }
// If this is the result expression, we may need to evaluate
// directly into the slot.
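
One place unique OVEs arise, as a hedged sketch (assumes -fms-extensions; __declspec(property) accesses are modeled as PseudoObjectExprs):

    struct S {
      int get() const;
      void set(int);
      __declspec(property(get = get, put = set)) int prop;
    };

    void touch(S &s) {
      // Rewritten roughly as s.set(s.get() + 1). Shared operands are bound
      // to OpaqueValueExprs here; an OVE marked unique appears exactly once
      // in the semantic form, so it is skipped above and emitted directly
      // at its single point of use.
      s.prop += 1;
    }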
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 0f05cab66d7e..291740478329 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "CodeGenFunction.h"
+#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
+#include "ConstantEmitter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclTemplate.h"
@@ -22,6 +24,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
using namespace clang;
using namespace CodeGen;
@@ -36,23 +39,6 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
AggValueSlot Dest;
bool IsResultUnused;
- /// We want to use 'dest' as the return slot except under two
- /// conditions:
- /// - The destination slot requires garbage collection, so we
- /// need to use the GC API.
- /// - The destination slot is potentially aliased.
- bool shouldUseDestForReturnSlot() const {
- return !(Dest.requiresGCollection() || Dest.isPotentiallyAliased());
- }
-
- ReturnValueSlot getReturnValueSlot() const {
- if (!shouldUseDestForReturnSlot())
- return ReturnValueSlot();
-
- return ReturnValueSlot(Dest.getAddress(), Dest.isVolatile(),
- IsResultUnused);
- }
-
AggValueSlot EnsureSlot(QualType T) {
if (!Dest.isIgnored()) return Dest;
return CGF.CreateAggTemp(T, "agg.tmp.ensured");
@@ -62,6 +48,15 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
Dest = CGF.CreateAggTemp(T, "agg.tmp.ensured");
}
+ // Calls `Fn` with a valid return value slot, potentially creating a temporary
+ // to do so. If a temporary is created, an appropriate copy into `Dest` will
+ // be emitted, as will lifetime markers.
+ //
+ // The given function should take a ReturnValueSlot, and return an RValue that
+ // points to said slot.
+ void withReturnValueSlot(const Expr *E,
+ llvm::function_ref<RValue(ReturnValueSlot)> Fn);
+
public:
AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused)
: CGF(cgf), Builder(CGF.Builder), Dest(Dest),
@@ -76,8 +71,15 @@ public:
/// then loads the result into DestPtr.
void EmitAggLoadOfLValue(const Expr *E);
+ enum ExprValueKind {
+ EVK_RValue,
+ EVK_NonRValue
+ };
+
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
- void EmitFinalDestCopy(QualType type, const LValue &src);
+ /// SrcIsRValue is true if source comes from an RValue.
+ void EmitFinalDestCopy(QualType type, const LValue &src,
+ ExprValueKind SrcValueKind = EVK_NonRValue);
void EmitFinalDestCopy(QualType type, RValue src);
void EmitCopy(QualType type, const AggValueSlot &dest,
const AggValueSlot &src);
@@ -85,7 +87,7 @@ public:
void EmitMoveFromReturnSlot(const Expr *E, RValue Src);
void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
- QualType elementType, InitListExpr *E);
+ QualType ArrayQTy, InitListExpr *E);
AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) {
if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T))
@@ -144,6 +146,7 @@ public:
void VisitPointerToDataMemberBinaryOperator(const BinaryOperator *BO);
void VisitBinAssign(const BinaryOperator *E);
void VisitBinComma(const BinaryOperator *E);
+ void VisitBinCmp(const BinaryOperator *E);
void VisitObjCMessageExpr(ObjCMessageExpr *E);
void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) {
@@ -217,7 +220,7 @@ void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) {
EmitFinalDestCopy(E->getType(), LV);
}
-/// \brief True if the given aggregate type requires special GC API calls.
+/// True if the given aggregate type requires special GC API calls.
bool AggExprEmitter::TypeRequiresGCollection(QualType T) {
// Only record types have members that might require garbage collection.
const RecordType *RecordTy = T->getAs<RecordType>();
@@ -234,38 +237,78 @@ bool AggExprEmitter::TypeRequiresGCollection(QualType T) {
return Record->hasObjectMember();
}
-/// \brief Perform the final move to DestPtr if for some reason
-/// getReturnValueSlot() didn't use it directly.
-///
-/// The idea is that you do something like this:
-/// RValue Result = EmitSomething(..., getReturnValueSlot());
-/// EmitMoveFromReturnSlot(E, Result);
-///
-/// If nothing interferes, this will cause the result to be emitted
-/// directly into the return value slot. Otherwise, a final move
-/// will be performed.
-void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue src) {
- if (shouldUseDestForReturnSlot()) {
- // Logically, Dest.getAddr() should equal Src.getAggregateAddr().
- // The possibility of undef rvalues complicates that a lot,
- // though, so we can't really assert.
- return;
+void AggExprEmitter::withReturnValueSlot(
+ const Expr *E, llvm::function_ref<RValue(ReturnValueSlot)> EmitCall) {
+ QualType RetTy = E->getType();
+ bool RequiresDestruction =
+ Dest.isIgnored() &&
+ RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct;
+
+ // If it makes no observable difference, save a memcpy + temporary.
+ //
+ // We need to always provide our own temporary if destruction is required.
+ // Otherwise, EmitCall will emit its own, notice that it's "unused", and end
+ // its lifetime before we have the chance to emit a proper destructor call.
+ bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
+ (RequiresDestruction && !Dest.getAddress().isValid());
+
+ Address RetAddr = Address::invalid();
+ Address RetAllocaAddr = Address::invalid();
+
+ EHScopeStack::stable_iterator LifetimeEndBlock;
+ llvm::Value *LifetimeSizePtr = nullptr;
+ llvm::IntrinsicInst *LifetimeStartInst = nullptr;
+ if (!UseTemp) {
+ RetAddr = Dest.getAddress();
+ } else {
+ RetAddr = CGF.CreateMemTemp(RetTy, "tmp", &RetAllocaAddr);
+ uint64_t Size =
+ CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy));
+ LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAllocaAddr.getPointer());
+ if (LifetimeSizePtr) {
+ LifetimeStartInst =
+ cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint()));
+ assert(LifetimeStartInst->getIntrinsicID() ==
+ llvm::Intrinsic::lifetime_start &&
+ "Last insertion wasn't a lifetime.start?");
+
+ CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
+ NormalEHLifetimeMarker, RetAllocaAddr, LifetimeSizePtr);
+ LifetimeEndBlock = CGF.EHStack.stable_begin();
+ }
}
- // Otherwise, copy from there to the destination.
- assert(Dest.getPointer() != src.getAggregatePointer());
- EmitFinalDestCopy(E->getType(), src);
+ RValue Src =
+ EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused));
+
+ if (RequiresDestruction)
+ CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy);
+
+ if (!UseTemp)
+ return;
+
+ assert(Dest.getPointer() != Src.getAggregatePointer());
+ EmitFinalDestCopy(E->getType(), Src);
+
+ if (!RequiresDestruction && LifetimeStartInst) {
+ // If there's no dtor to run, the copy was the last use of our temporary.
+ // Since we're not guaranteed to be in an ExprWithCleanups, clean up
+ // eagerly.
+ CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst);
+ CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAllocaAddr.getPointer());
+ }
}
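
A sketch of the temporary path, for a destination that is potentially aliased (the IR in the comment is approximate):

    struct Big { int a[8]; };
    Big make();

    void overwrite(Big &dest) {
      // 'dest' may alias memory the callee reads, so the call returns into
      // a fresh temporary bracketed by lifetime markers, roughly:
      //   %tmp = alloca %struct.Big
      //   call void @llvm.lifetime.start(i64 32, i8* %tmp.raw)
      //   call void @make(%struct.Big* sret %tmp)
      //   ...copy %tmp into dest...
      //   call void @llvm.lifetime.end(i64 32, i8* %tmp.raw)
      dest = make();
    }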
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
void AggExprEmitter::EmitFinalDestCopy(QualType type, RValue src) {
assert(src.isAggregate() && "value must be aggregate value!");
LValue srcLV = CGF.MakeAddrLValue(src.getAggregateAddress(), type);
- EmitFinalDestCopy(type, srcLV);
+ EmitFinalDestCopy(type, srcLV, EVK_RValue);
}
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
-void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
+void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src,
+ ExprValueKind SrcValueKind) {
// If Dest is ignored, then we're evaluating an aggregate expression
// in a context that doesn't care about the result. Note that loads
// from volatile l-values force the existence of a non-ignored
@@ -273,9 +316,32 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
if (Dest.isIgnored())
return;
+ // Copy non-trivial C structs here.
+ LValue DstLV = CGF.MakeAddrLValue(
+ Dest.getAddress(), Dest.isVolatile() ? type.withVolatile() : type);
+
+ if (SrcValueKind == EVK_RValue) {
+ if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct) {
+ if (Dest.isPotentiallyAliased())
+ CGF.callCStructMoveAssignmentOperator(DstLV, src);
+ else
+ CGF.callCStructMoveConstructor(DstLV, src);
+ return;
+ }
+ } else {
+ if (type.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+ if (Dest.isPotentiallyAliased())
+ CGF.callCStructCopyAssignmentOperator(DstLV, src);
+ else
+ CGF.callCStructCopyConstructor(DstLV, src);
+ return;
+ }
+ }
+
AggValueSlot srcAgg =
AggValueSlot::forLValue(src, AggValueSlot::IsDestructed,
- needsGC(type), AggValueSlot::IsAliased);
+ needsGC(type), AggValueSlot::IsAliased,
+ AggValueSlot::MayOverlap);
EmitCopy(type, Dest, srcAgg);
}
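
The C structs this covers come from ARC, so this one sketch is Objective-C rather than C++ (assumes -fobjc-arc):

    // Non-trivial to copy (PCK_Struct): the strong reference must be
    // retained, so the assignment below goes through the synthesized
    // C-struct copy-assignment helper instead of a memcpy.
    struct Holder { __strong id obj; };

    void assign(struct Holder *d, struct Holder *s) {
      *d = *s;
    }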
@@ -286,7 +352,7 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
const AggValueSlot &src) {
if (dest.requiresGCollection()) {
- CharUnits sz = CGF.getContext().getTypeSizeInChars(type);
+ CharUnits sz = dest.getPreferredSize(CGF.getContext(), type);
llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity());
CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF,
dest.getAddress(),
@@ -298,11 +364,13 @@ void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
// If the result of the assignment is used, copy the LHS there also.
// It's volatile if either side is. Use the minimum alignment of
// the two sides.
- CGF.EmitAggregateCopy(dest.getAddress(), src.getAddress(), type,
+ LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type);
+ LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type);
+ CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(),
dest.isVolatile() || src.isVolatile());
}
-/// \brief Emit the initializer for a std::initializer_list initialized with a
+/// Emit the initializer for a std::initializer_list initialized with a
/// real initializer list.
void
AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
@@ -367,7 +435,7 @@ AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
}
}
-/// \brief Determine if E is a trivial array filler, that is, one that is
+/// Determine if E is a trivial array filler, that is, one that is
/// equivalent to zero-initialization.
static bool isTrivialFiller(Expr *E) {
if (!E)
@@ -390,14 +458,17 @@ static bool isTrivialFiller(Expr *E) {
return false;
}
-/// \brief Emit initialization of an array from an initializer list.
+/// Emit initialization of an array from an initializer list.
void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
- QualType elementType, InitListExpr *E) {
+ QualType ArrayQTy, InitListExpr *E) {
uint64_t NumInitElements = E->getNumInits();
uint64_t NumArrayElements = AType->getNumElements();
assert(NumInitElements <= NumArrayElements);
+ QualType elementType =
+ CGF.getContext().getAsArrayType(ArrayQTy)->getElementType();
+
// DestPtr is an array*. Construct an elementType* by drilling
// down a level.
llvm::Value *zero = llvm::ConstantInt::get(CGF.SizeTy, 0);
@@ -409,6 +480,29 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
CharUnits elementAlign =
DestPtr.getAlignment().alignmentOfArrayElement(elementSize);
+ // Consider initializing the array by copying from a global. For this to be
+ // more efficient than per-element initialization, the size of the elements
+ // with explicit initializers should be large enough.
+ if (NumInitElements * elementSize.getQuantity() > 16 &&
+ elementType.isTriviallyCopyableType(CGF.getContext())) {
+ CodeGen::CodeGenModule &CGM = CGF.CGM;
+ ConstantEmitter Emitter(CGM);
+ LangAS AS = ArrayQTy.getAddressSpace();
+ if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) {
+ auto GV = new llvm::GlobalVariable(
+ CGM.getModule(), C->getType(),
+ CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true),
+ llvm::GlobalValue::PrivateLinkage, C, "constinit",
+ /* InsertBefore= */ nullptr, llvm::GlobalVariable::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(AS));
+ Emitter.finalize(GV);
+ CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy);
+ GV->setAlignment(Align.getQuantity());
+ EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align));
+ return;
+ }
+ }
+
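
The local-array case this optimizes, as a sketch (more than 16 bytes of trivially copyable explicit initializers):

    void use(const int (&)[16]);

    void init() {
      // 64 bytes of explicit initializers: rather than 16 scalar stores,
      // the values become a private constant global (the "constinit"
      // variable created above) plus a single memcpy into 'a'.
      int a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
      use(a);
    }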
// Exception safety requires us to destroy all the
// already-constructed members if an initializer throws.
// For that, we'll need an EH cleanup.
@@ -540,7 +634,11 @@ void AggExprEmitter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E){
}
void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) {
- EmitFinalDestCopy(e->getType(), CGF.getOpaqueLValueMapping(e));
+ // If this is a unique OVE, just visit its source expression.
+ if (e->isUnique())
+ Visit(e->getSourceExpr());
+ else
+ EmitFinalDestCopy(e->getType(), CGF.getOrCreateOpaqueLValueMapping(e));
}
void
@@ -586,12 +684,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
CGF.EmitDynamicCast(LV.getAddress(), cast<CXXDynamicCastExpr>(E));
else
CGF.CGM.ErrorUnsupported(E, "non-simple lvalue dynamic_cast");
-
+
if (!Dest.isIgnored())
CGF.CGM.ErrorUnsupported(E, "lvalue dynamic_cast with a destination");
break;
}
-
+
case CK_ToUnion: {
// Evaluate even if the destination is ignored.
if (Dest.isIgnored()) {
@@ -651,7 +749,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
if (isToAtomic) {
AggValueSlot valueDest = Dest;
if (!valueDest.isIgnored() && CGF.CGM.isPaddedAtomicType(atomicType)) {
- // Zero-initialize. (Strictly speaking, we only need to intialize
+ // Zero-initialize. (Strictly speaking, we only need to initialize
// the padding at the end, but this is simpler.)
if (!Dest.isZeroed())
CGF.EmitNullInitialization(Dest.getAddress(), atomicType);
@@ -665,6 +763,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
valueDest.isExternallyDestructed(),
valueDest.requiresGCollection(),
valueDest.isPotentiallyAliased(),
+ AggValueSlot::DoesNotOverlap,
AggValueSlot::IsZeroed);
}
@@ -762,13 +861,15 @@ void AggExprEmitter::VisitCallExpr(const CallExpr *E) {
return;
}
- RValue RV = CGF.EmitCallExpr(E, getReturnValueSlot());
- EmitMoveFromReturnSlot(E, RV);
+ withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
+ return CGF.EmitCallExpr(E, Slot);
+ });
}
void AggExprEmitter::VisitObjCMessageExpr(ObjCMessageExpr *E) {
- RValue RV = CGF.EmitObjCMessageExpr(E, getReturnValueSlot());
- EmitMoveFromReturnSlot(E, RV);
+ withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
+ return CGF.EmitObjCMessageExpr(E, Slot);
+ });
}
void AggExprEmitter::VisitBinComma(const BinaryOperator *E) {
@@ -781,6 +882,150 @@ void AggExprEmitter::VisitStmtExpr(const StmtExpr *E) {
CGF.EmitCompoundStmt(*E->getSubStmt(), true, Dest);
}
+enum CompareKind {
+ CK_Less,
+ CK_Greater,
+ CK_Equal,
+};
+
+static llvm::Value *EmitCompare(CGBuilderTy &Builder, CodeGenFunction &CGF,
+ const BinaryOperator *E, llvm::Value *LHS,
+ llvm::Value *RHS, CompareKind Kind,
+ const char *NameSuffix = "") {
+ QualType ArgTy = E->getLHS()->getType();
+ if (const ComplexType *CT = ArgTy->getAs<ComplexType>())
+ ArgTy = CT->getElementType();
+
+ if (const auto *MPT = ArgTy->getAs<MemberPointerType>()) {
+ assert(Kind == CK_Equal &&
+ "member pointers may only be compared for equality");
+ return CGF.CGM.getCXXABI().EmitMemberPointerComparison(
+ CGF, LHS, RHS, MPT, /*IsInequality*/ false);
+ }
+
+ // Compute the comparison instructions for the specified comparison kind.
+ struct CmpInstInfo {
+ const char *Name;
+ llvm::CmpInst::Predicate FCmp;
+ llvm::CmpInst::Predicate SCmp;
+ llvm::CmpInst::Predicate UCmp;
+ };
+ CmpInstInfo InstInfo = [&]() -> CmpInstInfo {
+ using FI = llvm::FCmpInst;
+ using II = llvm::ICmpInst;
+ switch (Kind) {
+ case CK_Less:
+ return {"cmp.lt", FI::FCMP_OLT, II::ICMP_SLT, II::ICMP_ULT};
+ case CK_Greater:
+ return {"cmp.gt", FI::FCMP_OGT, II::ICMP_SGT, II::ICMP_UGT};
+ case CK_Equal:
+ return {"cmp.eq", FI::FCMP_OEQ, II::ICMP_EQ, II::ICMP_EQ};
+ }
+ llvm_unreachable("Unrecognised CompareKind enum");
+ }();
+
+ if (ArgTy->hasFloatingRepresentation())
+ return Builder.CreateFCmp(InstInfo.FCmp, LHS, RHS,
+ llvm::Twine(InstInfo.Name) + NameSuffix);
+ if (ArgTy->isIntegralOrEnumerationType() || ArgTy->isPointerType()) {
+ auto Inst =
+ ArgTy->hasSignedIntegerRepresentation() ? InstInfo.SCmp : InstInfo.UCmp;
+ return Builder.CreateICmp(Inst, LHS, RHS,
+ llvm::Twine(InstInfo.Name) + NameSuffix);
+ }
+
+ llvm_unreachable("unsupported aggregate binary expression should have "
+ "already been handled");
+}
+
+void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) {
+ using llvm::BasicBlock;
+ using llvm::PHINode;
+ using llvm::Value;
+ assert(CGF.getContext().hasSameType(E->getLHS()->getType(),
+ E->getRHS()->getType()));
+ const ComparisonCategoryInfo &CmpInfo =
+ CGF.getContext().CompCategories.getInfoForType(E->getType());
+ assert(CmpInfo.Record->isTriviallyCopyable() &&
+ "cannot copy non-trivially copyable aggregate");
+
+ QualType ArgTy = E->getLHS()->getType();
+
+ // TODO: Handle comparing these types.
+ if (ArgTy->isVectorType())
+ return CGF.ErrorUnsupported(
+ E, "aggregate three-way comparison with vector arguments");
+ if (!ArgTy->isIntegralOrEnumerationType() && !ArgTy->isRealFloatingType() &&
+ !ArgTy->isNullPtrType() && !ArgTy->isPointerType() &&
+ !ArgTy->isMemberPointerType() && !ArgTy->isAnyComplexType()) {
+ return CGF.ErrorUnsupported(E, "aggregate three-way comparison");
+ }
+ bool IsComplex = ArgTy->isAnyComplexType();
+
+ // Evaluate the operands to the expression and extract their values.
+ auto EmitOperand = [&](Expr *E) -> std::pair<Value *, Value *> {
+ RValue RV = CGF.EmitAnyExpr(E);
+ if (RV.isScalar())
+ return {RV.getScalarVal(), nullptr};
+ if (RV.isAggregate())
+ return {RV.getAggregatePointer(), nullptr};
+ assert(RV.isComplex());
+ return RV.getComplexVal();
+ };
+ auto LHSValues = EmitOperand(E->getLHS()),
+ RHSValues = EmitOperand(E->getRHS());
+
+ auto EmitCmp = [&](CompareKind K) {
+ Value *Cmp = EmitCompare(Builder, CGF, E, LHSValues.first, RHSValues.first,
+ K, IsComplex ? ".r" : "");
+ if (!IsComplex)
+ return Cmp;
+ assert(K == CompareKind::CK_Equal);
+ Value *CmpImag = EmitCompare(Builder, CGF, E, LHSValues.second,
+ RHSValues.second, K, ".i");
+ return Builder.CreateAnd(Cmp, CmpImag, "and.eq");
+ };
+ auto EmitCmpRes = [&](const ComparisonCategoryInfo::ValueInfo *VInfo) {
+ return Builder.getInt(VInfo->getIntValue());
+ };
+
+ Value *Select;
+ if (ArgTy->isNullPtrType()) {
+ Select = EmitCmpRes(CmpInfo.getEqualOrEquiv());
+ } else if (CmpInfo.isEquality()) {
+ Select = Builder.CreateSelect(
+ EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()),
+ EmitCmpRes(CmpInfo.getNonequalOrNonequiv()), "sel.eq");
+ } else if (!CmpInfo.isPartial()) {
+ Value *SelectOne =
+ Builder.CreateSelect(EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()),
+ EmitCmpRes(CmpInfo.getGreater()), "sel.lt");
+ Select = Builder.CreateSelect(EmitCmp(CK_Equal),
+ EmitCmpRes(CmpInfo.getEqualOrEquiv()),
+ SelectOne, "sel.eq");
+ } else {
+ Value *SelectEq = Builder.CreateSelect(
+ EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()),
+ EmitCmpRes(CmpInfo.getUnordered()), "sel.eq");
+ Value *SelectGT = Builder.CreateSelect(EmitCmp(CK_Greater),
+ EmitCmpRes(CmpInfo.getGreater()),
+ SelectEq, "sel.gt");
+ Select = Builder.CreateSelect(
+ EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()), SelectGT, "sel.lt");
+ }
+ // Create the return value in the destination slot.
+ EnsureDest(E->getType());
+ LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
+
+ // Emit the address of the first (and only) field in the comparison category
+ // type, and initialize it from the constant integer value selected above.
+ LValue FieldLV = CGF.EmitLValueForFieldInitialization(
+ DestLV, *CmpInfo.Record->field_begin());
+ CGF.EmitStoreThroughLValue(RValue::get(Select), FieldLV, /*IsInit*/ true);
+
+ // All done! The result is in the Dest slot.
+}
+
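
A sketch of the non-partial branch for a strong ordering (assumes a -std=c++2a toolchain with <compare>):

    #include <compare>

    std::strong_ordering spaceship(int a, int b) {
      // Lowered roughly as:
      //   %cmp.lt = icmp slt %a, %b
      //   %cmp.eq = icmp eq  %a, %b
      //   %sel.lt = select %cmp.lt, LESS, GREATER
      //   %sel.eq = select %cmp.eq, EQUAL, %sel.lt
      // with %sel.eq stored into the single integer field of the returned
      // comparison category object.
      return a <=> b;
    }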
void AggExprEmitter::VisitBinaryOperator(const BinaryOperator *E) {
if (E->getOpcode() == BO_PtrMemD || E->getOpcode() == BO_PtrMemI)
VisitPointerToDataMemberBinaryOperator(E);
@@ -890,7 +1135,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) {
EmitCopy(E->getLHS()->getType(),
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
needsGC(E->getLHS()->getType()),
- AggValueSlot::IsAliased),
+ AggValueSlot::IsAliased,
+ AggValueSlot::MayOverlap),
Dest);
return;
}
@@ -911,7 +1157,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) {
AggValueSlot LHSSlot =
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
needsGC(E->getLHS()->getType()),
- AggValueSlot::IsAliased);
+ AggValueSlot::IsAliased,
+ AggValueSlot::MayOverlap);
// A non-volatile aggregate destination might have volatile member.
if (!LHSSlot.isVolatile() &&
CGF.hasVolatileMember(E->getLHS()->getType()))
@@ -1089,6 +1336,7 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) {
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
+ AggValueSlot::MayOverlap,
Dest.isZeroed()));
return;
case TEK_Scalar:
@@ -1156,11 +1404,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Handle initialization of an array.
if (E->getType()->isArrayType()) {
- QualType elementType =
- CGF.getContext().getAsArrayType(E->getType())->getElementType();
-
auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType());
- EmitArrayInit(Dest.getAddress(), AType, elementType, E);
+ EmitArrayInit(Dest.getAddress(), AType, E->getType(), E);
return;
}
@@ -1190,11 +1435,12 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
Address V = CGF.GetAddressOfDirectBaseInCompleteClass(
Dest.getAddress(), CXXRD, BaseRD,
/*isBaseVirtual*/ false);
- AggValueSlot AggSlot =
- AggValueSlot::forAddr(V, Qualifiers(),
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot AggSlot = AggValueSlot::forAddr(
+ V, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual()));
CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot);
if (QualType::DestructionKind dtorKind =
@@ -1375,7 +1621,9 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
// If the subexpression is an ArrayInitLoopExpr, share its cleanup.
auto elementSlot = AggValueSlot::forLValue(
elementLV, AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased);
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
AggExprEmitter(CGF, elementSlot, false)
.VisitArrayInitLoopExpr(InnerLoop, outerBegin);
} else
@@ -1425,6 +1673,8 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
// If this is an initlist expr, sum up the size of sizes of the (present)
// elements. If this is something weird, assume the whole thing is non-zero.
const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
+ while (ILE && ILE->isTransparent())
+ ILE = dyn_cast<InitListExpr>(ILE->getInit(0));
if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType()))
return CGF.getContext().getTypeSizeInChars(E->getType());
@@ -1491,7 +1741,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
}
// If the type is 16-bytes or smaller, prefer individual stores over memset.
- CharUnits Size = CGF.getContext().getTypeSizeInChars(E->getType());
+ CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType());
if (Size <= CharUnits::fromQuantity(16))
return;
@@ -1537,16 +1787,42 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
LValue LV = MakeAddrLValue(Temp, E->getType());
EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
return LV;
}
-void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
- Address SrcPtr, QualType Ty,
- bool isVolatile,
- bool isAssignment) {
+AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit(
+ const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) {
+ // Virtual bases are initialized first, in address order, so there's never
+ // any overlap during their initialization.
+ //
+ // FIXME: Under P0840, this is no longer true: the tail padding of a vbase
+ // of a field could be reused by a vbase of a containing class.
+ if (IsVirtual)
+ return AggValueSlot::DoesNotOverlap;
+
+ // If the base class is laid out entirely within the nvsize of the derived
+ // class, its tail padding cannot yet be initialized, so we can issue
+ // stores at the full width of the base class.
+ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+ if (Layout.getBaseClassOffset(BaseRD) +
+ getContext().getASTRecordLayout(BaseRD).getSize() <=
+ Layout.getNonVirtualSize())
+ return AggValueSlot::DoesNotOverlap;
+
+ // The tail padding may contain values we need to preserve.
+ return AggValueSlot::MayOverlap;
+}
+
+void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
+ AggValueSlot::Overlap_t MayOverlap,
+ bool isVolatile) {
assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex");
+ Address DestPtr = Dest.getAddress();
+ Address SrcPtr = Src.getAddress();
+
if (getLangOpts().CPlusPlus) {
if (const RecordType *RT = Ty->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RT->getDecl());
@@ -1562,7 +1838,7 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
return;
}
}
-
+
// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in anyway the storage of the first
@@ -1574,12 +1850,11 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
// implementation handles this case safely. If there is a libc that does not
// safely handle this, we can add a target hook.
- // Get data size info for this aggregate. If this is an assignment,
- // don't copy the tail padding, because we might be assigning into a
- // base subobject where the tail padding is claimed. Otherwise,
- // copying it is fine.
+ // Get data size info for this aggregate. Don't copy the tail padding if this
+ // might be a potentially-overlapping subobject, since the tail padding might
+ // be occupied by a different object. Otherwise, copying it is fine.
std::pair<CharUnits, CharUnits> TypeInfo;
- if (isAssignment)
+ if (MayOverlap)
TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty);
else
TypeInfo = getContext().getTypeInfoInChars(Ty);
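
The classic layout that makes the data-size distinction matter, as a sketch (Itanium-style layout assumed):

    struct B { int i; char a; };  // sizeof(B) == 8, data size dsize(B) == 5
    struct D : B { char c; };     // 'c' lives in B's tail padding, offset 5

    void assign_base(D &d, const B &b) {
      // Must copy dsize(B) == 5 bytes, not sizeof(B) == 8: a full-width
      // memcpy would clobber d.c inside the base subobject's tail padding.
      static_cast<B &>(d) = b;
    }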
@@ -1591,22 +1866,11 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
getContext().getAsArrayType(Ty))) {
QualType BaseEltTy;
SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr);
- TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy);
- std::pair<CharUnits, CharUnits> LastElementTypeInfo;
- if (!isAssignment)
- LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+ TypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
assert(!TypeInfo.first.isZero());
SizeVal = Builder.CreateNUWMul(
SizeVal,
llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
- if (!isAssignment) {
- SizeVal = Builder.CreateNUWSub(
- SizeVal,
- llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
- SizeVal = Builder.CreateNUWAdd(
- SizeVal, llvm::ConstantInt::get(
- SizeTy, LastElementTypeInfo.first.getQuantity()));
- }
}
}
if (!SizeVal) {
@@ -1657,4 +1921,10 @@ void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
// the optimizer wishes to expand it in to scalar memory operations.
if (llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty))
Inst->setMetadata(llvm::LLVMContext::MD_tbaa_struct, TBAAStructTag);
+
+ if (CGM.getCodeGenOpts().NewStructPathTBAA) {
+ TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForMemoryTransfer(
+ Dest.getTBAAInfo(), Src.getTBAAInfo());
+ CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
+ }
}
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index c32f1e5415da..8955d8a4a83c 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -242,11 +242,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
}
}
- Address This = Address::invalid();
- if (IsArrow)
- This = EmitPointerWithAlignment(Base);
- else
- This = EmitLValue(Base).getAddress();
+ LValue This;
+ if (IsArrow) {
+ LValueBaseInfo BaseInfo;
+ TBAAAccessInfo TBAAInfo;
+ Address ThisValue = EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo);
+ This = MakeAddrLValue(ThisValue, Base->getType(), BaseInfo, TBAAInfo);
+ } else {
+ This = EmitLValue(Base);
+ }
if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) {
@@ -261,10 +265,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
// when it isn't necessary; just produce the proper effect here.
LValue RHS = isa<CXXOperatorCallExpr>(CE)
? MakeNaturalAlignAddrLValue(
- (*RtlArgs)[0].RV.getScalarVal(),
+ (*RtlArgs)[0].getRValue(*this).getScalarVal(),
(*(CE->arg_begin() + 1))->getType())
: EmitLValue(*CE->arg_begin());
- EmitAggregateAssign(This, RHS.getAddress(), CE->getType());
+ EmitAggregateAssign(This, RHS, CE->getType());
return RValue::get(This.getPointer());
}
@@ -272,8 +276,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) {
// Trivial move and copy ctor are the same.
assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
- Address RHS = EmitLValue(*CE->arg_begin()).getAddress();
- EmitAggregateCopy(This, RHS, (*CE->arg_begin())->getType());
+ const Expr *Arg = *CE->arg_begin();
+ LValue RHS = EmitLValue(Arg);
+ LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType());
+ // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's
+ // constructing a new complete object of type Ctor.
+ EmitAggregateCopy(Dest, RHS, Arg->getType(),
+ AggValueSlot::DoesNotOverlap);
return RValue::get(This.getPointer());
}
llvm_unreachable("unknown trivial member function");
@@ -335,7 +344,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
assert(ReturnValue.isNull() && "Destructor shouldn't have return value");
if (UseVirtualCall) {
CGM.getCXXABI().EmitVirtualDestructorCall(
- *this, Dtor, Dtor_Complete, This, cast<CXXMemberCallExpr>(CE));
+ *this, Dtor, Dtor_Complete, This.getAddress(),
+ cast<CXXMemberCallExpr>(CE));
} else {
CGCallee Callee;
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
@@ -364,15 +374,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
Ctor);
} else if (UseVirtualCall) {
- Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty,
- CE->getLocStart());
+ Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty);
} else {
if (SanOpts.has(SanitizerKind::CFINVCall) &&
MD->getParent()->isDynamicClass()) {
llvm::Value *VTable;
const CXXRecordDecl *RD;
std::tie(VTable, RD) =
- CGM.getCXXABI().LoadVTablePtr(*this, This, MD->getParent());
+ CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(),
+ MD->getParent());
EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart());
}
@@ -388,8 +398,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
}
if (MD->isVirtual()) {
- This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
- *this, CalleeDecl, This, UseVirtualCall);
+ Address NewThisAddr =
+ CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
+ *this, CalleeDecl, This.getAddress(), UseVirtualCall);
+ This.setAddress(NewThisAddr);
}
return EmitCXXMemberOrOperatorCall(
@@ -622,7 +634,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
// Call the constructor.
EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating,
- Dest.getAddress(), E);
+ Dest.getAddress(), E, Dest.mayOverlap());
}
}
@@ -924,7 +936,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
}
static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init,
- QualType AllocType, Address NewPtr) {
+ QualType AllocType, Address NewPtr,
+ AggValueSlot::Overlap_t MayOverlap) {
// FIXME: Refactor with EmitExprAsInit.
switch (CGF.getEvaluationKind(AllocType)) {
case TEK_Scalar:
@@ -940,7 +953,8 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init,
= AggValueSlot::forAddr(NewPtr, AllocType.getQualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ MayOverlap);
CGF.EmitAggExpr(Init, Slot);
return;
}
@@ -1009,7 +1023,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
AggValueSlot::forAddr(CurPtr, ElementType.getQualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
EmitAggExpr(ILE->getInit(0), Slot);
// Move past these elements.
@@ -1074,7 +1089,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
// an array, and we have an array filler, we can fold together the two
// initialization loops.
StoreAnyExprIntoOneUnit(*this, ILE->getInit(i),
- ILE->getInit(i)->getType(), CurPtr);
+ ILE->getInit(i)->getType(), CurPtr,
+ AggValueSlot::DoesNotOverlap);
CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getPointer(),
Builder.getSize(1),
"array.exp.next"),
@@ -1227,7 +1243,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
}
// Emit the initializer into this element.
- StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr);
+ StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr,
+ AggValueSlot::DoesNotOverlap);
// Leave the Cleanup if we entered one.
if (CleanupDominator) {
@@ -1258,7 +1275,8 @@ static void EmitNewInitializer(CodeGenFunction &CGF, const CXXNewExpr *E,
CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements,
AllocSizeWithoutCookie);
else if (const Expr *Init = E->getInitializer())
- StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr);
+ StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr,
+ AggValueSlot::DoesNotOverlap);
}
/// Emit a call to an operator new or operator delete function, as implicitly
@@ -1298,19 +1316,19 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
}
RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
- const Expr *Arg,
+ const CallExpr *TheCall,
bool IsDelete) {
CallArgList Args;
- const Stmt *ArgS = Arg;
- EmitCallArgs(Args, *Type->param_type_begin(), llvm::makeArrayRef(ArgS));
+ EmitCallArgs(Args, Type->getParamTypes(), TheCall->arguments());
// Find the allocation or deallocation function that we're calling.
ASTContext &Ctx = getContext();
DeclarationName Name = Ctx.DeclarationNames
.getCXXOperatorName(IsDelete ? OO_Delete : OO_New);
+
for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name))
if (auto *FD = dyn_cast<FunctionDecl>(Decl))
if (Ctx.hasSameType(FD->getType(), QualType(Type, 0)))
- return EmitNewDeleteCall(*this, cast<FunctionDecl>(Decl), Type, Args);
+ return EmitNewDeleteCall(*this, FD, Type, Args);
llvm_unreachable("predeclared global operator new/delete is missing");
}
@@ -1481,7 +1499,7 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF,
AllocAlign);
for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) {
auto &Arg = NewArgs[I + NumNonPlacementArgs];
- Cleanup->setPlacementArg(I, Arg.RV, Arg.Ty);
+ Cleanup->setPlacementArg(I, Arg.getRValue(CGF), Arg.Ty);
}
return;
@@ -1512,8 +1530,8 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF,
AllocAlign);
for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) {
auto &Arg = NewArgs[I + NumNonPlacementArgs];
- Cleanup->setPlacementArg(I, DominatingValue<RValue>::save(CGF, Arg.RV),
- Arg.Ty);
+ Cleanup->setPlacementArg(
+ I, DominatingValue<RValue>::save(CGF, Arg.getRValue(CGF)), Arg.Ty);
}
CGF.initFullExprCleanup();
@@ -1678,13 +1696,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
llvm::Type *elementTy = ConvertTypeForMem(allocType);
Address result = Builder.CreateElementBitCast(allocation, elementTy);
- // Passing pointer through invariant.group.barrier to avoid propagation of
+ // Passing pointer through launder.invariant.group to avoid propagation of
// vptrs information which may be included in previous type.
// To not break LTO with different optimizations levels, we do it regardless
// of optimization level.
if (CGM.getCodeGenOpts().StrictVTablePointers &&
allocator->isReservedGlobalPlacementOperator())
- result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()),
+ result = Address(Builder.CreateLaunderInvariantGroup(result.getPointer()),
result.getAlignment());
EmitNewInitializer(*this, E, allocType, elementTy, result, numElements,
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 9094d3f8a91c..fb176093a741 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -155,8 +155,9 @@ public:
}
ComplexPairTy VisitOpaqueValueExpr(OpaqueValueExpr *E) {
if (E->isGLValue())
- return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc());
- return CGF.getOpaqueRValueMapping(E).getComplexVal();
+ return EmitLoadOfLValue(CGF.getOrCreateOpaqueLValueMapping(E),
+ E->getExprLoc());
+ return CGF.getOrCreateOpaqueRValueMapping(E).getComplexVal();
}
ComplexPairTy VisitPseudoObjectExpr(PseudoObjectExpr *E) {
@@ -594,7 +595,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) {
return ComplexPairTy(ResR, ResI);
}
-/// \brief Emit a libcall for a binary operation on complex types.
+/// Emit a libcall for a binary operation on complex types.
ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName,
const BinOpInfo &Op) {
CallArgList Args;
@@ -628,11 +629,11 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName,
llvm::Instruction *Call;
RValue Res = CGF.EmitCall(FuncInfo, Callee, ReturnValueSlot(), Args, &Call);
- cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getBuiltinCC());
+ cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getRuntimeCC());
return Res.getComplexVal();
}
-/// \brief Lookup the libcall name for a given floating point type complex
+/// Lookup the libcall name for a given floating point type complex
/// multiply.
static StringRef getComplexMultiplyLibCallName(llvm::Type *Ty) {
switch (Ty->getTypeID()) {
@@ -1055,7 +1056,7 @@ ComplexPairTy ComplexExprEmitter::VisitInitListExpr(InitListExpr *E) {
return Visit(E->getInit(0));
}
- // Empty init list intializes to null
+ // Empty init list initializes to null
assert(E->getNumInits() == 0 && "Unexpected number of inits");
QualType Ty = E->getType()->castAs<ComplexType>()->getElementType();
llvm::Type* LTy = CGF.ConvertType(Ty);
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index d1b9e13a6f93..cfd0b859233a 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -635,6 +635,72 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM,
return ConstantAddress(GV, Align);
}
+static llvm::Constant *
+EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType,
+ llvm::Type *CommonElementType, unsigned ArrayBound,
+ SmallVectorImpl<llvm::Constant *> &Elements,
+ llvm::Constant *Filler) {
+ // Figure out how long the initial prefix of non-zero elements is.
+ unsigned NonzeroLength = ArrayBound;
+ if (Elements.size() < NonzeroLength && Filler->isNullValue())
+ NonzeroLength = Elements.size();
+ if (NonzeroLength == Elements.size()) {
+ while (NonzeroLength > 0 && Elements[NonzeroLength - 1]->isNullValue())
+ --NonzeroLength;
+ }
+
+ if (NonzeroLength == 0) {
+ return llvm::ConstantAggregateZero::get(
+ CGM.getTypes().ConvertType(QualType(DestType, 0)));
+ }
+
+ // Add a zeroinitializer array filler if we have lots of trailing zeroes.
+ unsigned TrailingZeroes = ArrayBound - NonzeroLength;
+ if (TrailingZeroes >= 8) {
+ assert(Elements.size() >= NonzeroLength &&
+ "missing initializer for non-zero element");
+
+ // If all the elements had the same type up to the trailing zeroes, emit a
+ // struct of two arrays (the nonzero data and the zeroinitializer).
+ if (CommonElementType && NonzeroLength >= 8) {
+ llvm::Constant *Initial = llvm::ConstantArray::get(
+ llvm::ArrayType::get(CommonElementType, NonzeroLength),
+ makeArrayRef(Elements).take_front(NonzeroLength));
+ Elements.resize(2);
+ Elements[0] = Initial;
+ } else {
+ Elements.resize(NonzeroLength + 1);
+ }
+
+ auto *FillerType =
+ CommonElementType
+ ? CommonElementType
+ : CGM.getTypes().ConvertType(DestType->getElementType());
+ FillerType = llvm::ArrayType::get(FillerType, TrailingZeroes);
+ Elements.back() = llvm::ConstantAggregateZero::get(FillerType);
+ CommonElementType = nullptr;
+ } else if (Elements.size() != ArrayBound) {
+ // Otherwise pad to the right size with the filler if necessary.
+ Elements.resize(ArrayBound, Filler);
+ if (Filler->getType() != CommonElementType)
+ CommonElementType = nullptr;
+ }
+
+ // If all elements have the same type, just emit an array constant.
+ if (CommonElementType)
+ return llvm::ConstantArray::get(
+ llvm::ArrayType::get(CommonElementType, ArrayBound), Elements);
+
+ // We have mixed types. Use a packed struct.
+ llvm::SmallVector<llvm::Type *, 16> Types;
+ Types.reserve(Elements.size());
+ for (llvm::Constant *Elt : Elements)
+ Types.push_back(Elt->getType());
+ llvm::StructType *SType =
+ llvm::StructType::get(CGM.getLLVMContext(), Types, true);
+ return llvm::ConstantStruct::get(SType, Elements);
+}
+
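
A sketch of the encodings this function chooses:

    int a[256] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    // >= 8 nonzero elements and >= 8 trailing zeroes: a struct of two
    // arrays, { [9 x i32] <data>, [247 x i32] zeroinitializer }
    int b[8] = {};
    // no nonzero prefix: [8 x i32] zeroinitializer
    int c[4] = {1, 2};
    // short zero tail (< 8): padded in place to
    // [4 x i32] [i32 1, i32 2, i32 0, i32 0]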
/// This class only needs to handle two cases:
/// 1) Literals (this is used by APValue emission to emit literals).
/// 2) Arrays, structs and unions (outside C++11 mode, we don't currently
@@ -832,60 +898,47 @@ public:
}
llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) {
- llvm::ArrayType *AType =
- cast<llvm::ArrayType>(ConvertType(ILE->getType()));
- llvm::Type *ElemTy = AType->getElementType();
+ auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType());
+ assert(CAT && "can't emit array init for non-constant-bound array");
unsigned NumInitElements = ILE->getNumInits();
- unsigned NumElements = AType->getNumElements();
+ unsigned NumElements = CAT->getSize().getZExtValue();
// Initialising an array requires us to automatically
// initialise any elements that have not been initialised explicitly
unsigned NumInitableElts = std::min(NumInitElements, NumElements);
- QualType EltType = CGM.getContext().getAsArrayType(T)->getElementType();
+ QualType EltType = CAT->getElementType();
// Initialize remaining array elements.
- llvm::Constant *fillC;
- if (Expr *filler = ILE->getArrayFiller())
+ llvm::Constant *fillC = nullptr;
+ if (Expr *filler = ILE->getArrayFiller()) {
fillC = Emitter.tryEmitAbstractForMemory(filler, EltType);
- else
- fillC = Emitter.emitNullForMemory(EltType);
- if (!fillC)
- return nullptr;
-
- // Try to use a ConstantAggregateZero if we can.
- if (fillC->isNullValue() && !NumInitableElts)
- return llvm::ConstantAggregateZero::get(AType);
+ if (!fillC)
+ return nullptr;
+ }
// Copy initializer elements.
SmallVector<llvm::Constant*, 16> Elts;
- Elts.reserve(NumInitableElts + NumElements);
+ if (fillC && fillC->isNullValue())
+ Elts.reserve(NumInitableElts + 1);
+ else
+ Elts.reserve(NumElements);
- bool RewriteType = false;
+ llvm::Type *CommonElementType = nullptr;
for (unsigned i = 0; i < NumInitableElts; ++i) {
Expr *Init = ILE->getInit(i);
llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
if (!C)
return nullptr;
- RewriteType |= (C->getType() != ElemTy);
+ if (i == 0)
+ CommonElementType = C->getType();
+ else if (C->getType() != CommonElementType)
+ CommonElementType = nullptr;
Elts.push_back(C);
}
- RewriteType |= (fillC->getType() != ElemTy);
- Elts.resize(NumElements, fillC);
-
- if (RewriteType) {
- // FIXME: Try to avoid packing the array
- std::vector<llvm::Type*> Types;
- Types.reserve(NumInitableElts + NumElements);
- for (unsigned i = 0, e = Elts.size(); i < e; ++i)
- Types.push_back(Elts[i]->getType());
- llvm::StructType *SType = llvm::StructType::get(AType->getContext(),
- Types, true);
- return llvm::ConstantStruct::get(SType, Elts);
- }
-
- return llvm::ConstantArray::get(AType, Elts);
+ return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts,
+ fillC);
}
llvm::Constant *EmitRecordInitialization(InitListExpr *ILE, QualType T) {
@@ -1881,40 +1934,31 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
case APValue::Union:
return ConstStructBuilder::BuildStruct(*this, Value, DestType);
case APValue::Array: {
- const ArrayType *CAT = CGM.getContext().getAsArrayType(DestType);
+ const ConstantArrayType *CAT =
+ CGM.getContext().getAsConstantArrayType(DestType);
unsigned NumElements = Value.getArraySize();
unsigned NumInitElts = Value.getArrayInitializedElts();
// Emit array filler, if there is one.
llvm::Constant *Filler = nullptr;
- if (Value.hasArrayFiller())
+ if (Value.hasArrayFiller()) {
Filler = tryEmitAbstractForMemory(Value.getArrayFiller(),
CAT->getElementType());
-
- // Emit initializer elements.
- llvm::Type *CommonElementType =
- CGM.getTypes().ConvertType(CAT->getElementType());
-
- // Try to use a ConstantAggregateZero if we can.
- if (Filler && Filler->isNullValue() && !NumInitElts) {
- llvm::ArrayType *AType =
- llvm::ArrayType::get(CommonElementType, NumElements);
- return llvm::ConstantAggregateZero::get(AType);
+ if (!Filler)
+ return nullptr;
}
+ // Emit initializer elements.
SmallVector<llvm::Constant*, 16> Elts;
- Elts.reserve(NumElements);
- for (unsigned I = 0; I < NumElements; ++I) {
- llvm::Constant *C = Filler;
- if (I < NumInitElts) {
- C = tryEmitPrivateForMemory(Value.getArrayInitializedElt(I),
- CAT->getElementType());
- } else if (!Filler) {
- assert(Value.hasArrayFiller() &&
- "Missing filler for implicit elements of initializer");
- C = tryEmitPrivateForMemory(Value.getArrayFiller(),
- CAT->getElementType());
- }
+ if (Filler && Filler->isNullValue())
+ Elts.reserve(NumInitElts + 1);
+ else
+ Elts.reserve(NumElements);
+
+ llvm::Type *CommonElementType = nullptr;
+ for (unsigned I = 0; I < NumInitElts; ++I) {
+ llvm::Constant *C = tryEmitPrivateForMemory(
+ Value.getArrayInitializedElt(I), CAT->getElementType());
if (!C) return nullptr;
if (I == 0)
@@ -1924,20 +1968,8 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
Elts.push_back(C);
}
- if (!CommonElementType) {
- // FIXME: Try to avoid packing the array
- std::vector<llvm::Type*> Types;
- Types.reserve(NumElements);
- for (unsigned i = 0, e = Elts.size(); i < e; ++i)
- Types.push_back(Elts[i]->getType());
- llvm::StructType *SType =
- llvm::StructType::get(CGM.getLLVMContext(), Types, true);
- return llvm::ConstantStruct::get(SType, Elts);
- }
-
- llvm::ArrayType *AType =
- llvm::ArrayType::get(CommonElementType, NumElements);
- return llvm::ConstantArray::get(AType, Elts);
+ return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts,
+ Filler);
}
case APValue::MemberPointer:
return CGM.getCXXABI().EmitMemberPointer(Value, DestType);
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index c46215067a68..783f74c5026d 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -165,7 +165,7 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
// If a unary op has a widened operand, the op cannot overflow.
if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
- return IsWidenedIntegerOp(Ctx, UO->getSubExpr());
+ return !UO->canOverflow();
// We usually don't need overflow checks for binops with widened operands.
// Multiplication with promoted unsigned operands is a special case.
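
The promotion rule behind UnaryOperator::canOverflow(), as a sketch:

    void bump(short s, int i) {
      ++s;  // operand is promoted to int, so the increment itself cannot
            // overflow: canOverflow() is false and no check is emitted
      ++i;  // full-width int: canOverflow() is true, so -ftrapv or
            // -fsanitize=signed-integer-overflow checks still apply
    }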
@@ -387,6 +387,9 @@ public:
Value *VisitIntegerLiteral(const IntegerLiteral *E) {
return Builder.getInt(E->getValue());
}
+ Value *VisitFixedPointLiteral(const FixedPointLiteral *E) {
+ return Builder.getInt(E->getValue());
+ }
Value *VisitFloatingLiteral(const FloatingLiteral *E) {
return llvm::ConstantFP::get(VMContext, E->getValue());
}
@@ -422,10 +425,11 @@ public:
Value *VisitOpaqueValueExpr(OpaqueValueExpr *E) {
if (E->isGLValue())
- return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc());
+ return EmitLoadOfLValue(CGF.getOrCreateOpaqueLValueMapping(E),
+ E->getExprLoc());
// Otherwise, assume the mapping is the scalar directly.
- return CGF.getOpaqueRValueMapping(E).getScalarVal();
+ return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal();
}
Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant,
@@ -1144,7 +1148,7 @@ Value *ScalarExprEmitter::EmitNullValue(QualType Ty) {
return CGF.EmitFromMemory(CGF.CGM.EmitNullConstant(Ty), Ty);
}
-/// \brief Emit a sanitization check for the given "binary" operation (which
+/// Emit a sanitization check for the given "binary" operation (which
/// might actually be a unary increment which has been lowered to a binary
/// operation). The check passes if all values in \p Checks (which are \c i1),
/// are \c true.
@@ -1617,6 +1621,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CE->getLocStart());
}
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
+ const QualType SrcType = E->getType();
+
+ if (SrcType.mayBeNotDynamicClass() && DestTy.mayBeDynamicClass()) {
+ // Casting to a pointer that could carry dynamic information (provided by
+ // invariant.group) requires a launder.
+ Src = Builder.CreateLaunderInvariantGroup(Src);
+ } else if (SrcType.mayBeDynamicClass() && DestTy.mayBeNotDynamicClass()) {
+ // Casting to a pointer that does not carry dynamic information (provided
+ // by invariant.group) requires stripping it. Note that we don't do this
+ // if the source could not be a dynamic type and the destination could be
+ // dynamic, because the dynamic information is already laundered: since
+ // launder(strip(src)) == launder(src), there is no need to add an extra
+ // strip before the launder.
+ Src = Builder.CreateStripInvariantGroup(Src);
+ }
+ }
+
return Builder.CreateBitCast(Src, DstTy);
}
case CK_AddressSpaceConversion: {
@@ -1753,12 +1775,31 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
llvm::Value* IntResult =
Builder.CreateIntCast(Src, MiddleTy, InputSigned, "conv");
- return Builder.CreateIntToPtr(IntResult, DestLLVMTy);
+ auto *IntToPtr = Builder.CreateIntToPtr(IntResult, DestLLVMTy);
+
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
+ // Going from integer to pointer that could be dynamic requires reloading
+ // dynamic information from invariant.group.
+ if (DestTy.mayBeDynamicClass())
+ IntToPtr = Builder.CreateLaunderInvariantGroup(IntToPtr);
+ }
+ return IntToPtr;
}
- case CK_PointerToIntegral:
+ case CK_PointerToIntegral: {
assert(!DestTy->isBooleanType() && "bool should use PointerToBool");
- return Builder.CreatePtrToInt(Visit(E), ConvertType(DestTy));
+ auto *PtrExpr = Visit(E);
+
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
+ const QualType SrcType = E->getType();
+
+ // Casting to an integer requires stripping dynamic information, as an
+ // integer does not carry it.
+ if (SrcType.mayBeDynamicClass())
+ PtrExpr = Builder.CreateStripInvariantGroup(PtrExpr);
+ }
+ return Builder.CreatePtrToInt(PtrExpr, ConvertType(DestTy));
+ }
case CK_ToVoid: {
CGF.EmitIgnoredExpr(E);
return nullptr;
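
A sketch of the cast rules in this function under -fstrict-vtable-pointers:

    #include <cstdint>

    struct Dyn { virtual void f(); };

    void casts(Dyn *d, void *raw) {
      // Pointer -> integer: integers carry no dynamic information, so the
      // operand goes through llvm.strip.invariant.group first.
      uintptr_t bits = reinterpret_cast<uintptr_t>(d);

      // Integer -> possibly dynamic pointer: dynamic information must be
      // re-established via llvm.launder.invariant.group.
      Dyn *d2 = reinterpret_cast<Dyn *>(bits);

      // Bitcast from a may-not-be-dynamic pointee to a may-be-dynamic one
      // is laundered as well.
      Dyn *d3 = static_cast<Dyn *>(raw);
      (void)d2;
      (void)d3;
    }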
@@ -1873,7 +1914,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
return Builder.CreateNSWAdd(InVal, Amount, Name);
// Fall through.
case LangOptions::SOB_Trapping:
- if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr()))
+ if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
}
@@ -1955,11 +1996,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
} else if (type->isIntegerType()) {
// Note that signed integer inc/dec with width less than int can't
// overflow because of promotion rules; we're just eliding a few steps here.
- bool CanOverflow = value->getType()->getIntegerBitWidth() >=
- CGF.IntTy->getIntegerBitWidth();
- if (CanOverflow && type->isSignedIntegerOrEnumerationType()) {
+ if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) {
value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
- } else if (CanOverflow && type->isUnsignedIntegerType() &&
+ } else if (E->canOverflow() && type->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
value =
EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc));
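// Editor's sketch of what UnaryOperator::canOverflow() distinguishes (f is a
// hypothetical example, not part of the patch):

void f() {
  short s = 0;
  ++s; // Operand is promoted to int, so the increment cannot overflow:
       // canOverflow() is false and no overflow check is emitted.
  int i = 0;
  ++i; // Full-width signed increment: canOverflow() is true, so
       // -fsanitize=signed-integer-overflow instruments it.
}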
@@ -1975,7 +2014,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
// VLA types don't have constant size.
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(type)) {
- llvm::Value *numElts = CGF.getVLASize(vla).first;
+ llvm::Value *numElts = CGF.getVLASize(vla).NumElts;
if (!isInc) numElts = Builder.CreateNSWNeg(numElts, "vla.negsize");
if (CGF.getLangOpts().isSignedOverflowDefined())
value = Builder.CreateGEP(value, numElts, "vla.inc");
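// Editor's note: the accessors above imply getVLASize() now returns a named
// struct rather than a std::pair; assumed shape (field names taken from the
// uses in this patch):
//
//   struct VlaSizePair {
//     llvm::Value *NumElts; // runtime count of non-VLA elements
//     QualType Type;        // the underlying element type
//   };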
@@ -2273,16 +2312,13 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
CGF.EmitIgnoredExpr(E->getArgumentExpr());
}
- QualType eltType;
- llvm::Value *numElts;
- std::tie(numElts, eltType) = CGF.getVLASize(VAT);
-
- llvm::Value *size = numElts;
+ auto VlaSize = CGF.getVLASize(VAT);
+ llvm::Value *size = VlaSize.NumElts;
// Scale the number of non-VLA elements by the non-VLA element size.
- CharUnits eltSize = CGF.getContext().getTypeSizeInChars(eltType);
+ CharUnits eltSize = CGF.getContext().getTypeSizeInChars(VlaSize.Type);
if (!eltSize.isOne())
- size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), numElts);
+ size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), size);
return size;
}
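// Editor's sketch of the run-time sizeof computation above (hypothetical C
// example; VLAs are a C99 feature):

#include <stddef.h>

size_t vla_size(int n) {
  int a[n][8];      // getVLASize yields NumElts = n * 8, element type int
  return sizeof(a); // emitted as an NUW multiply: (n * 8) * sizeof(int)
}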
@@ -2769,7 +2805,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(elementType)) {
// The element count here is the total number of non-VLA elements.
- llvm::Value *numElements = CGF.getVLASize(vla).first;
+ llvm::Value *numElements = CGF.getVLASize(vla).NumElts;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
@@ -2964,10 +3000,9 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
// For a variable-length array, this is going to be non-constant.
if (const VariableArrayType *vla
= CGF.getContext().getAsVariableArrayType(elementType)) {
- llvm::Value *numElements;
- std::tie(numElements, elementType) = CGF.getVLASize(vla);
-
- divisor = numElements;
+ auto VlaSize = CGF.getVLASize(vla);
+ elementType = VlaSize.Type;
+ divisor = VlaSize.NumElts;
// Scale the number of non-VLA elements by the non-VLA element size.
CharUnits eltSize = CGF.getContext().getTypeSizeInChars(elementType);
@@ -3243,6 +3278,23 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,
Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp");
} else {
// Unsigned integers and pointers.
+
+ if (CGF.CGM.getCodeGenOpts().StrictVTablePointers &&
+ !isa<llvm::ConstantPointerNull>(LHS) &&
+ !isa<llvm::ConstantPointerNull>(RHS)) {
+
+ // Dynamic information must be stripped for comparisons, because it could
+ // otherwise leak. Based on comparisons of pointers to dynamic objects,
+ // the optimizer can replace one pointer with another, which might be
+ // incorrect in the presence of invariant groups. Comparison with null is
+ // safe because null does not carry any dynamic information.
+ if (LHSTy.mayBeDynamicClass())
+ LHS = Builder.CreateStripInvariantGroup(LHS);
+ if (RHSTy.mayBeDynamicClass())
+ RHS = Builder.CreateStripInvariantGroup(RHS);
+ }
+
Result = Builder.CreateICmp(UICmpOpc, LHS, RHS, "cmp");
}
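// Editor's sketch (Dyn is a hypothetical dynamic class): comparing two
// possibly-dynamic pointers strips both operands first, while a comparison
// against null needs no strip:

struct Dyn { virtual ~Dyn() {} };

bool same(Dyn *a, Dyn *b) { return a == b; }  // both sides stripped
bool isNull(Dyn *a) { return a == nullptr; }  // null carries no dynamic info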
@@ -3433,6 +3485,12 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
// Insert an entry into the phi node for the edge with the value of RHSCond.
PN->addIncoming(RHSCond, RHSBlock);
+ // Artificial location to preserve the scope information
+ {
+ auto NL = ApplyDebugLocation::CreateArtificial(CGF);
+ PN->setDebugLoc(Builder.getCurrentDebugLocation());
+ }
+
// ZExt result to int.
return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
}
diff --git a/lib/CodeGen/CGGPUBuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp
index 48156b1b26b7..b5375ffb8db7 100644
--- a/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/lib/CodeGen/CGGPUBuiltin.cpp
@@ -83,8 +83,9 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
/* ParamsToSkip = */ 0);
// We don't know how to emit non-scalar varargs.
- if (std::any_of(Args.begin() + 1, Args.end(),
- [](const CallArg &A) { return !A.RV.isScalar(); })) {
+ if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) {
+ return !A.getRValue(*this).isScalar();
+ })) {
CGM.ErrorUnsupported(E, "non-scalar arg to printf");
return RValue::get(llvm::ConstantInt::get(IntTy, 0));
}
@@ -97,7 +98,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
} else {
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
- ArgTypes.push_back(Args[I].RV.getScalarVal()->getType());
+ ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType());
// Using llvm::StructType is correct only because printf doesn't accept
// aggregates. If we had to handle aggregates here, we'd have to manually
@@ -109,7 +110,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
- llvm::Value *Arg = Args[I].RV.getScalarVal();
+ llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal();
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
}
BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
@@ -117,6 +118,6 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
// Invoke vprintf and return.
llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
- return RValue::get(
- Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr}));
+ return RValue::get(Builder.CreateCall(
+ VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
}
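// Editor's note: the device-side convention targeted above is CUDA's vprintf
// entry point, which takes the format string plus one buffer of packed
// arguments:
//
//   int vprintf(const char *format, void *valist);
//
// so a call like printf("%d %f\n", i, d) is lowered roughly to (sketch):
//
//   struct { int a0; double a1; } buf = { i, d };
//   vprintf("%d %f\n", &buf);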
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index 15608c105dc7..9d5f23ff9a2a 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -32,62 +32,62 @@ class Attr;
class ASTContext;
namespace CodeGen {
-/// \brief Attributes that may be specified on loops.
+/// Attributes that may be specified on loops.
struct LoopAttributes {
explicit LoopAttributes(bool IsParallel = false);
void clear();
- /// \brief Generate llvm.loop.parallel metadata for loads and stores.
+ /// Generate llvm.loop.parallel metadata for loads and stores.
bool IsParallel;
- /// \brief State of loop vectorization or unrolling.
+ /// State of loop vectorization or unrolling.
enum LVEnableState { Unspecified, Enable, Disable, Full };
- /// \brief Value for llvm.loop.vectorize.enable metadata.
+ /// Value for llvm.loop.vectorize.enable metadata.
LVEnableState VectorizeEnable;
- /// \brief Value for llvm.loop.unroll.* metadata (enable, disable, or full).
+ /// Value for llvm.loop.unroll.* metadata (enable, disable, or full).
LVEnableState UnrollEnable;
- /// \brief Value for llvm.loop.vectorize.width metadata.
+ /// Value for llvm.loop.vectorize.width metadata.
unsigned VectorizeWidth;
- /// \brief Value for llvm.loop.interleave.count metadata.
+ /// Value for llvm.loop.interleave.count metadata.
unsigned InterleaveCount;
- /// \brief llvm.unroll.
+ /// Value for llvm.loop.unroll.count metadata.
unsigned UnrollCount;
- /// \brief Value for llvm.loop.distribute.enable metadata.
+ /// Value for llvm.loop.distribute.enable metadata.
LVEnableState DistributeEnable;
};
-/// \brief Information used when generating a structured loop.
+/// Information used when generating a structured loop.
class LoopInfo {
public:
- /// \brief Construct a new LoopInfo for the loop with entry Header.
+ /// Construct a new LoopInfo for the loop with entry Header.
LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc);
- /// \brief Get the loop id metadata for this loop.
+ /// Get the loop id metadata for this loop.
llvm::MDNode *getLoopID() const { return LoopID; }
- /// \brief Get the header block of this loop.
+ /// Get the header block of this loop.
llvm::BasicBlock *getHeader() const { return Header; }
- /// \brief Get the set of attributes active for this loop.
+ /// Get the set of attributes active for this loop.
const LoopAttributes &getAttributes() const { return Attrs; }
private:
- /// \brief Loop ID metadata.
+ /// Loop ID metadata.
llvm::MDNode *LoopID;
- /// \brief Header block of this loop.
+ /// Header block of this loop.
llvm::BasicBlock *Header;
- /// \brief The attributes for this loop.
+ /// The attributes for this loop.
LoopAttributes Attrs;
};
-/// \brief A stack of loop information corresponding to loop nesting levels.
+/// A stack of loop information corresponding to loop nesting levels.
/// This stack can be used to prepare attributes which are applied when a loop
/// is emitted.
class LoopInfoStack {
@@ -97,70 +97,70 @@ class LoopInfoStack {
public:
LoopInfoStack() {}
- /// \brief Begin a new structured loop. The set of staged attributes will be
+ /// Begin a new structured loop. The set of staged attributes will be
/// applied to the loop and then cleared.
void push(llvm::BasicBlock *Header, const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc);
- /// \brief Begin a new structured loop. Stage attributes from the Attrs list.
+ /// Begin a new structured loop. Stage attributes from the Attrs list.
/// The staged attributes are applied to the loop and then cleared.
void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx,
llvm::ArrayRef<const Attr *> Attrs, const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc);
- /// \brief End the current loop.
+ /// End the current loop.
void pop();
- /// \brief Return the top loop id metadata.
+ /// Return the top loop id metadata.
llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); }
- /// \brief Return true if the top loop is parallel.
+ /// Return true if the top loop is parallel.
bool getCurLoopParallel() const {
return hasInfo() ? getInfo().getAttributes().IsParallel : false;
}
- /// \brief Function called by the CodeGenFunction when an instruction is
+ /// Function called by the CodeGenFunction when an instruction is
/// created.
void InsertHelper(llvm::Instruction *I) const;
- /// \brief Set the next pushed loop as parallel.
+ /// Set the next pushed loop as parallel.
void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; }
- /// \brief Set the next pushed loop 'vectorize.enable'
+ /// Set the next pushed loop's 'vectorize.enable' state.
void setVectorizeEnable(bool Enable = true) {
StagedAttrs.VectorizeEnable =
Enable ? LoopAttributes::Enable : LoopAttributes::Disable;
}
- /// \brief Set the next pushed loop as a distribution candidate.
+ /// Set the next pushed loop as a distribution candidate.
void setDistributeState(bool Enable = true) {
StagedAttrs.DistributeEnable =
Enable ? LoopAttributes::Enable : LoopAttributes::Disable;
}
- /// \brief Set the next pushed loop unroll state.
+ /// Set the next pushed loop unroll state.
void setUnrollState(const LoopAttributes::LVEnableState &State) {
StagedAttrs.UnrollEnable = State;
}
- /// \brief Set the vectorize width for the next loop pushed.
+ /// Set the vectorize width for the next loop pushed.
void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; }
- /// \brief Set the interleave count for the next loop pushed.
+ /// Set the interleave count for the next loop pushed.
void setInterleaveCount(unsigned C) { StagedAttrs.InterleaveCount = C; }
- /// \brief Set the unroll count for the next loop pushed.
+ /// Set the unroll count for the next loop pushed.
void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; }
private:
- /// \brief Returns true if there is LoopInfo on the stack.
+ /// Returns true if there is LoopInfo on the stack.
bool hasInfo() const { return !Active.empty(); }
- /// \brief Return the LoopInfo for the current loop. HasInfo should be called
+ /// Return the LoopInfo for the current loop. HasInfo should be called
/// first to ensure LoopInfo is present.
const LoopInfo &getInfo() const { return Active.back(); }
- /// \brief The set of attributes that will be applied to the next pushed loop.
+ /// The set of attributes that will be applied to the next pushed loop.
LoopAttributes StagedAttrs;
- /// \brief Stack of active loops.
+ /// Stack of active loops.
llvm::SmallVector<LoopInfo, 4> Active;
};
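// Editor's sketch of how staged attributes surface in IR (metadata names per
// the LLVM loop-metadata convention; the pragma below is illustrative):
//
//   #pragma clang loop vectorize(enable) interleave_count(2) unroll_count(4)
//   for (int i = 0; i < n; ++i) { ... }
//
// stages VectorizeEnable=Enable, InterleaveCount=2, and UnrollCount=4, and
// the loop's backedge branch is tagged roughly as:
//
//   br i1 %cond, label %body, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2, !3}
//   !1 = !{!"llvm.loop.vectorize.enable", i1 true}
//   !2 = !{!"llvm.loop.interleave.count", i32 2}
//   !3 = !{!"llvm.loop.unroll.count", i32 4}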
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
new file mode 100644
index 000000000000..922e0934b866
--- /dev/null
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -0,0 +1,885 @@
+//===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions to generate various special functions for C
+// structs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "clang/AST/NonTrivialTypeVisitor.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <array>
+
+using namespace clang;
+using namespace CodeGen;
+
+// Return the size of a field in number of bits.
+static uint64_t getFieldSize(const FieldDecl *FD, QualType FT,
+ ASTContext &Ctx) {
+ if (FD && FD->isBitField())
+ return FD->getBitWidthValue(Ctx);
+ return Ctx.getTypeSize(FT);
+}
+
+namespace {
+enum { DstIdx = 0, SrcIdx = 1 };
+const char *ValNameStr[2] = {"dst", "src"};
+
+template <class Derived> struct StructVisitor {
+ StructVisitor(ASTContext &Ctx) : Ctx(Ctx) {}
+
+ template <class... Ts>
+ void visitStructFields(QualType QT, CharUnits CurStructOffset, Ts... Args) {
+ const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
+
+ // Iterate over the fields of the struct.
+ for (const FieldDecl *FD : RD->fields()) {
+ QualType FT = FD->getType();
+ FT = QT.isVolatileQualified() ? FT.withVolatile() : FT;
+ asDerived().visit(FT, FD, CurStructOffset, Args...);
+ }
+
+ asDerived().flushTrivialFields(Args...);
+ }
+
+ template <class... Ts> void visitTrivial(Ts... Args) {}
+
+ template <class... Ts> void visitCXXDestructor(Ts... Args) {
+ llvm_unreachable("field of a C++ struct type is not expected");
+ }
+
+ template <class... Ts> void flushTrivialFields(Ts... Args) {}
+
+ uint64_t getFieldOffsetInBits(const FieldDecl *FD) {
+ return FD ? Ctx.getASTRecordLayout(FD->getParent())
+ .getFieldOffset(FD->getFieldIndex())
+ : 0;
+ }
+
+ CharUnits getFieldOffset(const FieldDecl *FD) {
+ return Ctx.toCharUnitsFromBits(getFieldOffsetInBits(FD));
+ }
+
+ Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+ ASTContext &getContext() { return Ctx; }
+ ASTContext &Ctx;
+};
+
+template <class Derived, bool IsMove>
+struct CopyStructVisitor : StructVisitor<Derived>,
+ CopiedTypeVisitor<Derived, IsMove> {
+ using StructVisitor<Derived>::asDerived;
+ using Super = CopiedTypeVisitor<Derived, IsMove>;
+
+ CopyStructVisitor(ASTContext &Ctx) : StructVisitor<Derived>(Ctx) {}
+
+ template <class... Ts>
+ void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset,
+ Ts &&... Args) {
+ if (PCK)
+ asDerived().flushTrivialFields(std::forward<Ts>(Args)...);
+ }
+
+ template <class... Ts>
+ void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset,
+ Ts &&... Args) {
+ if (const auto *AT = asDerived().getContext().getAsArrayType(FT)) {
+ asDerived().visitArray(PCK, AT, FT.isVolatileQualified(), FD,
+ CurStructOffset, std::forward<Ts>(Args)...);
+ return;
+ }
+
+ Super::visitWithKind(PCK, FT, FD, CurStructOffset,
+ std::forward<Ts>(Args)...);
+ }
+
+ template <class... Ts>
+ void visitTrivial(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset,
+ Ts... Args) {
+ assert(!FT.isVolatileQualified() && "volatile field not expected");
+ ASTContext &Ctx = asDerived().getContext();
+ uint64_t FieldSize = getFieldSize(FD, FT, Ctx);
+
+ // Ignore zero-sized fields.
+ if (FieldSize == 0)
+ return;
+
+ uint64_t FStartInBits = asDerived().getFieldOffsetInBits(FD);
+ uint64_t FEndInBits = FStartInBits + FieldSize;
+ uint64_t RoundedFEnd = llvm::alignTo(FEndInBits, Ctx.getCharWidth());
+
+ // Set Start if this is the first field of a sequence of trivial fields.
+ if (Start == End)
+ Start = CurStructOffset + Ctx.toCharUnitsFromBits(FStartInBits);
+ End = CurStructOffset + Ctx.toCharUnitsFromBits(RoundedFEnd);
+ }
+
+ CharUnits Start = CharUnits::Zero(), End = CharUnits::Zero();
+};
+
+// This function creates the mangled name of a special function of a non-trivial
+// C struct. Since there is no ODR in C, the function is mangled based on the
+// struct contents and not the name. The mangled name has the following
+// structure:
+//
+// <function-name> ::= <prefix> <alignment-info> "_" <struct-field-info>
+// <prefix> ::= "__destructor_" | "__default_constructor_" |
+// "__copy_constructor_" | "__move_constructor_" |
+// "__copy_assignment_" | "__move_assignment_"
+// <alignment-info> ::= <dst-alignment> ["_" <src-alignment>]
+// <struct-field-info> ::= <field-info>+
+// <field-info> ::= <struct-or-scalar-field-info> | <array-field-info>
+// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> |
+// <weak-field-info> | <trivial-field-info>
+// <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n"
+// <num-elements> <innermost-element-info> "_AE"
+// <innermost-element-info> ::= <struct-or-scalar-field-info>
+// <strong-field-info> ::= "_s" ["b"] ["v"] <field-offset>
+// <weak-field-info> ::= "_w" ["v"] <field-offset>
+// <trivial-field-info> ::= "_t" ["v"] <field-offset> "w" <field-size>
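+//
+// For example (editor's sketch; 64-bit target, under ARC), for
+//
+//   struct S { __strong id a; int b; };
+//
+// the copy constructor is named "__copy_constructor_8_8_s0_t8w4": dst and src
+// alignments of 8, a strong field at offset 0, then a trivial run at offset 8
+// of width 4.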
+
+template <class Derived> struct GenFuncNameBase {
+ std::string getVolatileOffsetStr(bool IsVolatile, CharUnits Offset) {
+ std::string S;
+ if (IsVolatile)
+ S = "v";
+ S += llvm::to_string(Offset.getQuantity());
+ return S;
+ }
+
+ void visitARCStrong(QualType FT, const FieldDecl *FD,
+ CharUnits CurStructOffset) {
+ appendStr("_s");
+ if (FT->isBlockPointerType())
+ appendStr("b");
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset));
+ }
+
+ void visitARCWeak(QualType FT, const FieldDecl *FD,
+ CharUnits CurStructOffset) {
+ appendStr("_w");
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset));
+ }
+
+ void visitStruct(QualType QT, const FieldDecl *FD,
+ CharUnits CurStructOffset) {
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ asDerived().visitStructFields(QT, FieldOffset);
+ }
+
+ template <class FieldKind>
+ void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile,
+ const FieldDecl *FD, CharUnits CurStructOffset) {
+ // The string for non-volatile trivial fields is emitted when
+ // flushTrivialFields is called.
+ if (!FK)
+ return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset);
+
+ CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+ ASTContext &Ctx = asDerived().getContext();
+ const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
+ unsigned NumElts = Ctx.getConstantArrayElementCount(CAT);
+ QualType EltTy = Ctx.getBaseElementType(CAT);
+ CharUnits EltSize = Ctx.getTypeSizeInChars(EltTy);
+ appendStr("_AB" + llvm::to_string(FieldOffset.getQuantity()) + "s" +
+ llvm::to_string(EltSize.getQuantity()) + "n" +
+ llvm::to_string(NumElts));
+ EltTy = IsVolatile ? EltTy.withVolatile() : EltTy;
+ asDerived().visitWithKind(FK, EltTy, nullptr, FieldOffset);
+ appendStr("_AE");
+ }
+
+ void appendStr(StringRef Str) { Name += Str; }
+
+ std::string getName(QualType QT, bool IsVolatile) {
+ QT = IsVolatile ? QT.withVolatile() : QT;
+ asDerived().visitStructFields(QT, CharUnits::Zero());
+ return Name;
+ }
+
+ Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+ std::string Name;
+};
+
+template <class Derived>
+struct GenUnaryFuncName : StructVisitor<Derived>, GenFuncNameBase<Derived> {
+ GenUnaryFuncName(StringRef Prefix, CharUnits DstAlignment, ASTContext &Ctx)
+ : StructVisitor<Derived>(Ctx) {
+ this->appendStr(Prefix);
+ this->appendStr(llvm::to_string(DstAlignment.getQuantity()));
+ }
+};
+
+// Helper function to create a null constant.
+static llvm::Constant *getNullForVariable(Address Addr) {
+ llvm::Type *Ty = Addr.getElementType();
+ return llvm::ConstantPointerNull::get(cast<llvm::PointerType>(Ty));
+}
+
+template <bool IsMove>
+struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>,
+ GenFuncNameBase<GenBinaryFuncName<IsMove>> {
+
+ GenBinaryFuncName(StringRef Prefix, CharUnits DstAlignment,
+ CharUnits SrcAlignment, ASTContext &Ctx)
+ : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>(Ctx) {
+ this->appendStr(Prefix);
+ this->appendStr(llvm::to_string(DstAlignment.getQuantity()));
+ this->appendStr("_" + llvm::to_string(SrcAlignment.getQuantity()));
+ }
+
+ void flushTrivialFields() {
+ if (this->Start == this->End)
+ return;
+
+ this->appendStr("_t" + llvm::to_string(this->Start.getQuantity()) + "w" +
+ llvm::to_string((this->End - this->Start).getQuantity()));
+
+ this->Start = this->End = CharUnits::Zero();
+ }
+
+ void visitVolatileTrivial(QualType FT, const FieldDecl *FD,
+ CharUnits CurStackOffset) {
+ // Because volatile fields can be bit-fields and are individually copied,
+ // their offset and width are in bits.
+ uint64_t OffsetInBits =
+ this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD);
+ this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" +
+ llvm::to_string(getFieldSize(FD, FT, this->Ctx)));
+ }
+};
+
+struct GenDefaultInitializeFuncName
+ : GenUnaryFuncName<GenDefaultInitializeFuncName>,
+ DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName> {
+ using Super = DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName>;
+ GenDefaultInitializeFuncName(CharUnits DstAlignment, ASTContext &Ctx)
+ : GenUnaryFuncName<GenDefaultInitializeFuncName>("__default_constructor_",
+ DstAlignment, Ctx) {}
+ void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(PDIK, AT, FT.isVolatileQualified(), FD, CurStructOffset);
+ return;
+ }
+
+ Super::visitWithKind(PDIK, FT, FD, CurStructOffset);
+ }
+};
+
+struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>,
+ DestructedTypeVisitor<GenDestructorFuncName> {
+ using Super = DestructedTypeVisitor<GenDestructorFuncName>;
+ GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx)
+ : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment,
+ Ctx) {}
+ void visitWithKind(QualType::DestructionKind DK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(DK, AT, FT.isVolatileQualified(), FD, CurStructOffset);
+ return;
+ }
+
+ Super::visitWithKind(DK, FT, FD, CurStructOffset);
+ }
+};
+
+// Helper function that creates CGFunctionInfo for an N-ary special function.
+template <size_t N>
+static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM,
+ FunctionArgList &Args) {
+ ASTContext &Ctx = CGM.getContext();
+ llvm::SmallVector<ImplicitParamDecl *, N> Params;
+ QualType ParamTy = Ctx.getPointerType(Ctx.VoidPtrTy);
+
+ for (unsigned I = 0; I < N; ++I)
+ Params.push_back(ImplicitParamDecl::Create(
+ Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(ValNameStr[I]), ParamTy,
+ ImplicitParamDecl::Other));
+
+ for (auto &P : Params)
+ Args.push_back(P);
+
+ return CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+}
+
+// Template classes that are used as bases for classes that emit special
+// functions.
+template <class Derived> struct GenFuncBase {
+ template <size_t N>
+ void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, N> Addrs) {
+ this->asDerived().callSpecialFunction(
+ FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs);
+ }
+
+ template <class FieldKind, size_t N>
+ void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile,
+ const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, N> Addrs) {
+ // Non-volatile trivial fields are copied when flushTrivialFields is called.
+ if (!FK)
+ return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset,
+ Addrs);
+
+ CodeGenFunction &CGF = *this->CGF;
+ ASTContext &Ctx = CGF.getContext();
+
+ // Compute the end address.
+ QualType BaseEltQT;
+ std::array<Address, N> StartAddrs = Addrs;
+ for (unsigned I = 0; I < N; ++I)
+ StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD);
+ Address DstAddr = StartAddrs[DstIdx];
+ llvm::Value *NumElts = CGF.emitArrayLength(AT, BaseEltQT, DstAddr);
+ unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity();
+ llvm::Value *BaseEltSizeVal =
+ llvm::ConstantInt::get(NumElts->getType(), BaseEltSize);
+ llvm::Value *SizeInBytes =
+ CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts);
+ Address BC = CGF.Builder.CreateBitCast(DstAddr, CGF.CGM.Int8PtrTy);
+ llvm::Value *DstArrayEnd =
+ CGF.Builder.CreateInBoundsGEP(BC.getPointer(), SizeInBytes);
+ DstArrayEnd = CGF.Builder.CreateBitCast(DstArrayEnd, CGF.CGM.Int8PtrPtrTy,
+ "dstarray.end");
+ llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock();
+
+ // Create the header block and insert the phi instructions.
+ llvm::BasicBlock *HeaderBB = CGF.createBasicBlock("loop.header");
+ CGF.EmitBlock(HeaderBB);
+ llvm::PHINode *PHIs[N];
+
+ for (unsigned I = 0; I < N; ++I) {
+ PHIs[I] = CGF.Builder.CreatePHI(CGF.CGM.Int8PtrPtrTy, 2, "addr.cur");
+ PHIs[I]->addIncoming(StartAddrs[I].getPointer(), PreheaderBB);
+ }
+
+ // Create the exit and loop body blocks.
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("loop.exit");
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("loop.body");
+
+ // Emit the comparison and conditional branch instruction that jumps to
+ // either the exit or the loop body.
+ llvm::Value *Done =
+ CGF.Builder.CreateICmpEQ(PHIs[DstIdx], DstArrayEnd, "done");
+ CGF.Builder.CreateCondBr(Done, ExitBB, LoopBB);
+
+ // Visit the element of the array in the loop body.
+ CGF.EmitBlock(LoopBB);
+ QualType EltQT = AT->getElementType();
+ CharUnits EltSize = Ctx.getTypeSizeInChars(EltQT);
+ std::array<Address, N> NewAddrs = Addrs;
+
+ for (unsigned I = 0; I < N; ++I)
+ NewAddrs[I] = Address(
+ PHIs[I], StartAddrs[I].getAlignment().alignmentAtOffset(EltSize));
+
+ EltQT = IsVolatile ? EltQT.withVolatile() : EltQT;
+ this->asDerived().visitWithKind(FK, EltQT, nullptr, CharUnits::Zero(),
+ NewAddrs);
+
+ LoopBB = CGF.Builder.GetInsertBlock();
+
+ for (unsigned I = 0; I < N; ++I) {
+ // Update the phi instructions with the incremented destination and
+ // source addresses.
+ NewAddrs[I] = getAddrWithOffset(NewAddrs[I], EltSize);
+ PHIs[I]->addIncoming(NewAddrs[I].getPointer(), LoopBB);
+ }
+
+ // Insert an unconditional branch to the header block.
+ CGF.Builder.CreateBr(HeaderBB);
+ CGF.EmitBlock(ExitBB);
+ }
+
+ /// Return an address with the specified offset from the passed address.
+ Address getAddrWithOffset(Address Addr, CharUnits Offset) {
+ assert(Addr.isValid() && "invalid address");
+ if (Offset.getQuantity() == 0)
+ return Addr;
+ Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy);
+ Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(),
+ CharUnits::One());
+ return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy);
+ }
+
+ Address getAddrWithOffset(Address Addr, CharUnits StructFieldOffset,
+ const FieldDecl *FD) {
+ return getAddrWithOffset(Addr, StructFieldOffset +
+ asDerived().getFieldOffset(FD));
+ }
+
+ template <size_t N>
+ llvm::Function *
+ getFunction(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
+ std::array<CharUnits, N> Alignments, CodeGenModule &CGM) {
+ // If the special function already exists in the module, return it.
+ if (llvm::Function *F = CGM.getModule().getFunction(FuncName)) {
+ bool WrongType = false;
+ if (!F->getReturnType()->isVoidTy())
+ WrongType = true;
+ else {
+ for (const llvm::Argument &Arg : F->args())
+ if (Arg.getType() != CGM.Int8PtrPtrTy)
+ WrongType = true;
+ }
+
+ if (WrongType) {
+ std::string FuncName = F->getName();
+ SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation();
+ CGM.Error(Loc, "special function " + FuncName +
+ " for non-trivial C struct has incorrect type");
+ return nullptr;
+ }
+ return F;
+ }
+
+ ASTContext &Ctx = CGM.getContext();
+ FunctionArgList Args;
+ const CGFunctionInfo &FI = getFunctionInfo<N>(CGM, Args);
+ llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
+ llvm::Function *F =
+ llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ FuncName, &CGM.getModule());
+ F->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ CGM.SetLLVMFunctionAttributes(nullptr, FI, F);
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F);
+ IdentifierInfo *II = &Ctx.Idents.get(FuncName);
+ FunctionDecl *FD = FunctionDecl::Create(
+ Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
+ II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ CodeGenFunction NewCGF(CGM);
+ setCGF(&NewCGF);
+ CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
+
+ for (unsigned I = 0; I < N; ++I) {
+ llvm::Value *V = CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[I]));
+ Addrs[I] = Address(V, Alignments[I]);
+ }
+
+ asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs);
+ CGF->FinishFunction();
+ return F;
+ }
+
+ template <size_t N>
+ void callFunc(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
+ CodeGenFunction &CallerCGF) {
+ std::array<CharUnits, N> Alignments;
+ llvm::Value *Ptrs[N];
+
+ for (unsigned I = 0; I < N; ++I) {
+ Alignments[I] = Addrs[I].getAlignment();
+ Ptrs[I] =
+ CallerCGF.Builder.CreateBitCast(Addrs[I], CallerCGF.CGM.Int8PtrPtrTy)
+ .getPointer();
+ }
+
+ if (llvm::Function *F =
+ getFunction(FuncName, QT, Addrs, Alignments, CallerCGF.CGM))
+ CallerCGF.EmitNounwindRuntimeCall(F, Ptrs);
+ }
+
+ Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+ void setCGF(CodeGenFunction *F) { CGF = F; }
+
+ CodeGenFunction *CGF = nullptr;
+};
+
+template <class Derived, bool IsMove>
+struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>,
+ GenFuncBase<Derived> {
+ GenBinaryFunc(ASTContext &Ctx) : CopyStructVisitor<Derived, IsMove>(Ctx) {}
+
+ void flushTrivialFields(std::array<Address, 2> Addrs) {
+ CharUnits Size = this->End - this->Start;
+
+ if (Size.getQuantity() == 0)
+ return;
+
+ Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], this->Start);
+ Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], this->Start);
+
+ // Emit memcpy.
+ if (Size.getQuantity() >= 16 || !llvm::isPowerOf2_32(Size.getQuantity())) {
+ llvm::Value *SizeVal =
+ llvm::ConstantInt::get(this->CGF->SizeTy, Size.getQuantity());
+ DstAddr =
+ this->CGF->Builder.CreateElementBitCast(DstAddr, this->CGF->Int8Ty);
+ SrcAddr =
+ this->CGF->Builder.CreateElementBitCast(SrcAddr, this->CGF->Int8Ty);
+ this->CGF->Builder.CreateMemCpy(DstAddr, SrcAddr, SizeVal, false);
+ } else {
+ llvm::Type *Ty = llvm::Type::getIntNTy(
+ this->CGF->getLLVMContext(),
+ Size.getQuantity() * this->CGF->getContext().getCharWidth());
+ DstAddr = this->CGF->Builder.CreateElementBitCast(DstAddr, Ty);
+ SrcAddr = this->CGF->Builder.CreateElementBitCast(SrcAddr, Ty);
+ llvm::Value *SrcVal = this->CGF->Builder.CreateLoad(SrcAddr, false);
+ this->CGF->Builder.CreateStore(SrcVal, DstAddr, false);
+ }
+
+ this->Start = this->End = CharUnits::Zero();
+ }
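+
+  // Editor's note (illustrative): runs of 16 bytes or more, or of
+  // non-power-of-two size, fall back to memcpy; smaller power-of-two runs
+  // (e.g. 8 bytes) become a single integer load/store, per the size test
+  // above.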
+
+ template <class... Ts>
+ void visitVolatileTrivial(QualType FT, const FieldDecl *FD, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ LValue DstLV, SrcLV;
+ if (FD) {
+ QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0);
+ llvm::PointerType *PtrTy = this->CGF->ConvertType(RT)->getPointerTo();
+ Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset);
+ LValue DstBase = this->CGF->MakeAddrLValue(
+ this->CGF->Builder.CreateBitCast(DstAddr, PtrTy), FT);
+ DstLV = this->CGF->EmitLValueForField(DstBase, FD);
+ Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], Offset);
+ LValue SrcBase = this->CGF->MakeAddrLValue(
+ this->CGF->Builder.CreateBitCast(SrcAddr, PtrTy), FT);
+ SrcLV = this->CGF->EmitLValueForField(SrcBase, FD);
+ } else {
+ llvm::PointerType *Ty = this->CGF->ConvertType(FT)->getPointerTo();
+ Address DstAddr = this->CGF->Builder.CreateBitCast(Addrs[DstIdx], Ty);
+ Address SrcAddr = this->CGF->Builder.CreateBitCast(Addrs[SrcIdx], Ty);
+ DstLV = this->CGF->MakeAddrLValue(DstAddr, FT);
+ SrcLV = this->CGF->MakeAddrLValue(SrcAddr, FT);
+ }
+ RValue SrcVal = this->CGF->EmitLoadOfLValue(SrcLV, SourceLocation());
+ this->CGF->EmitStoreThroughLValue(SrcVal, DstLV);
+ }
+};
+
+// These classes emit the special functions for a non-trivial C struct.
+struct GenDestructor : StructVisitor<GenDestructor>,
+ GenFuncBase<GenDestructor>,
+ DestructedTypeVisitor<GenDestructor> {
+ using Super = DestructedTypeVisitor<GenDestructor>;
+ GenDestructor(ASTContext &Ctx) : StructVisitor<GenDestructor>(Ctx) {}
+
+ void visitWithKind(QualType::DestructionKind DK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset,
+ std::array<Address, 1> Addrs) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(DK, AT, FT.isVolatileQualified(), FD, CurStructOffset, Addrs);
+ return;
+ }
+
+ Super::visitWithKind(DK, FT, FD, CurStructOffset, Addrs);
+ }
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+ CGF->destroyARCStrongImprecise(
+ *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 1> Addrs) {
+ CGF->destroyARCWeak(
+ *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 1> Addrs) {
+ CGF->callCStructDestructor(
+ CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT));
+ }
+};
+
+struct GenDefaultInitialize
+ : StructVisitor<GenDefaultInitialize>,
+ GenFuncBase<GenDefaultInitialize>,
+ DefaultInitializedTypeVisitor<GenDefaultInitialize> {
+ using Super = DefaultInitializedTypeVisitor<GenDefaultInitialize>;
+ typedef GenFuncBase<GenDefaultInitialize> GenFuncBaseTy;
+
+ GenDefaultInitialize(ASTContext &Ctx)
+ : StructVisitor<GenDefaultInitialize>(Ctx) {}
+
+ void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT,
+ const FieldDecl *FD, CharUnits CurStructOffset,
+ std::array<Address, 1> Addrs) {
+ if (const auto *AT = getContext().getAsArrayType(FT)) {
+ visitArray(PDIK, AT, FT.isVolatileQualified(), FD, CurStructOffset,
+ Addrs);
+ return;
+ }
+
+ Super::visitWithKind(PDIK, FT, FD, CurStructOffset, Addrs);
+ }
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+ CGF->EmitNullInitialization(
+ getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 1> Addrs) {
+ CGF->EmitNullInitialization(
+ getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+ }
+
+ template <class FieldKind, size_t... Is>
+ void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile,
+ const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 1> Addrs) {
+ if (!FK)
+ return visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs);
+
+ ASTContext &Ctx = getContext();
+ CharUnits Size = Ctx.getTypeSizeInChars(QualType(AT, 0));
+ QualType EltTy = Ctx.getBaseElementType(QualType(AT, 0));
+
+ if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) {
+ GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStackOffset, Addrs);
+ return;
+ }
+
+ llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity());
+ Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty);
+ CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal,
+ IsVolatile);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 1> Addrs) {
+ CGF->callCStructDefaultConstructor(
+ CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT));
+ }
+};
+
+struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> {
+ GenCopyConstructor(ASTContext &Ctx)
+ : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ llvm::Value *SrcVal = CGF->EmitLoadOfScalar(
+ Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation());
+ llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal);
+ CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->EmitARCCopyWeak(Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> {
+ GenMoveConstructor(ASTContext &Ctx)
+ : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT);
+ llvm::Value *SrcVal =
+ CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal();
+ CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV);
+ CGF->EmitStoreOfScalar(SrcVal, CGF->MakeAddrLValue(Addrs[DstIdx], QT),
+ /* isInitialization */ true);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->EmitARCMoveWeak(Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> {
+ GenCopyAssignment(ASTContext &Ctx)
+ : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ llvm::Value *SrcVal = CGF->EmitLoadOfScalar(
+ Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation());
+ CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), SrcVal,
+ false);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->emitARCCopyAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructCopyAssignmentOperator(
+ CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> {
+ GenMoveAssignment(ASTContext &Ctx)
+ : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {}
+
+ void visitARCStrong(QualType QT, const FieldDecl *FD,
+ CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT);
+ llvm::Value *SrcVal =
+ CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal();
+ CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV);
+ LValue DstLV = CGF->MakeAddrLValue(Addrs[DstIdx], QT);
+ llvm::Value *DstVal =
+ CGF->EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+ CGF->EmitStoreOfScalar(SrcVal, DstLV);
+ CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime);
+ }
+
+ void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset,
+ std::array<Address, 2> Addrs) {
+ Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+ Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+ CGF->emitARCMoveAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]);
+ }
+
+ void callSpecialFunction(QualType FT, CharUnits Offset,
+ std::array<Address, 2> Addrs) {
+ CGF->callCStructMoveAssignmentOperator(
+ CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+ CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+ }
+};
+
+} // namespace
+
+void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF,
+ Address Addr, QualType Type) {
+ CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Type));
+}
+
+// Default-initialize a variable that is a non-trivial struct or an array of
+// such structure.
+void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) {
+ GenDefaultInitialize Gen(getContext());
+ Address DstPtr = Builder.CreateBitCast(Dst.getAddress(), CGM.Int8PtrPtrTy);
+ Gen.setCGF(this);
+ QualType QT = Dst.getType();
+ QT = Dst.isVolatile() ? QT.withVolatile() : QT;
+ Gen.visit(QT, nullptr, CharUnits::Zero(), std::array<Address, 1>({{DstPtr}}));
+}
+
+template <class G, size_t N>
+static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT,
+ bool IsVolatile, CodeGenFunction &CGF,
+ std::array<Address, N> Addrs) {
+ for (unsigned I = 0; I < N; ++I)
+ Addrs[I] = CGF.Builder.CreateBitCast(Addrs[I], CGF.CGM.Int8PtrPtrTy);
+ QT = IsVolatile ? QT.withVolatile() : QT;
+ Gen.callFunc(FuncName, QT, Addrs, CGF);
+}
+
+// Functions to emit calls to the special functions of a non-trivial C struct.
+void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) {
+ bool IsVolatile = Dst.isVolatile();
+ Address DstPtr = Dst.getAddress();
+ QualType QT = Dst.getType();
+ GenDefaultInitializeFuncName GenName(DstPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenDefaultInitialize(getContext()), FuncName, QT,
+ IsVolatile, *this, std::array<Address, 1>({{DstPtr}}));
+}
+
+void CodeGenFunction::callCStructDestructor(LValue Dst) {
+ bool IsVolatile = Dst.isVolatile();
+ Address DstPtr = Dst.getAddress();
+ QualType QT = Dst.getType();
+ GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile,
+ *this, std::array<Address, 1>({{DstPtr}}));
+}
+
+void CodeGenFunction::callCStructCopyConstructor(LValue Dst, LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<false> GenName("__copy_constructor_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenCopyConstructor(getContext()), FuncName, QT,
+ IsVolatile, *this,
+ std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst,
+                                                        LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<false> GenName("__copy_assignment_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenCopyAssignment(getContext()), FuncName, QT, IsVolatile,
+ *this, std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructMoveConstructor(LValue Dst, LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<true> GenName("__move_constructor_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenMoveConstructor(getContext()), FuncName, QT,
+ IsVolatile, *this,
+ std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst,
+                                                        LValue Src) {
+ bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+ Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+ QualType QT = Dst.getType();
+ GenBinaryFuncName<true> GenName("__move_assignment_", DstPtr.getAlignment(),
+ SrcPtr.getAlignment(), getContext());
+ std::string FuncName = GenName.getName(QT, IsVolatile);
+ callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile,
+ *this, std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
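// Editor's sketch: given `struct S { __strong id a; int b; } d, s;` under
// ARC, the assignment `d = s;` ends up calling the LinkOnceODR helper named
// by the scheme above (64-bit target, IR sketch):
//
//   call void @__copy_assignment_8_8_s0_t8w4(i8** %d.addr, i8** %s.addr)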
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index f26263d9472d..81c1201c0e06 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -259,7 +259,7 @@ llvm::Value *CodeGenFunction::EmitObjCProtocolExpr(const ObjCProtocolExpr *E) {
return CGM.getObjCRuntime().GenerateProtocolRef(*this, E->getProtocol());
}
-/// \brief Adjust the type of an Objective-C object that doesn't match up due
+/// Adjust the type of an Objective-C object that doesn't match up due
/// to type erasure at various points, e.g., related result types or the use
/// of parameterized classes.
static RValue AdjustObjCObjectType(CodeGenFunction &CGF, QualType ExpT,
@@ -803,7 +803,7 @@ PropertyImplStrategy::PropertyImplStrategy(CodeGenModule &CGM,
Kind = Native;
}
-/// \brief Generate an Objective-C property getter function.
+/// Generate an Objective-C property getter function.
///
/// The given Decl must be an ObjCImplementationDecl. \@synthesize
/// is illegal within a category.
@@ -1008,12 +1008,14 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
/*init*/ true);
return;
}
- case TEK_Aggregate:
+ case TEK_Aggregate: {
// The return value slot is guaranteed to not be aliased, but
// that's not necessarily the same as "on the stack", so
// we still potentially need objc_memmove_collectable.
- EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType);
+ EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType),
+ /* Src= */ LV, ivarType, overlapForReturnValue());
return;
+ }
case TEK_Scalar: {
llvm::Value *value;
if (propType->isReferenceType()) {
@@ -1334,7 +1336,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
EmitStmt(&assign);
}
-/// \brief Generate an Objective-C property setter function.
+/// Generate an Objective-C property setter function.
///
/// The given Decl must be an ObjCImplementationDecl. \@synthesize
/// is illegal within a category.
@@ -1438,7 +1440,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP,
EmitAggExpr(IvarInit->getInit(),
AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
}
// constructor returns 'self'.
CodeGenTypes &Types = CGM.getTypes();
@@ -1814,22 +1817,6 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) {
}
-static bool IsForwarding(StringRef Name) {
- return llvm::StringSwitch<bool>(Name)
- .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV
- "objc_autorelease", // ARCInstKind::Autorelease
- "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV
- "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV
- "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease
- "objc_retainedObject", // ARCInstKind::NoopCast
- "objc_retain", // ARCInstKind::Retain
- "objc_unretainedObject", // ARCInstKind::NoopCast
- "objc_unretainedPointer", // ARCInstKind::NoopCast
- "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV
- true)
- .Default(false);
-}
-
static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
llvm::FunctionType *FTy,
StringRef Name) {
@@ -1847,9 +1834,6 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
// performance.
F->addFnAttr(llvm::Attribute::NonLazyBind);
}
-
- if (IsForwarding(Name))
- F->arg_begin()->addAttr(llvm::Attribute::Returned);
}
return RTF;
@@ -2052,7 +2036,7 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
// Call the marker asm if we made one, which we do only at -O0.
if (marker)
- CGF.Builder.CreateCall(marker);
+ CGF.Builder.CreateCall(marker, None, CGF.getBundlesForFunclet(marker));
}
/// Retain the given object which is the result of a function call.
@@ -2070,7 +2054,7 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
/// Claim a possibly-autoreleased return value at +0. This is only
/// valid to do in contexts which do not rely on the retain to keep
-/// the object valid for for all of its uses; for example, when
+/// the object valid for all of its uses; for example, when
/// the value is ignored, or when it is being assigned to an
/// __unsafe_unretained variable.
///
@@ -2325,6 +2309,21 @@ void CodeGenFunction::EmitARCCopyWeak(Address dst, Address src) {
"objc_copyWeak");
}
+void CodeGenFunction::emitARCCopyAssignWeak(QualType Ty, Address DstAddr,
+ Address SrcAddr) {
+ llvm::Value *Object = EmitARCLoadWeakRetained(SrcAddr);
+ Object = EmitObjCConsumeObject(Ty, Object);
+ EmitARCStoreWeak(DstAddr, Object, false);
+}
+
+void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr,
+ Address SrcAddr) {
+ llvm::Value *Object = EmitARCLoadWeakRetained(SrcAddr);
+ Object = EmitObjCConsumeObject(Ty, Object);
+ EmitARCStoreWeak(DstAddr, Object, false);
+ EmitARCDestroyWeak(SrcAddr);
+}
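+
+// Editor's note (illustrative): for `__weak id dst, src;`, copy-assignment
+// (`dst = src;`) loads src retained, consumes it, and stores it into dst;
+// the move variant does the same and then destroys the src weak reference.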
+
/// Produce the code to do a objc_autoreleasepool_push.
/// call i8* \@objc_autoreleasePoolPush(void)
llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
@@ -3261,19 +3260,19 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
"__assign_helper_atomic_property_",
&CGM.getModule());
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
DeclRefExpr DstExpr(&DstDecl, false, DestTy,
VK_RValue, SourceLocation());
UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation());
+ VK_LValue, OK_Ordinary, SourceLocation(), false);
DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
VK_RValue, SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation());
+ VK_LValue, OK_Ordinary, SourceLocation(), false);
Expr *Args[2] = { &DST, &SRC };
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
@@ -3342,8 +3341,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
llvm::Function *Fn =
llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
"__copy_helper_atomic_property_", &CGM.getModule());
-
- CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
StartFunction(FD, C.VoidTy, Fn, FI, args);
@@ -3351,7 +3350,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
VK_RValue, SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation());
+ VK_LValue, OK_Ordinary, SourceLocation(), false);
CXXConstructExpr *CXXConstExpr =
cast<CXXConstructExpr>(PID->getGetterCXXConstructor());
@@ -3384,7 +3383,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
FinishFunction();
HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index c8b8be7f4552..6a0554b46b1c 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -34,11 +34,24 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
+#include <cctype>
using namespace clang;
using namespace CodeGen;
namespace {
+
+std::string SymbolNameForMethod(StringRef ClassName, StringRef CategoryName,
+                                const Selector MethodName,
+                                bool isClassMethod) {
+ std::string MethodNameColonStripped = MethodName.getAsString();
+ std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(),
+ ':', '_');
+ return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" +
+ CategoryName + "_" + MethodNameColonStripped).str();
+}
+
/// Class that lazily initialises the runtime function. Avoids inserting the
/// types and the function declaration into a module if they're not used, and
/// avoids constructing the type more than once if it's used more than once.
@@ -80,8 +93,7 @@ public:
if (!Function) {
if (!FunctionName)
return nullptr;
- Function =
- cast<llvm::Constant>(CGM->CreateRuntimeFunction(FTy, FunctionName));
+ Function = CGM->CreateRuntimeFunction(FTy, FunctionName);
}
return Function;
}
@@ -114,6 +126,10 @@ protected:
/// Pointer to i8 - LLVM type of char*, for all of the places where the
/// runtime needs to deal with C strings.
llvm::PointerType *PtrToInt8Ty;
+ /// struct objc_protocol type
+ llvm::StructType *ProtocolTy;
+ /// Protocol * type.
+ llvm::PointerType *ProtocolPtrTy;
/// Instance Method Pointer type. This is a pointer to a function that takes,
/// at a minimum, an object and a selector, and is the generic type for
/// Objective-C methods. Due to differences between variadic / non-variadic
@@ -156,11 +172,29 @@ protected:
llvm::IntegerType *Int32Ty;
/// 64-bit integer type, to save us needing to look it up every time it's used.
llvm::IntegerType *Int64Ty;
+ /// The type of struct objc_property.
+ llvm::StructType *PropertyMetadataTy;
/// Metadata kind used to tie method lookups to message sends. The GNUstep
/// runtime provides some LLVM passes that can use this to do things like
/// automatic IMP caching and speculative inlining.
unsigned msgSendMDKind;
+ /// Helper to check if we are targeting a specific runtime version or later.
+ bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) {
+ const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
+ return (R.getKind() == kind) &&
+ (R.getVersion() >= VersionTuple(major, minor));
+ }
+
+ std::string SymbolForProtocol(StringRef Name) {
+ return (StringRef("._OBJC_PROTOCOL_") + Name).str();
+ }
+
+ std::string SymbolForProtocolRef(StringRef Name) {
+ return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str();
+ }
+
+
/// Helper function that generates a constant string and returns a pointer to
/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.
@@ -174,39 +208,28 @@ protected:
/// string value. This allows the linker to combine the strings between
/// different modules. Used for EH typeinfo names, selector strings, and a
/// few other things.
- llvm::Constant *ExportUniqueString(const std::string &Str, StringRef Prefix) {
- std::string Name = Prefix.str() + Str;
- auto *ConstStr = TheModule.getGlobalVariable(Name);
+ llvm::Constant *ExportUniqueString(const std::string &Str,
+ const std::string &prefix,
+ bool Private=false) {
+ std::string name = prefix + Str;
+ auto *ConstStr = TheModule.getGlobalVariable(name);
if (!ConstStr) {
llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str);
- ConstStr = new llvm::GlobalVariable(TheModule, value->getType(), true,
- llvm::GlobalValue::LinkOnceODRLinkage,
- value, Name);
+ auto *GV = new llvm::GlobalVariable(TheModule, value->getType(), true,
+ llvm::GlobalValue::LinkOnceODRLinkage, value, name);
+ if (Private)
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ ConstStr = GV;
}
return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
ConstStr, Zeros);
}
- /// Generates a global structure, initialized by the elements in the vector.
- /// The element types must match the types of the structure elements in the
- /// first argument.
- llvm::GlobalVariable *MakeGlobal(llvm::Constant *C,
- CharUnits Align,
- StringRef Name="",
- llvm::GlobalValue::LinkageTypes linkage
- =llvm::GlobalValue::InternalLinkage) {
- auto GV = new llvm::GlobalVariable(TheModule, C->getType(), false,
- linkage, C, Name);
- GV->setAlignment(Align.getQuantity());
- return GV;
- }
-
/// Returns a property name and encoding string.
llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD,
const Decl *Container) {
- const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
- if ((R.getKind() == ObjCRuntime::GNUstep) &&
- (R.getVersion() >= VersionTuple(1, 6))) {
+ assert(!isRuntime(ObjCRuntime::GNUstep, 2));
+ if (isRuntime(ObjCRuntime::GNUstep, 1, 6)) {
std::string NameAndAttributes;
std::string TypeStr =
CGM.getContext().getObjCEncodingForPropertyDecl(PD, Container);
@@ -222,7 +245,7 @@ protected:
/// Push the property attributes into two structure fields.
void PushPropertyAttributes(ConstantStructBuilder &Fields,
- ObjCPropertyDecl *property, bool isSynthesized=true, bool
+ const ObjCPropertyDecl *property, bool isSynthesized=true, bool
isDynamic=true) {
int attrs = property->getPropertyAttributes();
// For read-only properties, clear the copy and retain flags
@@ -249,6 +272,46 @@ protected:
Fields.addInt(Int8Ty, 0);
}
+ virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields,
+ int count) {
+ // int count;
+ Fields.addInt(IntTy, count);
+ // int size; (only in the GNUstep v2 ABI).
+ if (isRuntime(ObjCRuntime::GNUstep, 2)) {
+ llvm::DataLayout td(&TheModule);
+ Fields.addInt(IntTy, td.getTypeSizeInBits(PropertyMetadataTy) /
+ CGM.getContext().getCharWidth());
+ }
+ // struct objc_property_list *next;
+ Fields.add(NULLPtr);
+ // struct objc_property properties[]
+ return Fields.beginArray(PropertyMetadataTy);
+ }
+ virtual void PushProperty(ConstantArrayBuilder &PropertiesArray,
+ const ObjCPropertyDecl *property,
+ const Decl *OCD,
+ bool isSynthesized=true, bool
+ isDynamic=true) {
+ auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy);
+ ASTContext &Context = CGM.getContext();
+ Fields.add(MakePropertyEncodingString(property, OCD));
+ PushPropertyAttributes(Fields, property, isSynthesized, isDynamic);
+ auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) {
+ if (accessor) {
+ std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor);
+ llvm::Constant *TypeEncoding = MakeConstantString(TypeStr);
+ Fields.add(MakeConstantString(accessor->getSelector().getAsString()));
+ Fields.add(TypeEncoding);
+ } else {
+ Fields.add(NULLPtr);
+ Fields.add(NULLPtr);
+ }
+ };
+ addPropertyMethod(property->getGetterMethodDecl());
+ addPropertyMethod(property->getSetterMethodDecl());
+ Fields.finishAndAddTo(PropertiesArray);
+ }
+
/// Ensures that the value has the required type, by inserting a bitcast if
/// required. This function lets us avoid inserting bitcasts that are
/// redundant.
@@ -268,7 +331,8 @@ protected:
/// LLVM context.
llvm::LLVMContext &VMContext;
-private:
+protected:
+
/// Placeholder for the class. Lots of things refer to the class before we've
/// actually emitted it. We use this alias as a placeholder, and then replace
/// it with a pointer to the class structure before finally emitting the
@@ -352,6 +416,7 @@ private:
/// Function used for non-object declared property setters.
LazyRuntimeFunction SetStructPropertyFn;
+protected:
/// The version of the runtime that this class targets. Must match the
/// version in the runtime.
int RuntimeVersion;
@@ -362,14 +427,18 @@ private:
/// Objective-C 1 property structures when targeting the GCC runtime or it
/// will abort.
const int ProtocolVersion;
-
+ /// The version of the class ABI. This value is used in the class structure
+ /// and indicates how various fields should be interpreted.
+ const int ClassABIVersion;
/// Generates an instance variable list structure. This is a structure
/// containing a size and an array of structures containing instance variable
/// metadata. This is used purely for introspection in the fragile ABI. In
/// the non-fragile ABI, it's used for instance variable fixup.
- llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
- ArrayRef<llvm::Constant *> IvarTypes,
- ArrayRef<llvm::Constant *> IvarOffsets);
+ virtual llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
+ ArrayRef<llvm::Constant *> IvarTypes,
+ ArrayRef<llvm::Constant *> IvarOffsets,
+ ArrayRef<llvm::Constant *> IvarAlign,
+ ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership);
/// Generates a method list structure. This is a structure containing a size
/// and an array of structures containing method metadata.
@@ -378,20 +447,20 @@ private:
/// pointer allowing them to be chained together in a linked list.
llvm::Constant *GenerateMethodList(StringRef ClassName,
StringRef CategoryName,
- ArrayRef<Selector> MethodSels,
- ArrayRef<llvm::Constant *> MethodTypes,
+ ArrayRef<const ObjCMethodDecl*> Methods,
bool isClassMethodList);
/// Emits an empty protocol. This is used for \@protocol() where no protocol
/// is found. The runtime will (hopefully) fix up the pointer to refer to the
/// real protocol.
- llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName);
+ virtual llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName);
/// Generates a list of property metadata structures. This follows the same
/// pattern as method and instance variable metadata lists.
- llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID,
- SmallVectorImpl<Selector> &InstanceMethodSels,
- SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes);
+ llvm::Constant *GeneratePropertyList(const Decl *Container,
+ const ObjCContainerDecl *OCD,
+ bool isClassProperty=false,
+ bool protocolOptionalProperties=false);
/// Generates a list of referenced protocols. Classes, categories, and
/// protocols all use this structure.
@@ -422,22 +491,42 @@ private:
/// Generates a method list. This is used by protocols to define the required
/// and optional methods.
- llvm::Constant *GenerateProtocolMethodList(
- ArrayRef<llvm::Constant *> MethodNames,
- ArrayRef<llvm::Constant *> MethodTypes);
+ virtual llvm::Constant *GenerateProtocolMethodList(
+ ArrayRef<const ObjCMethodDecl*> Methods);
+ /// Emits optional and required method lists.
+ template<class T>
+ void EmitProtocolMethodList(T &&Methods, llvm::Constant *&Required,
+ llvm::Constant *&Optional) {
+ SmallVector<const ObjCMethodDecl*, 16> RequiredMethods;
+ SmallVector<const ObjCMethodDecl*, 16> OptionalMethods;
+ for (const auto *I : Methods)
+ if (I->isOptional())
+ OptionalMethods.push_back(I);
+ else
+ RequiredMethods.push_back(I);
+ Required = GenerateProtocolMethodList(RequiredMethods);
+ Optional = GenerateProtocolMethodList(OptionalMethods);
+ }
/// Returns a selector with the specified type encoding. An empty string is
/// used to return an untyped selector (with the types field set to NULL).
- llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
+ virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
const std::string &TypeEncoding);
+ /// Returns the name of ivar offset variables. In the GNUstep v1 ABI, this
+ /// contains the class and ivar names, in the v2 ABI this contains the type
+ /// encoding as well.
+ virtual std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID,
+ const ObjCIvarDecl *Ivar) {
+ const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString()
+ + '.' + Ivar->getNameAsString();
+ return Name;
+ }
/// Returns the variable used to store the offset of an instance variable.
llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *Ivar);
/// Emits a reference to a class. This allows the linker to object if there
/// is no class of the matching name.
-
-protected:
void EmitClassRef(const std::string &className);
/// Emits a pointer to the named class
@@ -476,7 +565,7 @@ protected:
public:
CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
- unsigned protocolClassVersion);
+ unsigned protocolClassVersion, unsigned classABI=1);
ConstantAddress GenerateConstantString(const StringLiteral *) override;
@@ -499,6 +588,14 @@ public:
Address GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) override;
llvm::Value *GetSelector(CodeGenFunction &CGF,
const ObjCMethodDecl *Method) override;
+ virtual llvm::Constant *GetConstantSelector(Selector Sel,
+ const std::string &TypeEncoding) {
+ llvm_unreachable("Runtime unable to generate constant selector");
+ }
+ llvm::Constant *GetConstantSelector(const ObjCMethodDecl *M) {
+ return GetConstantSelector(M->getSelector(),
+ CGM.getContext().getObjCEncodingForMethodDecl(M));
+ }
llvm::Constant *GetEHType(QualType T) override;
llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
@@ -698,7 +795,10 @@ class CGObjCGNUstep : public CGObjCGNU {
}
public:
- CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) {
+ CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNUstep(Mod, 9, 3, 1) {}
+ CGObjCGNUstep(CodeGenModule &Mod, unsigned ABI, unsigned ProtocolABI,
+ unsigned ClassABI) :
+ CGObjCGNU(Mod, ABI, ProtocolABI, ClassABI) {
const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
llvm::StructType *SlotStructTy =
@@ -707,7 +807,7 @@ class CGObjCGNUstep : public CGObjCGNU {
// Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender);
SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy,
SelectorTy, IdTy);
- // Slot_t objc_msg_lookup_super(struct objc_super*, SEL);
+ // Slot_t objc_slot_lookup_super(struct objc_super*, SEL);
SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
PtrToObjCSuperTy, SelectorTy);
// If we're in ObjC++ mode, then we want to make
@@ -784,6 +884,951 @@ class CGObjCGNUstep : public CGObjCGNU {
}
};
+/// GNUstep Objective-C ABI version 2 implementation.
+/// This is the ABI that provides a clean break with the legacy GCC ABI and
+/// cleans up a number of things that were added to work around 1980s linkers.
+class CGObjCGNUstep2 : public CGObjCGNUstep {
+ /// The section for selectors.
+ static constexpr const char *const SelSection = "__objc_selectors";
+ /// The section for classes.
+ static constexpr const char *const ClsSection = "__objc_classes";
+ /// The section for references to classes.
+ static constexpr const char *const ClsRefSection = "__objc_class_refs";
+ /// The section for categories.
+ static constexpr const char *const CatSection = "__objc_cats";
+ /// The section for protocols.
+ static constexpr const char *const ProtocolSection = "__objc_protocols";
+ /// The section for protocol references.
+ static constexpr const char *const ProtocolRefSection = "__objc_protocol_refs";
+ /// The section for class aliases
+ static constexpr const char *const ClassAliasSection = "__objc_class_aliases";
+ /// The section for constexpr constant strings
+ static constexpr const char *const ConstantStringSection = "__objc_constant_string";
+ /// The GCC ABI superclass message lookup function. Takes a pointer to a
+ /// structure describing the receiver and the class, and a selector as
+ /// arguments. Returns the IMP for the corresponding method.
+ LazyRuntimeFunction MsgLookupSuperFn;
+ /// A flag indicating if we've emitted at least one protocol.
+ /// If we haven't, then we need to emit an empty protocol, to ensure that the
+ /// __start__objc_protocols and __stop__objc_protocols sections exist.
+ bool EmittedProtocol = false;
+ /// A flag indicating if we've emitted at least one protocol reference.
+ /// If we haven't, then we need to emit an empty protocol reference, to
+ /// ensure that the __start__objc_protocol_refs and
+ /// __stop__objc_protocol_refs sections exist.
+ bool EmittedProtocolRef = false;
+ /// A flag indicating if we've emitted at least one class.
+ /// If we haven't, then we need to emit an empty class, to ensure that the
+ /// __start__objc_classes and __stop__objc_classes sections exist.
+ bool EmittedClass = false;
+ /// Generate the name of a symbol for a reference to a class. Accesses to
+ /// classes should be indirected via this.
+ std::string SymbolForClassRef(StringRef Name, bool isWeak) {
+ if (isWeak)
+ return (StringRef("._OBJC_WEAK_REF_CLASS_") + Name).str();
+ else
+ return (StringRef("._OBJC_REF_CLASS_") + Name).str();
+ }
+ /// Generate the name of a class symbol.
+ std::string SymbolForClass(StringRef Name) {
+ return (StringRef("._OBJC_CLASS_") + Name).str();
+ }
+ void CallRuntimeFunction(CGBuilderTy &B, StringRef FunctionName,
+ ArrayRef<llvm::Value*> Args) {
+ SmallVector<llvm::Type *,8> Types;
+ for (auto *Arg : Args)
+ Types.push_back(Arg->getType());
+ llvm::FunctionType *FT = llvm::FunctionType::get(B.getVoidTy(), Types,
+ false);
+ llvm::Value *Fn = CGM.CreateRuntimeFunction(FT, FunctionName);
+ B.CreateCall(Fn, Args);
+ }
+
+ ConstantAddress GenerateConstantString(const StringLiteral *SL) override {
+
+ auto Str = SL->getString();
+ CharUnits Align = CGM.getPointerAlign();
+
+ // Look for an existing one
+ llvm::StringMap<llvm::Constant*>::iterator old = ObjCStrings.find(Str);
+ if (old != ObjCStrings.end())
+ return ConstantAddress(old->getValue(), Align);
+
+ bool isNonASCII = SL->containsNonAscii();
+
+ auto LiteralLength = SL->getLength();
+
+ if ((CGM.getTarget().getPointerWidth(0) == 64) &&
+ (LiteralLength < 9) && !isNonASCII) {
+ // Tiny strings are only used on 64-bit platforms. They store up to 8
+ // 7-bit ASCII characters in the high 56 bits, followed by a 4-bit length
+ // and a 3-bit tag (which is always 4).
+ uint64_t str = 0;
+ // Fill in the characters
+ for (unsigned i=0 ; i<LiteralLength ; i++)
+ str |= ((uint64_t)SL->getCodeUnit(i)) << ((64 - 4 - 3) - (i*7));
+ // Fill in the length
+ str |= LiteralLength << 3;
+ // Set the tag
+ str |= 4;
+ auto *ObjCStr = llvm::ConstantExpr::getIntToPtr(
+ llvm::ConstantInt::get(Int64Ty, str), IdTy);
+ ObjCStrings[Str] = ObjCStr;
+ return ConstantAddress(ObjCStr, Align);
+ }
+
+ StringRef StringClass = CGM.getLangOpts().ObjCConstantStringClass;
+
+ if (StringClass.empty()) StringClass = "NSConstantString";
+
+ std::string Sym = SymbolForClass(StringClass);
+
+ llvm::Constant *isa = TheModule.getNamedGlobal(Sym);
+
+ if (!isa)
+ isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Sym);
+ else if (isa->getType() != PtrToIdTy)
+ isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy);
+
+ // struct
+ // {
+ // Class isa;
+ // uint32_t flags;
+ // uint32_t length; // Number of codepoints
+ // uint32_t size; // Number of bytes
+ // uint32_t hash;
+ // const char *data;
+ // };
+
+ ConstantInitBuilder Builder(CGM);
+ auto Fields = Builder.beginStruct();
+ Fields.add(isa);
+ // For now, all non-ASCII strings are represented as UTF-16. As such, the
+ // number of bytes is simply double the number of UTF-16 codepoints. In
+ // ASCII strings, the number of bytes is equal to the number of
+ // codepoints.
+ if (isNonASCII) {
+ unsigned NumU8CodeUnits = Str.size();
+ // A UTF-16 representation of a unicode string contains at most the same
+ // number of code units as a UTF-8 representation. Allocate that much
+ // space, plus one for the final null character.
+ SmallVector<llvm::UTF16, 128> ToBuf(NumU8CodeUnits + 1);
+ const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)Str.data();
+ llvm::UTF16 *ToPtr = &ToBuf[0];
+ (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumU8CodeUnits,
+ &ToPtr, ToPtr + NumU8CodeUnits, llvm::strictConversion);
+ uint32_t StringLength = ToPtr - &ToBuf[0];
+ // Add null terminator
+ *ToPtr = 0;
+ // Flags: 2 indicates UTF-16 encoding
+ Fields.addInt(Int32Ty, 2);
+ // Number of UTF-16 codepoints
+ Fields.addInt(Int32Ty, StringLength);
+ // Number of bytes
+ Fields.addInt(Int32Ty, StringLength * 2);
+ // Hash. Not currently initialised by the compiler.
+ Fields.addInt(Int32Ty, 0);
+ // pointer to the data string.
+ auto Arr = llvm::makeArrayRef(&ToBuf[0], ToPtr+1);
+ auto *C = llvm::ConstantDataArray::get(VMContext, Arr);
+ auto *Buffer = new llvm::GlobalVariable(TheModule, C->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str");
+ Buffer->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ Fields.add(Buffer);
+ } else {
+ // Flags: 0 indicates ASCII encoding
+ Fields.addInt(Int32Ty, 0);
+ // Number of UTF-16 codepoints; each ASCII byte is one UTF-16 codepoint
+ Fields.addInt(Int32Ty, Str.size());
+ // Number of bytes
+ Fields.addInt(Int32Ty, Str.size());
+ // Hash. Not currently initialised by the compiler.
+ Fields.addInt(Int32Ty, 0);
+ // Data pointer
+ Fields.add(MakeConstantString(Str));
+ }
+ std::string StringName;
+ bool isNamed = !isNonASCII;
+ if (isNamed) {
+ StringName = ".objc_str_";
+ for (int i=0,e=Str.size() ; i<e ; ++i) {
+ unsigned char c = Str[i];
+ if (isalnum(c))
+ StringName += c;
+ else if (c == ' ')
+ StringName += '_';
+ else {
+ isNamed = false;
+ break;
+ }
+ }
+ }
+ auto *ObjCStrGV =
+ Fields.finishAndCreateGlobal(
+ isNamed ? StringRef(StringName) : ".objc_string",
+ Align, false, isNamed ? llvm::GlobalValue::LinkOnceODRLinkage
+ : llvm::GlobalValue::PrivateLinkage);
+ ObjCStrGV->setSection(ConstantStringSection);
+ if (isNamed) {
+ ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName));
+ ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ }
+ llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy);
+ ObjCStrings[Str] = ObjCStr;
+ ConstantStrings.push_back(ObjCStr);
+ return ConstantAddress(ObjCStr, Align);
+ }
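A hedged standalone restatement of the tiny-string packing used in the 64-bit fast path above; the helper name is invented, but the shifts and tag value match the loop in GenerateConstantString, so "ab" packs as 'a'<<57 | 'b'<<50 | 2<<3 | 4.

#include <cstdint>
#include <cstring>

// Packs a short ASCII string exactly as the fast path above does:
// 7 bits per character starting at bit 57, the length in bits 3-6,
// and the constant tag 4 in bits 0-2.
uint64_t packTinyObjCString(const char *S) {
  uint64_t Packed = 0;
  uint64_t Len = std::strlen(S); // assumed < 9 and pure 7-bit ASCII
  for (unsigned i = 0; i < Len; i++)
    Packed |= ((uint64_t)S[i]) << ((64 - 4 - 3) - (i * 7));
  Packed |= Len << 3; // 4-bit length
  Packed |= 4;        // 3-bit tag
  return Packed;
}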
+
+ void PushProperty(ConstantArrayBuilder &PropertiesArray,
+ const ObjCPropertyDecl *property,
+ const Decl *OCD,
+ bool isSynthesized=true, bool
+ isDynamic=true) override {
+ // struct objc_property
+ // {
+ // const char *name;
+ // const char *attributes;
+ // const char *type;
+ // SEL getter;
+ // SEL setter;
+ // };
+ auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy);
+ ASTContext &Context = CGM.getContext();
+ Fields.add(MakeConstantString(property->getNameAsString()));
+ std::string TypeStr =
+ CGM.getContext().getObjCEncodingForPropertyDecl(property, OCD);
+ Fields.add(MakeConstantString(TypeStr));
+ std::string typeStr;
+ Context.getObjCEncodingForType(property->getType(), typeStr);
+ Fields.add(MakeConstantString(typeStr));
+ auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) {
+ if (accessor) {
+ std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor);
+ Fields.add(GetConstantSelector(accessor->getSelector(), TypeStr));
+ } else {
+ Fields.add(NULLPtr);
+ }
+ };
+ addPropertyMethod(property->getGetterMethodDecl());
+ addPropertyMethod(property->getSetterMethodDecl());
+ Fields.finishAndAddTo(PropertiesArray);
+ }
+
+ llvm::Constant *
+ GenerateProtocolMethodList(ArrayRef<const ObjCMethodDecl*> Methods) override {
+ // struct objc_protocol_method_description
+ // {
+ // SEL selector;
+ // const char *types;
+ // };
+ llvm::StructType *ObjCMethodDescTy =
+ llvm::StructType::get(CGM.getLLVMContext(),
+ { PtrToInt8Ty, PtrToInt8Ty });
+ ASTContext &Context = CGM.getContext();
+ ConstantInitBuilder Builder(CGM);
+ // struct objc_protocol_method_description_list
+ // {
+ // int count;
+ // int size;
+ // struct objc_protocol_method_description methods[];
+ // };
+ auto MethodList = Builder.beginStruct();
+ // int count;
+ MethodList.addInt(IntTy, Methods.size());
+ // int size; // sizeof(struct objc_method_description)
+ llvm::DataLayout td(&TheModule);
+ MethodList.addInt(IntTy, td.getTypeSizeInBits(ObjCMethodDescTy) /
+ CGM.getContext().getCharWidth());
+ // struct objc_method_description[]
+ auto MethodArray = MethodList.beginArray(ObjCMethodDescTy);
+ for (auto *M : Methods) {
+ auto Method = MethodArray.beginStruct(ObjCMethodDescTy);
+ Method.add(CGObjCGNU::GetConstantSelector(M));
+ Method.add(GetTypeString(Context.getObjCEncodingForMethodDecl(M, true)));
+ Method.finishAndAddTo(MethodArray);
+ }
+ MethodArray.finishAndAddTo(MethodList);
+ return MethodList.finishAndCreateGlobal(".objc_protocol_method_list",
+ CGM.getPointerAlign());
+ }
+
+ llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper,
+ llvm::Value *cmd, MessageSendInfo &MSI) override {
+ // Don't access the slot unless we're trying to cache the result.
+ CGBuilderTy &Builder = CGF.Builder;
+ llvm::Value *lookupArgs[] = {CGObjCGNU::EnforceType(Builder, ObjCSuper,
+ PtrToObjCSuperTy).getPointer(), cmd};
+ return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs);
+ }
+
+ llvm::GlobalVariable *GetClassVar(StringRef Name, bool isWeak=false) {
+ std::string SymbolName = SymbolForClassRef(Name, isWeak);
+ auto *ClassSymbol = TheModule.getNamedGlobal(SymbolName);
+ if (ClassSymbol)
+ return ClassSymbol;
+ ClassSymbol = new llvm::GlobalVariable(TheModule,
+ IdTy, false, llvm::GlobalValue::ExternalLinkage,
+ nullptr, SymbolName);
+ // If this is a weak symbol, then we are creating a valid definition for
+ // the symbol, pointing to a weak definition of the real class pointer. If
+ // this is not a weak reference, then we are expecting another compilation
+ // unit to provide the real indirection symbol.
+ if (isWeak)
+ ClassSymbol->setInitializer(new llvm::GlobalVariable(TheModule,
+ Int8Ty, false, llvm::GlobalValue::ExternalWeakLinkage,
+ nullptr, SymbolForClass(Name)));
+ assert(ClassSymbol->getName() == SymbolName);
+ return ClassSymbol;
+ }
+ llvm::Value *GetClassNamed(CodeGenFunction &CGF,
+ const std::string &Name,
+ bool isWeak) override {
+ return CGF.Builder.CreateLoad(Address(GetClassVar(Name, isWeak),
+ CGM.getPointerAlign()));
+ }
+ int32_t FlagsForOwnership(Qualifiers::ObjCLifetime Ownership) {
+ // typedef enum {
+ // ownership_invalid = 0,
+ // ownership_strong = 1,
+ // ownership_weak = 2,
+ // ownership_unsafe = 3
+ // } ivar_ownership;
+ int Flag;
+ switch (Ownership) {
+ case Qualifiers::OCL_Strong:
+ Flag = 1;
+ break;
+ case Qualifiers::OCL_Weak:
+ Flag = 2;
+ break;
+ case Qualifiers::OCL_ExplicitNone:
+ Flag = 3;
+ break;
+ case Qualifiers::OCL_None:
+ case Qualifiers::OCL_Autoreleasing:
+ assert(Ownership != Qualifiers::OCL_Autoreleasing);
+ Flag = 0;
+ }
+ return Flag;
+ }
+ llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
+ ArrayRef<llvm::Constant *> IvarTypes,
+ ArrayRef<llvm::Constant *> IvarOffsets,
+ ArrayRef<llvm::Constant *> IvarAlign,
+ ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership) override {
+ llvm_unreachable("Method should not be called!");
+ }
+
+ llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName) override {
+ std::string Name = SymbolForProtocol(ProtocolName);
+ auto *GV = TheModule.getGlobalVariable(Name);
+ if (!GV) {
+ // Emit a placeholder symbol.
+ GV = new llvm::GlobalVariable(TheModule, ProtocolTy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Name);
+ GV->setAlignment(CGM.getPointerAlign().getQuantity());
+ }
+ return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy);
+ }
+
+ /// Existing protocol references.
+ llvm::StringMap<llvm::Constant*> ExistingProtocolRefs;
+
+ llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
+ const ObjCProtocolDecl *PD) override {
+ auto Name = PD->getNameAsString();
+ auto *&Ref = ExistingProtocolRefs[Name];
+ if (!Ref) {
+ auto *&Protocol = ExistingProtocols[Name];
+ if (!Protocol)
+ Protocol = GenerateProtocolRef(PD);
+ std::string RefName = SymbolForProtocolRef(Name);
+ assert(!TheModule.getGlobalVariable(RefName));
+ // Emit a reference symbol.
+ auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy,
+ false, llvm::GlobalValue::ExternalLinkage,
+ llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName);
+ GV->setSection(ProtocolRefSection);
+ GV->setAlignment(CGM.getPointerAlign().getQuantity());
+ Ref = GV;
+ }
+ EmittedProtocolRef = true;
+ return CGF.Builder.CreateAlignedLoad(Ref, CGM.getPointerAlign());
+ }
+
+ llvm::Constant *GenerateProtocolList(ArrayRef<llvm::Constant*> Protocols) {
+ llvm::ArrayType *ProtocolArrayTy = llvm::ArrayType::get(ProtocolPtrTy,
+ Protocols.size());
+ llvm::Constant * ProtocolArray = llvm::ConstantArray::get(ProtocolArrayTy,
+ Protocols);
+ ConstantInitBuilder builder(CGM);
+ auto ProtocolBuilder = builder.beginStruct();
+ ProtocolBuilder.addNullPointer(PtrTy);
+ ProtocolBuilder.addInt(SizeTy, Protocols.size());
+ ProtocolBuilder.add(ProtocolArray);
+ return ProtocolBuilder.finishAndCreateGlobal(".objc_protocol_list",
+ CGM.getPointerAlign(), false, llvm::GlobalValue::InternalLinkage);
+ }
+
+ void GenerateProtocol(const ObjCProtocolDecl *PD) override {
+ // Do nothing - we only emit referenced protocols.
+ }
+ llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) {
+ std::string ProtocolName = PD->getNameAsString();
+ auto *&Protocol = ExistingProtocols[ProtocolName];
+ if (Protocol)
+ return Protocol;
+
+ EmittedProtocol = true;
+
+ // Use the protocol definition, if there is one.
+ if (const ObjCProtocolDecl *Def = PD->getDefinition())
+ PD = Def;
+
+ SmallVector<llvm::Constant*, 16> Protocols;
+ for (const auto *PI : PD->protocols())
+ Protocols.push_back(
+ llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI),
+ ProtocolPtrTy));
+ llvm::Constant *ProtocolList = GenerateProtocolList(Protocols);
+
+ // Collect information about methods
+ llvm::Constant *InstanceMethodList, *OptionalInstanceMethodList;
+ llvm::Constant *ClassMethodList, *OptionalClassMethodList;
+ EmitProtocolMethodList(PD->instance_methods(), InstanceMethodList,
+ OptionalInstanceMethodList);
+ EmitProtocolMethodList(PD->class_methods(), ClassMethodList,
+ OptionalClassMethodList);
+
+ auto SymName = SymbolForProtocol(ProtocolName);
+ auto *OldGV = TheModule.getGlobalVariable(SymName);
+ // The isa pointer must be set to a magic number so the runtime knows it's
+ // the correct layout.
+ ConstantInitBuilder builder(CGM);
+ auto ProtocolBuilder = builder.beginStruct();
+ ProtocolBuilder.add(llvm::ConstantExpr::getIntToPtr(
+ llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));
+ ProtocolBuilder.add(MakeConstantString(ProtocolName));
+ ProtocolBuilder.add(ProtocolList);
+ ProtocolBuilder.add(InstanceMethodList);
+ ProtocolBuilder.add(ClassMethodList);
+ ProtocolBuilder.add(OptionalInstanceMethodList);
+ ProtocolBuilder.add(OptionalClassMethodList);
+ // Required instance properties
+ ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, false, false));
+ // Optional instance properties
+ ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, false, true));
+ // Required class properties
+ ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, true, false));
+ // Optional class properties
+ ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, true, true));
+
+ auto *GV = ProtocolBuilder.finishAndCreateGlobal(SymName,
+ CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage);
+ GV->setSection(ProtocolSection);
+ GV->setComdat(TheModule.getOrInsertComdat(SymName));
+ if (OldGV) {
+ OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV,
+ OldGV->getType()));
+ OldGV->removeFromParent();
+ GV->setName(SymName);
+ }
+ Protocol = GV;
+ return GV;
+ }
+ llvm::Constant *EnforceType(llvm::Constant *Val, llvm::Type *Ty) {
+ if (Val->getType() == Ty)
+ return Val;
+ return llvm::ConstantExpr::getBitCast(Val, Ty);
+ }
+ llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) override {
+ return GetConstantSelector(Sel, TypeEncoding);
+ }
+ llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) {
+ if (TypeEncoding.empty())
+ return NULLPtr;
+ std::string MangledTypes = TypeEncoding;
+ std::replace(MangledTypes.begin(), MangledTypes.end(),
+ '@', '\1');
+ std::string TypesVarName = ".objc_sel_types_" + MangledTypes;
+ auto *TypesGlobal = TheModule.getGlobalVariable(TypesVarName);
+ if (!TypesGlobal) {
+ llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext,
+ TypeEncoding);
+ auto *GV = new llvm::GlobalVariable(TheModule, Init->getType(),
+ true, llvm::GlobalValue::LinkOnceODRLinkage, Init, TypesVarName);
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ TypesGlobal = GV;
+ }
+ return llvm::ConstantExpr::getGetElementPtr(TypesGlobal->getValueType(),
+ TypesGlobal, Zeros);
+ }
+ llvm::Constant *GetConstantSelector(Selector Sel,
+ const std::string &TypeEncoding) override {
+ // @ is used as a special character in symbol names (used for symbol
+ // versioning), so mangle the name to not include it. Replace it with a
+ // character that is not a valid type encoding character (and, being
+ // non-printable, never will be!)
+ std::string MangledTypes = TypeEncoding;
+ std::replace(MangledTypes.begin(), MangledTypes.end(),
+ '@', '\1');
+ auto SelVarName = (StringRef(".objc_selector_") + Sel.getAsString() + "_" +
+ MangledTypes).str();
+ if (auto *GV = TheModule.getNamedGlobal(SelVarName))
+ return EnforceType(GV, SelectorTy);
+ ConstantInitBuilder builder(CGM);
+ auto SelBuilder = builder.beginStruct();
+ SelBuilder.add(ExportUniqueString(Sel.getAsString(), ".objc_sel_name_",
+ true));
+ SelBuilder.add(GetTypeString(TypeEncoding));
+ auto *GV = SelBuilder.finishAndCreateGlobal(SelVarName,
+ CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage);
+ GV->setComdat(TheModule.getOrInsertComdat(SelVarName));
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ GV->setSection(SelSection);
+ auto *SelVal = EnforceType(GV, SelectorTy);
+ return SelVal;
+ }
+ std::pair<llvm::Constant*,llvm::Constant*>
+ GetSectionBounds(StringRef Section) {
+ auto *Start = new llvm::GlobalVariable(TheModule, PtrTy,
+ /*isConstant*/false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__start_") +
+ Section);
+ Start->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ auto *Stop = new llvm::GlobalVariable(TheModule, PtrTy,
+ /*isConstant*/false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__stop_") +
+ Section);
+ Stop->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ return { Start, Stop };
+ }
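The __start_/__stop_ pairs created above rely on ELF linkers synthesizing bounds symbols for any section whose name is a valid C identifier. A sketch of the consuming side (the entry type and function below are illustrative, not the GNUstep runtime's actual API):

// Entry layout assumed to match the {name, types} selector records
// placed in __objc_selectors above.
struct SelectorEntry { const char *Name; const char *Types; };

extern "C" SelectorEntry __start___objc_selectors[];
extern "C" SelectorEntry __stop___objc_selectors[];

static void forEachSelector(void (*Visit)(const SelectorEntry &)) {
  // The linker-provided bounds cover every record that any translation
  // unit emitted into the section.
  for (SelectorEntry *E = __start___objc_selectors;
       E != __stop___objc_selectors; ++E)
    Visit(*E);
}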
+ llvm::Function *ModuleInitFunction() override {
+ llvm::Function *LoadFunction = llvm::Function::Create(
+ llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false),
+ llvm::GlobalValue::LinkOnceODRLinkage, ".objcv2_load_function",
+ &TheModule);
+ LoadFunction->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ LoadFunction->setComdat(TheModule.getOrInsertComdat(".objcv2_load_function"));
+
+ llvm::BasicBlock *EntryBB =
+ llvm::BasicBlock::Create(VMContext, "entry", LoadFunction);
+ CGBuilderTy B(CGM, VMContext);
+ B.SetInsertPoint(EntryBB);
+ ConstantInitBuilder builder(CGM);
+ auto InitStructBuilder = builder.beginStruct();
+ InitStructBuilder.addInt(Int64Ty, 0);
+ auto addSection = [&](const char *section) {
+ auto bounds = GetSectionBounds(section);
+ InitStructBuilder.add(bounds.first);
+ InitStructBuilder.add(bounds.second);
+ };
+ addSection(SelSection);
+ addSection(ClsSection);
+ addSection(ClsRefSection);
+ addSection(CatSection);
+ addSection(ProtocolSection);
+ addSection(ProtocolRefSection);
+ addSection(ClassAliasSection);
+ addSection(ConstantStringSection);
+ auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init",
+ CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage);
+ InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ InitStruct->setComdat(TheModule.getOrInsertComdat(".objc_init"));
+
+ CallRuntimeFunction(B, "__objc_load", {InitStruct});
+ B.CreateRetVoid();
+ // Make sure that the optimisers don't delete this function.
+ CGM.addCompilerUsedGlobal(LoadFunction);
+ // FIXME: Currently ELF only!
+ // We have to do this by hand, rather than with @llvm.ctors, so that the
+ // linker can remove the duplicate invocations.
+ auto *InitVar = new llvm::GlobalVariable(TheModule, LoadFunction->getType(),
+ /*isConstant*/true, llvm::GlobalValue::LinkOnceAnyLinkage,
+ LoadFunction, ".objc_ctor");
+ // Check that this hasn't been renamed. This shouldn't happen, because
+ // this function should be called precisely once.
+ assert(InitVar->getName() == ".objc_ctor");
+ InitVar->setSection(".ctors");
+ InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor"));
+ CGM.addCompilerUsedGlobal(InitVar);
+ for (auto *C : Categories) {
+ auto *Cat = cast<llvm::GlobalVariable>(C->stripPointerCasts());
+ Cat->setSection(CatSection);
+ CGM.addUsedGlobal(Cat);
+ }
+ // Add a null value for each special section so that we can always
+ // guarantee that the __start_ and __stop_ symbols will exist and be
+ // meaningful.
+ auto createNullGlobal = [&](StringRef Name, ArrayRef<llvm::Constant*> Init,
+ StringRef Section) {
+ auto nullBuilder = builder.beginStruct();
+ for (auto *F : Init)
+ nullBuilder.add(F);
+ auto GV = nullBuilder.finishAndCreateGlobal(Name, CGM.getPointerAlign(),
+ false, llvm::GlobalValue::LinkOnceODRLinkage);
+ GV->setSection(Section);
+ GV->setComdat(TheModule.getOrInsertComdat(Name));
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ CGM.addUsedGlobal(GV);
+ return GV;
+ };
+ createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, SelSection);
+ if (Categories.empty())
+ createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr,
+ NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, CatSection);
+ if (!EmittedClass) {
+ createNullGlobal(".objc_null_cls_init_ref", NULLPtr, ClsSection);
+ createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr },
+ ClsRefSection);
+ }
+ if (!EmittedProtocol)
+ createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr,
+ NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr,
+ NULLPtr}, ProtocolSection);
+ if (!EmittedProtocolRef)
+ createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, ProtocolRefSection);
+ if (!ClassAliases.empty())
+ for (auto clsAlias : ClassAliases)
+ createNullGlobal(std::string(".objc_class_alias") +
+ clsAlias.second, { MakeConstantString(clsAlias.second),
+ GetClassVar(clsAlias.first) }, ClassAliasSection);
+ else
+ createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr },
+ ClassAliasSection);
+ if (ConstantStrings.empty()) {
+ auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0);
+ createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero,
+ i32Zero, i32Zero, i32Zero, NULLPtr }, ConstantStringSection);
+ }
+ ConstantStrings.clear();
+ Categories.clear();
+ Classes.clear();
+ return nullptr; // The load function was registered above via .ctors instead.
+ }
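The hand-built .objc_ctor entry above is roughly equivalent to the following C++, assuming an ELF target. The identifiers here are illustrative, since the emitted names begin with '.', which C++ cannot spell, and the duplicate-folding comdat has no attribute equivalent:

extern "C" void objcv2_load_function(void);

// One function pointer in the legacy .ctors section; the loader calls it
// before main(). The comdat in the real code lets the linker drop the
// duplicate entries emitted by every translation unit.
__attribute__((used, section(".ctors")))
static void (*const ObjCCtorEntry)(void) = objcv2_load_function;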
+ /// In the v2 ABI, ivar offset variables use the type encoding in their name
+ /// to trigger linker failures if the types don't match.
+ std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID,
+ const ObjCIvarDecl *Ivar) override {
+ std::string TypeEncoding;
+ CGM.getContext().getObjCEncodingForType(Ivar->getType(), TypeEncoding);
+ // Prevent the @ from being interpreted as a symbol version.
+ std::replace(TypeEncoding.begin(), TypeEncoding.end(),
+ '@', '\1');
+ const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString()
+ + '.' + Ivar->getNameAsString() + '.' + TypeEncoding;
+ return Name;
+ }
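A standalone restatement of the v2 naming scheme above (the helper is invented for this note): a type mismatch between modules changes the symbol name, so the link fails instead of silently misbehaving.

#include <algorithm>
#include <string>

// An id-typed ivar "_delegate" in class "Foo" has encoding "@", mangled
// to '\1', giving "__objc_ivar_offset_Foo._delegate.\1".
std::string ivarOffsetVariableName(std::string Cls, std::string Ivar,
                                   std::string TypeEncoding) {
  // '@' would be read as a symbol-version separator, so mangle it away.
  std::replace(TypeEncoding.begin(), TypeEncoding.end(), '@', '\1');
  return "__objc_ivar_offset_" + Cls + '.' + Ivar + '.' + TypeEncoding;
}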
+ llvm::Value *EmitIvarOffset(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *Interface,
+ const ObjCIvarDecl *Ivar) override {
+ const std::string Name = GetIVarOffsetVariableName(Ivar->getContainingInterface(), Ivar);
+ llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
+ if (!IvarOffsetPointer)
+ IvarOffsetPointer = new llvm::GlobalVariable(TheModule, IntTy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Name);
+ CharUnits Align = CGM.getIntAlign();
+ llvm::Value *Offset = CGF.Builder.CreateAlignedLoad(IvarOffsetPointer, Align);
+ if (Offset->getType() != PtrDiffTy)
+ Offset = CGF.Builder.CreateZExtOrBitCast(Offset, PtrDiffTy);
+ return Offset;
+ }
+ void GenerateClass(const ObjCImplementationDecl *OID) override {
+ ASTContext &Context = CGM.getContext();
+
+ // Get the class name
+ ObjCInterfaceDecl *classDecl =
+ const_cast<ObjCInterfaceDecl *>(OID->getClassInterface());
+ std::string className = classDecl->getNameAsString();
+ auto *classNameConstant = MakeConstantString(className);
+
+ ConstantInitBuilder builder(CGM);
+ auto metaclassFields = builder.beginStruct();
+ // struct objc_class *isa;
+ metaclassFields.addNullPointer(PtrTy);
+ // struct objc_class *super_class;
+ metaclassFields.addNullPointer(PtrTy);
+ // const char *name;
+ metaclassFields.add(classNameConstant);
+ // long version;
+ metaclassFields.addInt(LongTy, 0);
+ // unsigned long info;
+ // objc_class_flag_meta
+ metaclassFields.addInt(LongTy, 1);
+ // long instance_size;
+ // Setting this to zero is consistent with the older ABI, but it might be
+ // more sensible to set this to sizeof(struct objc_class)
+ metaclassFields.addInt(LongTy, 0);
+ // struct objc_ivar_list *ivars;
+ metaclassFields.addNullPointer(PtrTy);
+ // struct objc_method_list *methods
+ // FIXME: Almost identical code is copied and pasted below for the
+ // class, but refactoring it cleanly requires C++14 generic lambdas.
+ if (OID->classmeth_begin() == OID->classmeth_end())
+ metaclassFields.addNullPointer(PtrTy);
+ else {
+ SmallVector<ObjCMethodDecl*, 16> ClassMethods;
+ ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(),
+ OID->classmeth_end());
+ metaclassFields.addBitCast(
+ GenerateMethodList(className, "", ClassMethods, true),
+ PtrTy);
+ }
+ // void *dtable;
+ metaclassFields.addNullPointer(PtrTy);
+ // IMP cxx_construct;
+ metaclassFields.addNullPointer(PtrTy);
+ // IMP cxx_destruct;
+ metaclassFields.addNullPointer(PtrTy);
+ // struct objc_class *subclass_list
+ metaclassFields.addNullPointer(PtrTy);
+ // struct objc_class *sibling_class
+ metaclassFields.addNullPointer(PtrTy);
+ // struct objc_protocol_list *protocols;
+ metaclassFields.addNullPointer(PtrTy);
+ // struct reference_list *extra_data;
+ metaclassFields.addNullPointer(PtrTy);
+ // long abi_version;
+ metaclassFields.addInt(LongTy, 0);
+ // struct objc_property_list *properties
+ metaclassFields.add(GeneratePropertyList(OID, classDecl, /*isClassProperty*/true));
+
+ auto *metaclass = metaclassFields.finishAndCreateGlobal("._OBJC_METACLASS_"
+ + className, CGM.getPointerAlign());
+
+ auto classFields = builder.beginStruct();
+ // struct objc_class *isa;
+ classFields.add(metaclass);
+ // struct objc_class *super_class;
+ // Get the superclass name.
+ const ObjCInterfaceDecl * SuperClassDecl =
+ OID->getClassInterface()->getSuperClass();
+ if (SuperClassDecl) {
+ auto SuperClassName = SymbolForClass(SuperClassDecl->getNameAsString());
+ llvm::Constant *SuperClass = TheModule.getNamedGlobal(SuperClassName);
+ if (!SuperClass)
+ {
+ SuperClass = new llvm::GlobalVariable(TheModule, PtrTy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, SuperClassName);
+ }
+ classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy));
+ } else
+ classFields.addNullPointer(PtrTy);
+ // const char *name;
+ classFields.add(classNameConstant);
+ // long version;
+ classFields.addInt(LongTy, 0);
+ // unsigned long info;
+ // !objc_class_flag_meta
+ classFields.addInt(LongTy, 0);
+ // long instance_size;
+ int superInstanceSize = !SuperClassDecl ? 0 :
+ Context.getASTObjCInterfaceLayout(SuperClassDecl).getSize().getQuantity();
+ // Instance size is negative for classes that have not yet had their ivar
+ // layout calculated.
+ classFields.addInt(LongTy,
+ 0 - (Context.getASTObjCImplementationLayout(OID).getSize().getQuantity() -
+ superInstanceSize));
+
+ if (classDecl->all_declared_ivar_begin() == nullptr)
+ classFields.addNullPointer(PtrTy);
+ else {
+ int ivar_count = 0;
+ for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD;
+ IVD = IVD->getNextIvar()) ivar_count++;
+ llvm::DataLayout td(&TheModule);
+ // struct objc_ivar_list *ivars;
+ ConstantInitBuilder b(CGM);
+ auto ivarListBuilder = b.beginStruct();
+ // int count;
+ ivarListBuilder.addInt(IntTy, ivar_count);
+ // size_t size;
+ llvm::StructType *ObjCIvarTy = llvm::StructType::get(
+ PtrToInt8Ty,
+ PtrToInt8Ty,
+ PtrToInt8Ty,
+ Int32Ty,
+ Int32Ty);
+ ivarListBuilder.addInt(SizeTy, td.getTypeSizeInBits(ObjCIvarTy) /
+ CGM.getContext().getCharWidth());
+ // struct objc_ivar ivars[]
+ auto ivarArrayBuilder = ivarListBuilder.beginArray();
+ CodeGenTypes &Types = CGM.getTypes();
+ for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD;
+ IVD = IVD->getNextIvar()) {
+ auto ivarTy = IVD->getType();
+ auto ivarBuilder = ivarArrayBuilder.beginStruct();
+ // const char *name;
+ ivarBuilder.add(MakeConstantString(IVD->getNameAsString()));
+ // const char *type;
+ std::string TypeStr;
+ Context.getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, ivarTy, TypeStr, true);
+ ivarBuilder.add(MakeConstantString(TypeStr));
+ // int *offset;
+ uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD);
+ uint64_t Offset = BaseOffset - superInstanceSize;
+ llvm::Constant *OffsetValue = llvm::ConstantInt::get(IntTy, Offset);
+ std::string OffsetName = GetIVarOffsetVariableName(classDecl, IVD);
+ llvm::GlobalVariable *OffsetVar = TheModule.getGlobalVariable(OffsetName);
+ if (OffsetVar)
+ OffsetVar->setInitializer(OffsetValue);
+ else
+ OffsetVar = new llvm::GlobalVariable(TheModule, IntTy,
+ false, llvm::GlobalValue::ExternalLinkage,
+ OffsetValue, OffsetName);
+ auto ivarVisibility =
+ (IVD->getAccessControl() == ObjCIvarDecl::Private ||
+ IVD->getAccessControl() == ObjCIvarDecl::Package ||
+ classDecl->getVisibility() == HiddenVisibility) ?
+ llvm::GlobalValue::HiddenVisibility :
+ llvm::GlobalValue::DefaultVisibility;
+ OffsetVar->setVisibility(ivarVisibility);
+ ivarBuilder.add(OffsetVar);
+ // Ivar size
+ ivarBuilder.addInt(Int32Ty,
+ td.getTypeSizeInBits(Types.ConvertType(ivarTy)) /
+ CGM.getContext().getCharWidth());
+ // Alignment will be stored as a base-2 log of the alignment.
+ int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity());
+ // Objects that require more than 2^64-byte alignment should be impossible!
+ assert(align < 64);
+ // uint32_t flags;
+ // Bits 0-1 are ownership.
+ // Bit 2 indicates an extended type encoding
+ // Bits 3-8 contain log2(alignment)
+ ivarBuilder.addInt(Int32Ty,
+ (align << 3) | (1<<2) |
+ FlagsForOwnership(ivarTy.getQualifiers().getObjCLifetime()));
+ ivarBuilder.finishAndAddTo(ivarArrayBuilder);
+ }
+ ivarArrayBuilder.finishAndAddTo(ivarListBuilder);
+ auto ivarList = ivarListBuilder.finishAndCreateGlobal(".objc_ivar_list",
+ CGM.getPointerAlign(), /*constant*/ false,
+ llvm::GlobalValue::PrivateLinkage);
+ classFields.add(ivarList);
+ }
+ // struct objc_method_list *methods
+ SmallVector<const ObjCMethodDecl*, 16> InstanceMethods;
+ InstanceMethods.insert(InstanceMethods.begin(), OID->instmeth_begin(),
+ OID->instmeth_end());
+ for (auto *propImpl : OID->property_impls())
+ if (propImpl->getPropertyImplementation() ==
+ ObjCPropertyImplDecl::Synthesize) {
+ ObjCPropertyDecl *prop = propImpl->getPropertyDecl();
+ auto addIfExists = [&](const ObjCMethodDecl* OMD) {
+ if (OMD)
+ InstanceMethods.push_back(OMD);
+ };
+ addIfExists(prop->getGetterMethodDecl());
+ addIfExists(prop->getSetterMethodDecl());
+ }
+
+ if (InstanceMethods.size() == 0)
+ classFields.addNullPointer(PtrTy);
+ else
+ classFields.addBitCast(
+ GenerateMethodList(className, "", InstanceMethods, false),
+ PtrTy);
+ // void *dtable;
+ classFields.addNullPointer(PtrTy);
+ // IMP cxx_construct;
+ classFields.addNullPointer(PtrTy);
+ // IMP cxx_destruct;
+ classFields.addNullPointer(PtrTy);
+ // struct objc_class *subclass_list
+ classFields.addNullPointer(PtrTy);
+ // struct objc_class *sibling_class
+ classFields.addNullPointer(PtrTy);
+ // struct objc_protocol_list *protocols;
+ SmallVector<llvm::Constant*, 16> Protocols;
+ for (const auto *I : classDecl->protocols())
+ Protocols.push_back(
+ llvm::ConstantExpr::getBitCast(GenerateProtocolRef(I),
+ ProtocolPtrTy));
+ if (Protocols.empty())
+ classFields.addNullPointer(PtrTy);
+ else
+ classFields.add(GenerateProtocolList(Protocols));
+ // struct reference_list *extra_data;
+ classFields.addNullPointer(PtrTy);
+ // long abi_version;
+ classFields.addInt(LongTy, 0);
+ // struct objc_property_list *properties
+ classFields.add(GeneratePropertyList(OID, classDecl));
+
+ auto *classStruct =
+ classFields.finishAndCreateGlobal(SymbolForClass(className),
+ CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage);
+
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ auto Storage = llvm::GlobalValue::DefaultStorageClass;
+ if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
+ Storage = llvm::GlobalValue::DLLImportStorageClass;
+ else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
+ Storage = llvm::GlobalValue::DLLExportStorageClass;
+ cast<llvm::GlobalValue>(classStruct)->setDLLStorageClass(Storage);
+ }
+
+ auto *classRefSymbol = GetClassVar(className);
+ classRefSymbol->setSection(ClsRefSection);
+ classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy));
+
+ // Resolve the class aliases, if they exist.
+ // FIXME: Class pointer aliases shouldn't exist!
+ if (ClassPtrAlias) {
+ ClassPtrAlias->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(classStruct, IdTy));
+ ClassPtrAlias->eraseFromParent();
+ ClassPtrAlias = nullptr;
+ }
+ if (auto Placeholder =
+ TheModule.getNamedGlobal(SymbolForClass(className)))
+ if (Placeholder != classStruct) {
+ Placeholder->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(classStruct, Placeholder->getType()));
+ Placeholder->eraseFromParent();
+ classStruct->setName(SymbolForClass(className));
+ }
+ if (MetaClassPtrAlias) {
+ MetaClassPtrAlias->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(metaclass, IdTy));
+ MetaClassPtrAlias->eraseFromParent();
+ MetaClassPtrAlias = nullptr;
+ }
+ assert(classStruct->getName() == SymbolForClass(className));
+
+ auto classInitRef = new llvm::GlobalVariable(TheModule,
+ classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage,
+ classStruct, "._OBJC_INIT_CLASS_" + className);
+ classInitRef->setSection(ClsSection);
+ CGM.addUsedGlobal(classInitRef);
+
+ EmittedClass = true;
+ }
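A worked example of the ivar flag word assembled in GenerateClass above (the helper is invented for illustration):

#include <cstdint>

// Mirrors the packing above: bits 0-1 hold the ownership value, bit 2 is
// the extended-type-encoding flag (always set here), and bits 3-8 hold
// log2(alignment).
uint32_t ivarFlags(uint32_t Log2Align, uint32_t Ownership) {
  return (Log2Align << 3) | (1u << 2) | Ownership;
}

// A strong pointer ivar with 8-byte alignment: log2(8) == 3 and
// ownership_strong == 1, so ivarFlags(3, 1) == 0x1D.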
+ public:
+ CGObjCGNUstep2(CodeGenModule &Mod) : CGObjCGNUstep(Mod, 10, 4, 2) {
+ MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
+ PtrToObjCSuperTy, SelectorTy);
+ // struct objc_property
+ // {
+ // const char *name;
+ // const char *attributes;
+ // const char *type;
+ // SEL getter;
+ // SEL setter;
+ // }
+ PropertyMetadataTy =
+ llvm::StructType::get(CGM.getLLVMContext(),
+ { PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty });
+ }
+
+};
+
/// Support for the ObjFW runtime.
class CGObjCObjFW: public CGObjCGNU {
protected:
@@ -878,22 +1923,12 @@ void CGObjCGNU::EmitClassRef(const std::string &className) {
llvm::GlobalValue::WeakAnyLinkage, ClassSymbol, symbolRef);
}
-static std::string SymbolNameForMethod( StringRef ClassName,
- StringRef CategoryName, const Selector MethodName,
- bool isClassMethod) {
- std::string MethodNameColonStripped = MethodName.getAsString();
- std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(),
- ':', '_');
- return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" +
- CategoryName + "_" + MethodNameColonStripped).str();
-}
-
CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
- unsigned protocolClassVersion)
+ unsigned protocolClassVersion, unsigned classABI)
: CGObjCRuntime(cgm), TheModule(CGM.getModule()),
VMContext(cgm.getLLVMContext()), ClassPtrAlias(nullptr),
MetaClassPtrAlias(nullptr), RuntimeVersion(runtimeABIVersion),
- ProtocolVersion(protocolClassVersion) {
+ ProtocolVersion(protocolClassVersion), ClassABIVersion(classABI) {
msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend");
@@ -911,6 +1946,8 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
Int8Ty = llvm::Type::getInt8Ty(VMContext);
// C string type. Used in lots of places.
PtrToInt8Ty = llvm::PointerType::getUnqual(Int8Ty);
+ ProtocolPtrTy = llvm::PointerType::getUnqual(
+ Types.ConvertType(CGM.getContext().getObjCProtoType()));
Zeros[0] = llvm::ConstantInt::get(LongTy, 0);
Zeros[1] = Zeros[0];
@@ -942,6 +1979,31 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
IdTy = PtrToInt8Ty;
}
PtrToIdTy = llvm::PointerType::getUnqual(IdTy);
+ ProtocolTy = llvm::StructType::get(IdTy,
+ PtrToInt8Ty, // name
+ PtrToInt8Ty, // protocols
+ PtrToInt8Ty, // instance methods
+ PtrToInt8Ty, // class methods
+ PtrToInt8Ty, // optional instance methods
+ PtrToInt8Ty, // optional class methods
+ PtrToInt8Ty, // properties
+ PtrToInt8Ty);// optional properties
+
+ // struct objc_property_gsv1
+ // {
+ // const char *name;
+ // char attributes;
+ // char attributes2;
+ // char unused1;
+ // char unused2;
+ // const char *getter_name;
+ // const char *getter_types;
+ // const char *setter_name;
+ // const char *setter_types;
+ // }
+ PropertyMetadataTy = llvm::StructType::get(CGM.getLLVMContext(), {
+ PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, PtrToInt8Ty,
+ PtrToInt8Ty, PtrToInt8Ty });
ObjCSuperTy = llvm::StructType::get(IdTy, IdTy);
PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy);
@@ -1035,16 +2097,8 @@ llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF,
const ObjCInterfaceDecl *OID) {
auto *Value =
GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported());
- if (CGM.getTriple().isOSBinFormatCOFF()) {
- if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) {
- auto DLLStorage = llvm::GlobalValue::DefaultStorageClass;
- if (OID->hasAttr<DLLExportAttr>())
- DLLStorage = llvm::GlobalValue::DLLExportStorageClass;
- else if (OID->hasAttr<DLLImportAttr>())
- DLLStorage = llvm::GlobalValue::DLLImportStorageClass;
- ClassSymbol->setDLLStorageClass(DLLStorage);
- }
- }
+ if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value))
+ CGM.setGVProperties(ClassSymbol, OID);
return Value;
}
@@ -1061,13 +2115,7 @@ llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) {
if ((VD = dyn_cast<VarDecl>(Result)))
break;
- auto DLLStorage = llvm::GlobalValue::DefaultStorageClass;
- if (!VD || VD->hasAttr<DLLImportAttr>())
- DLLStorage = llvm::GlobalValue::DLLImportStorageClass;
- else if (VD->hasAttr<DLLExportAttr>())
- DLLStorage = llvm::GlobalValue::DLLExportStorageClass;
-
- ClassSymbol->setDLLStorageClass(DLLStorage);
+ CGM.setGVProperties(ClassSymbol, VD);
}
}
return Value;
@@ -1217,7 +2265,7 @@ ConstantAddress CGObjCGNU::GenerateConstantString(const StringLiteral *SL) {
StringRef StringClass = CGM.getLangOpts().ObjCConstantStringClass;
- if (StringClass.empty()) StringClass = "NXConstantString";
+ if (StringClass.empty()) StringClass = "NSConstantString";
std::string Sym = "_OBJC_CLASS_";
Sym += StringClass;
@@ -1278,54 +2326,67 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs);
llvm::Value *ReceiverClass = nullptr;
- if (isCategoryImpl) {
- llvm::Constant *classLookupFunction = nullptr;
+ bool isV2ABI = isRuntime(ObjCRuntime::GNUstep, 2);
+ if (isV2ABI) {
+ ReceiverClass = GetClassNamed(CGF,
+ Class->getSuperClass()->getNameAsString(), /*isWeak*/false);
if (IsClassMessage) {
- classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
- IdTy, PtrTy, true), "objc_get_meta_class");
- } else {
- classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
- IdTy, PtrTy, true), "objc_get_class");
+ // Load the isa pointer of the superclass if this is a class method.
+ ReceiverClass = Builder.CreateBitCast(ReceiverClass,
+ llvm::PointerType::getUnqual(IdTy));
+ ReceiverClass =
+ Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
}
- ReceiverClass = Builder.CreateCall(classLookupFunction,
- MakeConstantString(Class->getNameAsString()));
+ ReceiverClass = EnforceType(Builder, ReceiverClass, IdTy);
} else {
- // Set up global aliases for the metaclass or class pointer if they do not
- // already exist. These will are forward-references which will be set to
- // pointers to the class and metaclass structure created for the runtime
- // load function. To send a message to super, we look up the value of the
- // super_class pointer from either the class or metaclass structure.
- if (IsClassMessage) {
- if (!MetaClassPtrAlias) {
- MetaClassPtrAlias = llvm::GlobalAlias::create(
- IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
- ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
+ if (isCategoryImpl) {
+ llvm::Constant *classLookupFunction = nullptr;
+ if (IsClassMessage) {
+ classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
+ IdTy, PtrTy, true), "objc_get_meta_class");
+ } else {
+ classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
+ IdTy, PtrTy, true), "objc_get_class");
}
- ReceiverClass = MetaClassPtrAlias;
+ ReceiverClass = Builder.CreateCall(classLookupFunction,
+ MakeConstantString(Class->getNameAsString()));
} else {
- if (!ClassPtrAlias) {
- ClassPtrAlias = llvm::GlobalAlias::create(
- IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
- ".objc_class_ref" + Class->getNameAsString(), &TheModule);
+ // Set up global aliases for the metaclass or class pointer if they do not
+ // already exist. These are forward references which will be set to
+ // pointers to the class and metaclass structure created for the runtime
+ // load function. To send a message to super, we look up the value of the
+ // super_class pointer from either the class or metaclass structure.
+ if (IsClassMessage) {
+ if (!MetaClassPtrAlias) {
+ MetaClassPtrAlias = llvm::GlobalAlias::create(
+ IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+ ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
+ }
+ ReceiverClass = MetaClassPtrAlias;
+ } else {
+ if (!ClassPtrAlias) {
+ ClassPtrAlias = llvm::GlobalAlias::create(
+ IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+ ".objc_class_ref" + Class->getNameAsString(), &TheModule);
+ }
+ ReceiverClass = ClassPtrAlias;
}
- ReceiverClass = ClassPtrAlias;
}
+ // Cast the pointer to a simplified version of the class structure
+ llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy);
+ ReceiverClass = Builder.CreateBitCast(ReceiverClass,
+ llvm::PointerType::getUnqual(CastTy));
+ // Get the superclass pointer
+ ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1);
+ // Load the superclass pointer
+ ReceiverClass =
+ Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
}
- // Cast the pointer to a simplified version of the class structure
- llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy);
- ReceiverClass = Builder.CreateBitCast(ReceiverClass,
- llvm::PointerType::getUnqual(CastTy));
- // Get the superclass pointer
- ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1);
- // Load the superclass pointer
- ReceiverClass =
- Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
// Construct the structure used to look up the IMP
llvm::StructType *ObjCSuperTy =
llvm::StructType::get(Receiver->getType(), IdTy);
- // FIXME: Is this really supposed to be a dynamic alloca?
- Address ObjCSuper = Address(Builder.CreateAlloca(ObjCSuperTy),
+ Address ObjCSuper = CGF.CreateTempAlloca(ObjCSuperTy,
CGF.getPointerAlign());
Builder.CreateStore(Receiver,
@@ -1456,7 +2517,7 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF,
}
// Reset the receiver in case the lookup modified it
- ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy, false);
+ ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy);
imp = EnforceType(Builder, imp, MSI.MessengerType);
@@ -1506,17 +2567,16 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF,
llvm::Constant *CGObjCGNU::
GenerateMethodList(StringRef ClassName,
StringRef CategoryName,
- ArrayRef<Selector> MethodSels,
- ArrayRef<llvm::Constant *> MethodTypes,
+ ArrayRef<const ObjCMethodDecl*> Methods,
bool isClassMethodList) {
- if (MethodSels.empty())
+ if (Methods.empty())
return NULLPtr;
ConstantInitBuilder Builder(CGM);
auto MethodList = Builder.beginStruct();
MethodList.addNullPointer(CGM.Int8PtrTy);
- MethodList.addInt(Int32Ty, MethodTypes.size());
+ MethodList.addInt(Int32Ty, Methods.size());
// Get the method structure type.
llvm::StructType *ObjCMethodTy =
@@ -1525,20 +2585,48 @@ GenerateMethodList(StringRef ClassName,
PtrToInt8Ty, // Method types
IMPTy // Method pointer
});
- auto Methods = MethodList.beginArray();
- for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) {
+ bool isV2ABI = isRuntime(ObjCRuntime::GNUstep, 2);
+ if (isV2ABI) {
+ // size_t size;
+ llvm::DataLayout td(&TheModule);
+ MethodList.addInt(SizeTy, td.getTypeSizeInBits(ObjCMethodTy) /
+ CGM.getContext().getCharWidth());
+ ObjCMethodTy =
+ llvm::StructType::get(CGM.getLLVMContext(), {
+ IMPTy, // Method pointer
+ PtrToInt8Ty, // Selector
+ PtrToInt8Ty // Extended type encoding
+ });
+ } else {
+ ObjCMethodTy =
+ llvm::StructType::get(CGM.getLLVMContext(), {
+ PtrToInt8Ty, // Really a selector, but the runtime creates it for us.
+ PtrToInt8Ty, // Method types
+ IMPTy // Method pointer
+ });
+ }
+ auto MethodArray = MethodList.beginArray();
+ ASTContext &Context = CGM.getContext();
+ for (const auto *OMD : Methods) {
llvm::Constant *FnPtr =
TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName,
- MethodSels[i],
+ OMD->getSelector(),
isClassMethodList));
assert(FnPtr && "Can't generate metadata for method that doesn't exist");
- auto Method = Methods.beginStruct(ObjCMethodTy);
- Method.add(MakeConstantString(MethodSels[i].getAsString()));
- Method.add(MethodTypes[i]);
- Method.addBitCast(FnPtr, IMPTy);
- Method.finishAndAddTo(Methods);
+ auto Method = MethodArray.beginStruct(ObjCMethodTy);
+ if (isV2ABI) {
+ Method.addBitCast(FnPtr, IMPTy);
+ Method.add(GetConstantSelector(OMD->getSelector(),
+ Context.getObjCEncodingForMethodDecl(OMD)));
+ Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD, true)));
+ } else {
+ Method.add(MakeConstantString(OMD->getSelector().getAsString()));
+ Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD)));
+ Method.addBitCast(FnPtr, IMPTy);
+ }
+ Method.finishAndAddTo(MethodArray);
}
- Methods.finishAndAddTo(MethodList);
+ MethodArray.finishAndAddTo(MethodList);
// Create an instance of the structure
return MethodList.finishAndCreateGlobal(".objc_method_list",
@@ -1549,7 +2637,9 @@ GenerateMethodList(StringRef ClassName,
llvm::Constant *CGObjCGNU::
GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
ArrayRef<llvm::Constant *> IvarTypes,
- ArrayRef<llvm::Constant *> IvarOffsets) {
+ ArrayRef<llvm::Constant *> IvarOffsets,
+ ArrayRef<llvm::Constant *> IvarAlign,
+ ArrayRef<Qualifiers::ObjCLifetime> IvarOwnership) {
if (IvarNames.empty())
return NULLPtr;
@@ -1664,7 +2754,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
// gc_object_type
Elements.add(NULLPtr);
// abi_version
- Elements.addInt(LongTy, 1);
+ Elements.addInt(LongTy, ClassABIVersion);
// ivar_offsets
Elements.add(IvarOffsets);
// properties
@@ -1693,22 +2783,22 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
}
llvm::Constant *CGObjCGNU::
-GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames,
- ArrayRef<llvm::Constant *> MethodTypes) {
+GenerateProtocolMethodList(ArrayRef<const ObjCMethodDecl*> Methods) {
// Get the method structure type.
llvm::StructType *ObjCMethodDescTy =
llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty });
+ ASTContext &Context = CGM.getContext();
ConstantInitBuilder Builder(CGM);
auto MethodList = Builder.beginStruct();
- MethodList.addInt(IntTy, MethodNames.size());
- auto Methods = MethodList.beginArray(ObjCMethodDescTy);
- for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) {
- auto Method = Methods.beginStruct(ObjCMethodDescTy);
- Method.add(MethodNames[i]);
- Method.add(MethodTypes[i]);
- Method.finishAndAddTo(Methods);
- }
- Methods.finishAndAddTo(MethodList);
+ MethodList.addInt(IntTy, Methods.size());
+ auto MethodArray = MethodList.beginArray(ObjCMethodDescTy);
+ for (auto *M : Methods) {
+ auto Method = MethodArray.beginStruct(ObjCMethodDescTy);
+ Method.add(MakeConstantString(M->getSelector().getAsString()));
+ Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(M)));
+ Method.finishAndAddTo(MethodArray);
+ }
+ MethodArray.finishAndAddTo(MethodList);
return MethodList.finishAndCreateGlobal(".objc_method_list",
CGM.getPointerAlign());
}
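
[Annotation] For reference, the protocol method list emitted above is a count followed by an inline array of {selector name, type encoding} pairs. A minimal C++ sketch of the corresponding in-memory layout, with hypothetical field names (the runtime's own headers may differ):

    // Illustrative layout of the metadata built by GenerateProtocolMethodList.
    // Each entry pairs a selector name (fixed up by the runtime at load time)
    // with its Objective-C type encoding; a count prefixes the list.
    struct objc_method_description {
      const char *name;   // selector, stored as a string
      const char *types;  // encoding from getObjCEncodingForMethodDecl
    };
    struct objc_method_description_list {
      int count;
      objc_method_description methods[1]; // variable-length in practice
    };
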
@@ -1742,16 +2832,19 @@ CGObjCGNU::GenerateProtocolList(ArrayRef<std::string> Protocols) {
llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF,
const ObjCProtocolDecl *PD) {
- llvm::Value *protocol = ExistingProtocols[PD->getNameAsString()];
+ llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()];
+ if (!protocol)
+ GenerateProtocol(PD);
llvm::Type *T =
CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType());
return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T));
}
llvm::Constant *
-CGObjCGNU::GenerateEmptyProtocol(const std::string &ProtocolName) {
+CGObjCGNU::GenerateEmptyProtocol(StringRef ProtocolName) {
llvm::Constant *ProtocolList = GenerateProtocolList({});
- llvm::Constant *MethodList = GenerateProtocolMethodList({}, {});
+ llvm::Constant *MethodList = GenerateProtocolMethodList({});
+ MethodList = llvm::ConstantExpr::getBitCast(MethodList, PtrToInt8Ty);
// Protocols are objects containing lists of the methods implemented and
// protocols adopted.
ConstantInitBuilder Builder(CGM);
@@ -1763,17 +2856,18 @@ CGObjCGNU::GenerateEmptyProtocol(const std::string &ProtocolName) {
llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));
Elements.add(MakeConstantString(ProtocolName, ".objc_protocol_name"));
- Elements.add(ProtocolList);
- Elements.add(MethodList);
- Elements.add(MethodList);
- Elements.add(MethodList);
- Elements.add(MethodList);
- return Elements.finishAndCreateGlobal(".objc_protocol",
+ Elements.add(ProtocolList); /* .protocol_list */
+ Elements.add(MethodList); /* .instance_methods */
+ Elements.add(MethodList); /* .class_methods */
+ Elements.add(MethodList); /* .optional_instance_methods */
+ Elements.add(MethodList); /* .optional_class_methods */
+ Elements.add(NULLPtr); /* .properties */
+ Elements.add(NULLPtr); /* .optional_properties */
+ return Elements.finishAndCreateGlobal(SymbolForProtocol(ProtocolName),
CGM.getPointerAlign());
}
void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
- ASTContext &Context = CGM.getContext();
std::string ProtocolName = PD->getNameAsString();
// Use the protocol definition, if there is one.
@@ -1783,51 +2877,31 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
SmallVector<std::string, 16> Protocols;
for (const auto *PI : PD->protocols())
Protocols.push_back(PI->getNameAsString());
- SmallVector<llvm::Constant*, 16> InstanceMethodNames;
- SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
- SmallVector<llvm::Constant*, 16> OptionalInstanceMethodNames;
- SmallVector<llvm::Constant*, 16> OptionalInstanceMethodTypes;
- for (const auto *I : PD->instance_methods()) {
- std::string TypeStr = Context.getObjCEncodingForMethodDecl(I);
- if (I->getImplementationControl() == ObjCMethodDecl::Optional) {
- OptionalInstanceMethodNames.push_back(
- MakeConstantString(I->getSelector().getAsString()));
- OptionalInstanceMethodTypes.push_back(MakeConstantString(TypeStr));
- } else {
- InstanceMethodNames.push_back(
- MakeConstantString(I->getSelector().getAsString()));
- InstanceMethodTypes.push_back(MakeConstantString(TypeStr));
- }
- }
+ SmallVector<const ObjCMethodDecl*, 16> InstanceMethods;
+ SmallVector<const ObjCMethodDecl*, 16> OptionalInstanceMethods;
+ for (const auto *I : PD->instance_methods())
+ if (I->isOptional())
+ OptionalInstanceMethods.push_back(I);
+ else
+ InstanceMethods.push_back(I);
// Collect information about class methods:
- SmallVector<llvm::Constant*, 16> ClassMethodNames;
- SmallVector<llvm::Constant*, 16> ClassMethodTypes;
- SmallVector<llvm::Constant*, 16> OptionalClassMethodNames;
- SmallVector<llvm::Constant*, 16> OptionalClassMethodTypes;
- for (const auto *I : PD->class_methods()) {
- std::string TypeStr = Context.getObjCEncodingForMethodDecl(I);
- if (I->getImplementationControl() == ObjCMethodDecl::Optional) {
- OptionalClassMethodNames.push_back(
- MakeConstantString(I->getSelector().getAsString()));
- OptionalClassMethodTypes.push_back(MakeConstantString(TypeStr));
- } else {
- ClassMethodNames.push_back(
- MakeConstantString(I->getSelector().getAsString()));
- ClassMethodTypes.push_back(MakeConstantString(TypeStr));
- }
- }
+ SmallVector<const ObjCMethodDecl*, 16> ClassMethods;
+ SmallVector<const ObjCMethodDecl*, 16> OptionalClassMethods;
+ for (const auto *I : PD->class_methods())
+ if (I->isOptional())
+ OptionalClassMethods.push_back(I);
+ else
+ ClassMethods.push_back(I);
llvm::Constant *ProtocolList = GenerateProtocolList(Protocols);
llvm::Constant *InstanceMethodList =
- GenerateProtocolMethodList(InstanceMethodNames, InstanceMethodTypes);
+ GenerateProtocolMethodList(InstanceMethods);
llvm::Constant *ClassMethodList =
- GenerateProtocolMethodList(ClassMethodNames, ClassMethodTypes);
+ GenerateProtocolMethodList(ClassMethods);
llvm::Constant *OptionalInstanceMethodList =
- GenerateProtocolMethodList(OptionalInstanceMethodNames,
- OptionalInstanceMethodTypes);
+ GenerateProtocolMethodList(OptionalInstanceMethods);
llvm::Constant *OptionalClassMethodList =
- GenerateProtocolMethodList(OptionalClassMethodNames,
- OptionalClassMethodTypes);
+ GenerateProtocolMethodList(OptionalClassMethods);
// Property metadata: name, attributes, isSynthesized, setter name, setter
// types, getter name, getter types.
@@ -1835,78 +2909,10 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
// simplify the runtime library by allowing it to use the same data
// structures for protocol metadata everywhere.
- llvm::Constant *PropertyList;
- llvm::Constant *OptionalPropertyList;
- {
- llvm::StructType *propertyMetadataTy =
- llvm::StructType::get(CGM.getLLVMContext(),
- { PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty,
- PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty });
-
- unsigned numReqProperties = 0, numOptProperties = 0;
- for (auto property : PD->instance_properties()) {
- if (property->isOptional())
- numOptProperties++;
- else
- numReqProperties++;
- }
-
- ConstantInitBuilder reqPropertyListBuilder(CGM);
- auto reqPropertiesList = reqPropertyListBuilder.beginStruct();
- reqPropertiesList.addInt(IntTy, numReqProperties);
- reqPropertiesList.add(NULLPtr);
- auto reqPropertiesArray = reqPropertiesList.beginArray(propertyMetadataTy);
-
- ConstantInitBuilder optPropertyListBuilder(CGM);
- auto optPropertiesList = optPropertyListBuilder.beginStruct();
- optPropertiesList.addInt(IntTy, numOptProperties);
- optPropertiesList.add(NULLPtr);
- auto optPropertiesArray = optPropertiesList.beginArray(propertyMetadataTy);
-
- // Add all of the property methods need adding to the method list and to the
- // property metadata list.
- for (auto *property : PD->instance_properties()) {
- auto &propertiesArray =
- (property->isOptional() ? optPropertiesArray : reqPropertiesArray);
- auto fields = propertiesArray.beginStruct(propertyMetadataTy);
-
- fields.add(MakePropertyEncodingString(property, nullptr));
- PushPropertyAttributes(fields, property);
-
- if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) {
- std::string typeStr = Context.getObjCEncodingForMethodDecl(getter);
- llvm::Constant *typeEncoding = MakeConstantString(typeStr);
- InstanceMethodTypes.push_back(typeEncoding);
- fields.add(MakeConstantString(getter->getSelector().getAsString()));
- fields.add(typeEncoding);
- } else {
- fields.add(NULLPtr);
- fields.add(NULLPtr);
- }
- if (ObjCMethodDecl *setter = property->getSetterMethodDecl()) {
- std::string typeStr = Context.getObjCEncodingForMethodDecl(setter);
- llvm::Constant *typeEncoding = MakeConstantString(typeStr);
- InstanceMethodTypes.push_back(typeEncoding);
- fields.add(MakeConstantString(setter->getSelector().getAsString()));
- fields.add(typeEncoding);
- } else {
- fields.add(NULLPtr);
- fields.add(NULLPtr);
- }
-
- fields.finishAndAddTo(propertiesArray);
- }
-
- reqPropertiesArray.finishAndAddTo(reqPropertiesList);
- PropertyList =
- reqPropertiesList.finishAndCreateGlobal(".objc_property_list",
- CGM.getPointerAlign());
-
- optPropertiesArray.finishAndAddTo(optPropertiesList);
- OptionalPropertyList =
- optPropertiesList.finishAndCreateGlobal(".objc_property_list",
- CGM.getPointerAlign());
- }
+ llvm::Constant *PropertyList =
+ GeneratePropertyList(nullptr, PD, false, false);
+ llvm::Constant *OptionalPropertyList =
+ GeneratePropertyList(nullptr, PD, false, true);
// Protocols are objects containing lists of the methods implemented and
// protocols adopted.
@@ -1917,8 +2923,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
Elements.add(
llvm::ConstantExpr::getIntToPtr(
llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));
- Elements.add(
- MakeConstantString(ProtocolName, ".objc_protocol_name"));
+ Elements.add(MakeConstantString(ProtocolName));
Elements.add(ProtocolList);
Elements.add(InstanceMethodList);
Elements.add(ClassMethodList);
@@ -1933,8 +2938,6 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
}
void CGObjCGNU::GenerateProtocolHolderCategory() {
// Collect information about instance methods
- SmallVector<Selector, 1> MethodSels;
- SmallVector<llvm::Constant*, 1> MethodTypes;
ConstantInitBuilder Builder(CGM);
auto Elements = Builder.beginStruct();
@@ -1945,10 +2948,10 @@ void CGObjCGNU::GenerateProtocolHolderCategory() {
Elements.add(MakeConstantString(ClassName));
// Instance method list
Elements.addBitCast(GenerateMethodList(
- ClassName, CategoryName, MethodSels, MethodTypes, false), PtrTy);
+ ClassName, CategoryName, {}, false), PtrTy);
// Class method list
Elements.addBitCast(GenerateMethodList(
- ClassName, CategoryName, MethodSels, MethodTypes, true), PtrTy);
+ ClassName, CategoryName, {}, true), PtrTy);
// Protocol list
ConstantInitBuilder ProtocolListBuilder(CGM);
@@ -2016,25 +3019,9 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) {
}
void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
- std::string ClassName = OCD->getClassInterface()->getNameAsString();
+ const ObjCInterfaceDecl *Class = OCD->getClassInterface();
+ std::string ClassName = Class->getNameAsString();
std::string CategoryName = OCD->getNameAsString();
- // Collect information about instance methods
- SmallVector<Selector, 16> InstanceMethodSels;
- SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
- for (const auto *I : OCD->instance_methods()) {
- InstanceMethodSels.push_back(I->getSelector());
- std::string TypeStr = CGM.getContext().getObjCEncodingForMethodDecl(I);
- InstanceMethodTypes.push_back(MakeConstantString(TypeStr));
- }
-
- // Collect information about class methods
- SmallVector<Selector, 16> ClassMethodSels;
- SmallVector<llvm::Constant*, 16> ClassMethodTypes;
- for (const auto *I : OCD->class_methods()) {
- ClassMethodSels.push_back(I->getSelector());
- std::string TypeStr = CGM.getContext().getObjCEncodingForMethodDecl(I);
- ClassMethodTypes.push_back(MakeConstantString(TypeStr));
- }
// Collect the names of referenced protocols
SmallVector<std::string, 16> Protocols;
@@ -2049,84 +3036,125 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
Elements.add(MakeConstantString(CategoryName));
Elements.add(MakeConstantString(ClassName));
// Instance method list
+ SmallVector<ObjCMethodDecl*, 16> InstanceMethods;
+ InstanceMethods.insert(InstanceMethods.begin(), OCD->instmeth_begin(),
+ OCD->instmeth_end());
Elements.addBitCast(
- GenerateMethodList(ClassName, CategoryName, InstanceMethodSels,
- InstanceMethodTypes, false),
+ GenerateMethodList(ClassName, CategoryName, InstanceMethods, false),
PtrTy);
// Class method list
+
+ SmallVector<ObjCMethodDecl*, 16> ClassMethods;
+ ClassMethods.insert(ClassMethods.begin(), OCD->classmeth_begin(),
+ OCD->classmeth_end());
Elements.addBitCast(
- GenerateMethodList(ClassName, CategoryName, ClassMethodSels,
- ClassMethodTypes, true),
+ GenerateMethodList(ClassName, CategoryName, ClassMethods, true),
PtrTy);
// Protocol list
Elements.addBitCast(GenerateProtocolList(Protocols), PtrTy);
+ if (isRuntime(ObjCRuntime::GNUstep, 2)) {
+ const ObjCCategoryDecl *Category =
+ Class->FindCategoryDeclaration(OCD->getIdentifier());
+ if (Category) {
+ // Instance properties
+ Elements.addBitCast(GeneratePropertyList(OCD, Category, false), PtrTy);
+ // Class properties
+ Elements.addBitCast(GeneratePropertyList(OCD, Category, true), PtrTy);
+ } else {
+ Elements.addNullPointer(PtrTy);
+ Elements.addNullPointer(PtrTy);
+ }
+ }
+
Categories.push_back(llvm::ConstantExpr::getBitCast(
- Elements.finishAndCreateGlobal("", CGM.getPointerAlign()),
+ Elements.finishAndCreateGlobal(
+ std::string(".objc_category_")+ClassName+CategoryName,
+ CGM.getPointerAlign()),
PtrTy));
}
-llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OID,
- SmallVectorImpl<Selector> &InstanceMethodSels,
- SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes) {
+llvm::Constant *CGObjCGNU::GeneratePropertyList(const Decl *Container,
+ const ObjCContainerDecl *OCD,
+ bool isClassProperty,
+ bool protocolOptionalProperties) {
+
+ SmallVector<const ObjCPropertyDecl *, 16> Properties;
+ llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet;
+ bool isProtocol = isa<ObjCProtocolDecl>(OCD);
ASTContext &Context = CGM.getContext();
- // Property metadata: name, attributes, attributes2, padding1, padding2,
- // setter name, setter types, getter name, getter types.
- llvm::StructType *propertyMetadataTy =
- llvm::StructType::get(CGM.getLLVMContext(),
- { PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty,
- PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty });
- unsigned numProperties = 0;
- for (auto *propertyImpl : OID->property_impls()) {
- (void) propertyImpl;
- numProperties++;
+ std::function<void(const ObjCProtocolDecl *Proto)> collectProtocolProperties
+ = [&](const ObjCProtocolDecl *Proto) {
+ for (const auto *P : Proto->protocols())
+ collectProtocolProperties(P);
+ for (const auto *PD : Proto->properties()) {
+ if (isClassProperty != PD->isClassProperty())
+ continue;
+ // Skip any properties that are declared in protocols that this class
+ // conforms to but are not actually implemented by this class.
+ if (!isProtocol && !Context.getObjCPropertyImplDeclForPropertyDecl(PD, Container))
+ continue;
+ if (!PropertySet.insert(PD->getIdentifier()).second)
+ continue;
+ Properties.push_back(PD);
+ }
+ };
+
+ if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD))
+ for (const ObjCCategoryDecl *ClassExt : OID->known_extensions())
+ for (auto *PD : ClassExt->properties()) {
+ if (isClassProperty != PD->isClassProperty())
+ continue;
+ PropertySet.insert(PD->getIdentifier());
+ Properties.push_back(PD);
+ }
+
+ for (const auto *PD : OCD->properties()) {
+ if (isClassProperty != PD->isClassProperty())
+ continue;
+ // If we're generating a list for a protocol, skip optional / required ones
+ // when generating the other list.
+ if (isProtocol && (protocolOptionalProperties != PD->isOptional()))
+ continue;
+ // Don't emit duplicate metadata for properties that were already in a
+ // class extension.
+ if (!PropertySet.insert(PD->getIdentifier()).second)
+ continue;
+
+ Properties.push_back(PD);
}
+ if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD))
+ for (const auto *P : OID->all_referenced_protocols())
+ collectProtocolProperties(P);
+ else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD))
+ for (const auto *P : CD->protocols())
+ collectProtocolProperties(P);
+
+ auto numProperties = Properties.size();
+
+ if (numProperties == 0)
+ return NULLPtr;
+
ConstantInitBuilder builder(CGM);
auto propertyList = builder.beginStruct();
- propertyList.addInt(IntTy, numProperties);
- propertyList.add(NULLPtr);
- auto properties = propertyList.beginArray(propertyMetadataTy);
+ auto properties = PushPropertyListHeader(propertyList, numProperties);
  // Add all of the property methods that need adding to the method list and
  // to the property metadata list.
- for (auto *propertyImpl : OID->property_impls()) {
- auto fields = properties.beginStruct(propertyMetadataTy);
- ObjCPropertyDecl *property = propertyImpl->getPropertyDecl();
- bool isSynthesized = (propertyImpl->getPropertyImplementation() ==
- ObjCPropertyImplDecl::Synthesize);
- bool isDynamic = (propertyImpl->getPropertyImplementation() ==
- ObjCPropertyImplDecl::Dynamic);
-
- fields.add(MakePropertyEncodingString(property, OID));
- PushPropertyAttributes(fields, property, isSynthesized, isDynamic);
- if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) {
- std::string TypeStr = Context.getObjCEncodingForMethodDecl(getter);
- llvm::Constant *TypeEncoding = MakeConstantString(TypeStr);
- if (isSynthesized) {
- InstanceMethodTypes.push_back(TypeEncoding);
- InstanceMethodSels.push_back(getter->getSelector());
+ for (auto *property : Properties) {
+ bool isSynthesized = false;
+ bool isDynamic = false;
+ if (!isProtocol) {
+ auto *propertyImpl = Context.getObjCPropertyImplDeclForPropertyDecl(property, Container);
+ if (propertyImpl) {
+ isSynthesized = (propertyImpl->getPropertyImplementation() ==
+ ObjCPropertyImplDecl::Synthesize);
+ isDynamic = (propertyImpl->getPropertyImplementation() ==
+ ObjCPropertyImplDecl::Dynamic);
}
- fields.add(MakeConstantString(getter->getSelector().getAsString()));
- fields.add(TypeEncoding);
- } else {
- fields.add(NULLPtr);
- fields.add(NULLPtr);
}
- if (ObjCMethodDecl *setter = property->getSetterMethodDecl()) {
- std::string TypeStr = Context.getObjCEncodingForMethodDecl(setter);
- llvm::Constant *TypeEncoding = MakeConstantString(TypeStr);
- if (isSynthesized) {
- InstanceMethodTypes.push_back(TypeEncoding);
- InstanceMethodSels.push_back(setter->getSelector());
- }
- fields.add(MakeConstantString(setter->getSelector().getAsString()));
- fields.add(TypeEncoding);
- } else {
- fields.add(NULLPtr);
- fields.add(NULLPtr);
- }
- fields.finishAndAddTo(properties);
+ PushProperty(properties, property, Container, isSynthesized, isDynamic);
}
properties.finishAndAddTo(propertyList);
@@ -2179,6 +3207,8 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
SmallVector<llvm::Constant*, 16> IvarNames;
SmallVector<llvm::Constant*, 16> IvarTypes;
SmallVector<llvm::Constant*, 16> IvarOffsets;
+ SmallVector<llvm::Constant*, 16> IvarAligns;
+ SmallVector<Qualifiers::ObjCLifetime, 16> IvarOwnership;
ConstantInitBuilder IvarOffsetBuilder(CGM);
auto IvarOffsetValues = IvarOffsetBuilder.beginArray(PtrToIntTy);
@@ -2201,6 +3231,8 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
std::string TypeStr;
Context.getObjCEncodingForType(IVD->getType(), TypeStr, IVD);
IvarTypes.push_back(MakeConstantString(TypeStr));
+ IvarAligns.push_back(llvm::ConstantInt::get(IntTy,
+ Context.getTypeSize(IVD->getType())));
// Get the offset
uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD);
uint64_t Offset = BaseOffset;
@@ -2211,6 +3243,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
// Create the direct offset value
std::string OffsetName = "__objc_ivar_offset_value_" + ClassName +"." +
IVD->getNameAsString();
+
llvm::GlobalVariable *OffsetVar = TheModule.getGlobalVariable(OffsetName);
if (OffsetVar) {
OffsetVar->setInitializer(OffsetValue);
@@ -2219,14 +3252,13 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
// copy.
OffsetVar->setLinkage(llvm::GlobalValue::ExternalLinkage);
} else
- OffsetVar = new llvm::GlobalVariable(TheModule, IntTy,
+ OffsetVar = new llvm::GlobalVariable(TheModule, Int32Ty,
false, llvm::GlobalValue::ExternalLinkage,
- OffsetValue,
- "__objc_ivar_offset_value_" + ClassName +"." +
- IVD->getNameAsString());
+ OffsetValue, OffsetName);
IvarOffsets.push_back(OffsetValue);
IvarOffsetValues.add(OffsetVar);
Qualifiers::ObjCLifetime lt = IVD->getType().getQualifiers().getObjCLifetime();
+ IvarOwnership.push_back(lt);
switch (lt) {
case Qualifiers::OCL_Strong:
StrongIvars.push_back(true);
@@ -2248,25 +3280,30 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
CGM.getPointerAlign());
// Collect information about instance methods
- SmallVector<Selector, 16> InstanceMethodSels;
- SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
- for (const auto *I : OID->instance_methods()) {
- InstanceMethodSels.push_back(I->getSelector());
- std::string TypeStr = Context.getObjCEncodingForMethodDecl(I);
- InstanceMethodTypes.push_back(MakeConstantString(TypeStr));
- }
+ SmallVector<const ObjCMethodDecl*, 16> InstanceMethods;
+ InstanceMethods.insert(InstanceMethods.begin(), OID->instmeth_begin(),
+ OID->instmeth_end());
+
+ SmallVector<const ObjCMethodDecl*, 16> ClassMethods;
+ ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(),
+ OID->classmeth_end());
+
+ // Collect the same information about synthesized properties, which don't
+ // show up in the instance method lists.
+ for (auto *propertyImpl : OID->property_impls())
+ if (propertyImpl->getPropertyImplementation() ==
+ ObjCPropertyImplDecl::Synthesize) {
+ ObjCPropertyDecl *property = propertyImpl->getPropertyDecl();
+ auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) {
+ if (accessor)
+ InstanceMethods.push_back(accessor);
+ };
+ addPropertyMethod(property->getGetterMethodDecl());
+ addPropertyMethod(property->getSetterMethodDecl());
+ }
- llvm::Constant *Properties = GeneratePropertyList(OID, InstanceMethodSels,
- InstanceMethodTypes);
+ llvm::Constant *Properties = GeneratePropertyList(OID, ClassDecl);
- // Collect information about class methods
- SmallVector<Selector, 16> ClassMethodSels;
- SmallVector<llvm::Constant*, 16> ClassMethodTypes;
- for (const auto *I : OID->class_methods()) {
- ClassMethodSels.push_back(I->getSelector());
- std::string TypeStr = Context.getObjCEncodingForMethodDecl(I);
- ClassMethodTypes.push_back(MakeConstantString(TypeStr));
- }
// Collect the names of referenced protocols
SmallVector<std::string, 16> Protocols;
for (const auto *I : ClassDecl->protocols())
@@ -2283,11 +3320,11 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
SmallVector<llvm::Constant*, 1> empty;
// Generate the method and instance variable lists
llvm::Constant *MethodList = GenerateMethodList(ClassName, "",
- InstanceMethodSels, InstanceMethodTypes, false);
+ InstanceMethods, false);
llvm::Constant *ClassMethodList = GenerateMethodList(ClassName, "",
- ClassMethodSels, ClassMethodTypes, true);
+ ClassMethods, true);
llvm::Constant *IvarList = GenerateIvarList(IvarNames, IvarTypes,
- IvarOffsets);
+ IvarOffsets, IvarAligns, IvarOwnership);
// Irrespective of whether we are compiling for a fragile or non-fragile ABI,
// we emit a symbol containing the offset for each ivar in the class. This
// allows code compiled for the non-Fragile ABI to inherit from code compiled
@@ -2300,14 +3337,13 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
// the offset (third field in ivar structure)
llvm::Type *IndexTy = Int32Ty;
llvm::Constant *offsetPointerIndexes[] = {Zeros[0],
- llvm::ConstantInt::get(IndexTy, 1), nullptr,
- llvm::ConstantInt::get(IndexTy, 2) };
+ llvm::ConstantInt::get(IndexTy, ClassABIVersion > 1 ? 2 : 1), nullptr,
+ llvm::ConstantInt::get(IndexTy, ClassABIVersion > 1 ? 3 : 2) };
unsigned ivarIndex = 0;
for (const ObjCIvarDecl *IVD = ClassDecl->all_declared_ivar_begin(); IVD;
IVD = IVD->getNextIvar()) {
- const std::string Name = "__objc_ivar_offset_" + ClassName + '.'
- + IVD->getNameAsString();
+ const std::string Name = GetIVarOffsetVariableName(ClassDecl, IVD);
offsetPointerIndexes[2] = llvm::ConstantInt::get(IndexTy, ivarIndex);
// Get the correct ivar field
llvm::Constant *offsetValue = llvm::ConstantExpr::getGetElementPtr(
@@ -2321,12 +3357,10 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
// different modules will use this one, rather than their private
// copy.
offset->setLinkage(llvm::GlobalValue::ExternalLinkage);
- } else {
+ } else
// Add a new alias if there isn't one already.
- offset = new llvm::GlobalVariable(TheModule, offsetValue->getType(),
+ new llvm::GlobalVariable(TheModule, offsetValue->getType(),
false, llvm::GlobalValue::ExternalLinkage, offsetValue, Name);
- (void) offset; // Silence dead store warning.
- }
++ivarIndex;
}
llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0);
@@ -2334,16 +3368,10 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
  // Generate metaclass for class methods
llvm::Constant *MetaClassStruct = GenerateClassStructure(
NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0],
- GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr,
- NULLPtr, ZeroPtr, ZeroPtr, true);
- if (CGM.getTriple().isOSBinFormatCOFF()) {
- auto Storage = llvm::GlobalValue::DefaultStorageClass;
- if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
- Storage = llvm::GlobalValue::DLLImportStorageClass;
- else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
- Storage = llvm::GlobalValue::DLLExportStorageClass;
- cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage);
- }
+ NULLPtr, ClassMethodList, NULLPtr, NULLPtr,
+ GeneratePropertyList(OID, ClassDecl, true), ZeroPtr, ZeroPtr, true);
+ CGM.setGVProperties(cast<llvm::GlobalValue>(MetaClassStruct),
+ OID->getClassInterface());
// Generate the class structure
llvm::Constant *ClassStruct = GenerateClassStructure(
@@ -2351,14 +3379,8 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList,
GenerateProtocolList(Protocols), IvarOffsetArray, Properties,
StrongIvarBitmap, WeakIvarBitmap);
- if (CGM.getTriple().isOSBinFormatCOFF()) {
- auto Storage = llvm::GlobalValue::DefaultStorageClass;
- if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
- Storage = llvm::GlobalValue::DLLImportStorageClass;
- else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
- Storage = llvm::GlobalValue::DLLExportStorageClass;
- cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage);
- }
+ CGM.setGVProperties(cast<llvm::GlobalValue>(ClassStruct),
+ OID->getClassInterface());
// Resolve the class aliases, if they exist.
if (ClassPtrAlias) {
@@ -2785,8 +3807,7 @@ void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF,
llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *Ivar) {
- const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString()
- + '.' + Ivar->getNameAsString();
+ const std::string Name = GetIVarOffsetVariableName(ID, Ivar);
// Emit the variable and initialize it with what we think the correct value
// is. This allows code compiled with non-fragile ivars to work correctly
// when linked against code which isn't (most of the time).
@@ -2895,8 +3916,11 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF,
CGObjCRuntime *
clang::CodeGen::CreateGNUObjCRuntime(CodeGenModule &CGM) {
- switch (CGM.getLangOpts().ObjCRuntime.getKind()) {
+ auto Runtime = CGM.getLangOpts().ObjCRuntime;
+ switch (Runtime.getKind()) {
case ObjCRuntime::GNUstep:
+ if (Runtime.getVersion() >= VersionTuple(2, 0))
+ return new CGObjCGNUstep2(CGM);
return new CGObjCGNUstep(CGM);
case ObjCRuntime::GCC:
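
[Annotation] The dispatch above gates the new GNUstep ABI on the runtime version carried in -fobjc-runtime (gnustep-2.0 and newer select CGObjCGNUstep2). A standalone sketch of the same version gate, with a (major, minor) pair standing in for VersionTuple:

    #include <utility>

    // (major, minor), compared lexicographically as VersionTuple compares.
    using Version = std::pair<unsigned, unsigned>;

    // Hypothetical helper mirroring the check in CreateGNUObjCRuntime:
    // versions >= 2.0 take the CGObjCGNUstep2 codegen path.
    static bool usesGNUstep2ABI(Version v) {
      return v >= Version(2, 0);
    }
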
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index ef4e6cd4f01b..0c766575dc21 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -888,7 +888,7 @@ protected:
/// int * but is actually an Obj-C class pointer.
llvm::WeakTrackingVH ConstantStringClassRef;
- /// \brief The LLVM type corresponding to NSConstantString.
+ /// The LLVM type corresponding to NSConstantString.
llvm::StructType *NSConstantStringType = nullptr;
llvm::StringMap<llvm::GlobalVariable *> NSConstantStringMap;
@@ -1708,7 +1708,7 @@ struct NullReturnState {
e = Method->param_end(); i != e; ++i, ++I) {
const ParmVarDecl *ParamDecl = (*i);
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
- RValue RV = I->RV;
+ RValue RV = I->getRValue(CGF);
assert(RV.isScalar() &&
"NullReturnState::complete - arg not on object");
CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime);
@@ -3401,7 +3401,9 @@ static bool hasMRCWeakIvars(CodeGenModule &CGM,
See EmitClassExtension();
*/
void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) {
- DefinedSymbols.insert(ID->getIdentifier());
+ IdentifierInfo *RuntimeName =
+ &CGM.getContext().Idents.get(ID->getObjCRuntimeNameAsString());
+ DefinedSymbols.insert(RuntimeName);
std::string ClassName = ID->getNameAsString();
// FIXME: Gross
@@ -4179,10 +4181,6 @@ void FragileHazards::emitHazardsInNewBlocks() {
}
}
-static void addIfPresent(llvm::DenseSet<llvm::Value*> &S, llvm::Value *V) {
- if (V) S.insert(V);
-}
-
static void addIfPresent(llvm::DenseSet<llvm::Value*> &S, Address V) {
if (V.isValid()) S.insert(V.getPointer());
}
@@ -4984,7 +4982,9 @@ llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF,
if (ID->hasAttr<ObjCRuntimeVisibleAttr>())
return EmitClassRefViaRuntime(CGF, ID, ObjCTypes);
- return EmitClassRefFromId(CGF, ID->getIdentifier());
+ IdentifierInfo *RuntimeName =
+ &CGM.getContext().Idents.get(ID->getObjCRuntimeNameAsString());
+ return EmitClassRefFromId(CGF, RuntimeName);
}
llvm::Value *CGObjCMac::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) {
@@ -6309,9 +6309,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) {
llvm::GlobalVariable *MetaTClass =
BuildClassObject(CI, /*metaclass*/ true,
IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden);
- if (CGM.getTriple().isOSBinFormatCOFF())
- if (CI->hasAttr<DLLExportAttr>())
- MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ CGM.setGVProperties(MetaTClass, CI);
DefinedMetaClasses.push_back(MetaTClass);
// Metadata for the class
@@ -6351,9 +6349,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) {
llvm::GlobalVariable *ClassMD =
BuildClassObject(CI, /*metaclass*/ false,
MetaTClass, SuperClassGV, CLASS_RO_GV, classIsHidden);
- if (CGM.getTriple().isOSBinFormatCOFF())
- if (CI->hasAttr<DLLExportAttr>())
- ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ CGM.setGVProperties(ClassMD, CI);
DefinedClasses.push_back(ClassMD);
ImplementedClasses.push_back(CI);
@@ -6403,7 +6399,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF,
PTGV->setAlignment(Align.getQuantity());
if (!CGM.getTriple().isOSBinFormatMachO())
PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName));
- CGM.addCompilerUsedGlobal(PTGV);
+ CGM.addUsedGlobal(PTGV);
return CGF.Builder.CreateAlignedLoad(PTGV, Align);
}
@@ -6847,7 +6843,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
Protocols[PD->getIdentifier()] = Entry;
}
Entry->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.addCompilerUsedGlobal(Entry);
+ CGM.addUsedGlobal(Entry);
// Use this protocol meta-data to build protocol list table in section
// __DATA, __objc_protolist
@@ -6866,7 +6862,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
PTGV->setSection(GetSectionName("__objc_protolist",
"coalesced,no_dead_strip"));
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.addCompilerUsedGlobal(PTGV);
+ CGM.addUsedGlobal(PTGV);
return Entry;
}
@@ -6952,7 +6948,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitIvarOffset(
// This could be 32bit int or 64bit integer depending on the architecture.
// Cast it to 64bit integer value, if it is a 32bit integer ivar offset value
- // as this is what caller always expectes.
+ // as this is what caller always expects.
if (ObjCTypes.IvarOffsetVarTy == ObjCTypes.IntTy)
IvarOffsetValue = CGF.Builder.CreateIntCast(
IvarOffsetValue, ObjCTypes.LongTy, true, "ivar.conv");
@@ -7079,7 +7075,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF,
CGF.getPointerAlign());
// Update the message ref argument.
- args[1].RV = RValue::get(mref.getPointer());
+ args[1].setRValue(RValue::get(mref.getPointer()));
// Load the function to call from the message ref table.
Address calleeAddr =
@@ -7528,12 +7524,7 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID,
Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy,
false, llvm::GlobalValue::ExternalLinkage,
nullptr, EHTypeName);
- if (CGM.getTriple().isOSBinFormatCOFF()) {
- if (ID->hasAttr<DLLExportAttr>())
- Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- else if (ID->hasAttr<DLLImportAttr>())
- Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- }
+ CGM.setGVProperties(Entry, ID);
return Entry;
}
}
@@ -7572,10 +7563,8 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID,
CGM.getPointerAlign(),
/*constant*/ false,
L);
- if (CGM.getTriple().isOSBinFormatCOFF())
- if (hasObjCExceptionAttribute(CGM.getContext(), ID))
- if (ID->hasAttr<DLLExportAttr>())
- Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ if (hasObjCExceptionAttribute(CGM.getContext(), ID))
+ CGM.setGVProperties(Entry, ID);
}
assert(Entry->getLinkage() == L);
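
[Annotation] Several hunks in this file replace hand-rolled COFF dllimport/dllexport handling with a single CGM.setGVProperties() call. A self-contained restatement of the pattern the deleted code implemented (the booleans stand in for D->hasAttr<DLLImportAttr>() / D->hasAttr<DLLExportAttr>(); setGVProperties additionally applies visibility):

    #include "llvm/IR/GlobalValue.h"

    // Pick a DLL storage class from the declaration's attributes and apply
    // it to the emitted global, as the removed per-call-site code did.
    static void applyDLLStorage(llvm::GlobalValue &GV, bool IsDLLImport,
                                bool IsDLLExport) {
      auto Storage = llvm::GlobalValue::DefaultStorageClass;
      if (IsDLLImport)
        Storage = llvm::GlobalValue::DLLImportStorageClass;
      else if (IsDLLExport)
        Storage = llvm::GlobalValue::DLLExportStorageClass;
      GV.setDLLStorageClass(Storage);
    }
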
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index d140e7f09e9a..1da19a90c387 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -66,13 +66,19 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
}
llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) {
- if (!PipeTy){
- uint32_t PipeAddrSpc = CGM.getContext().getTargetAddressSpace(
- CGM.getContext().getOpenCLTypeAddrSpace(T));
- PipeTy = llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc);
- }
+ if (T->isReadOnly())
+ return getPipeType(T, "opencl.pipe_ro_t", PipeROTy);
+ else
+ return getPipeType(T, "opencl.pipe_wo_t", PipeWOTy);
+}
+llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T, StringRef Name,
+ llvm::Type *&PipeTy) {
+ if (!PipeTy)
+ PipeTy = llvm::PointerType::get(llvm::StructType::create(
+ CGM.getLLVMContext(), Name),
+ CGM.getContext().getTargetAddressSpace(
+ CGM.getContext().getOpenCLTypeAddrSpace(T)));
return PipeTy;
}
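
[Annotation] With this change, read-only and write-only pipes lower to distinct opaque struct pointer types (opencl.pipe_ro_t and opencl.pipe_wo_t) instead of a shared opencl.pipe_t. A minimal standalone sketch of the lazy, per-direction cache the code above implements (LLVM API as of this commit):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    // Create the named opaque type on first use; reuse the cached pointer
    // type afterwards. One cache slot exists per pipe direction.
    static llvm::Type *getOrCreatePipeTy(llvm::LLVMContext &Ctx,
                                         llvm::StringRef Name,
                                         unsigned AddrSpace,
                                         llvm::Type *&Cache) {
      if (!Cache)
        Cache = llvm::PointerType::get(llvm::StructType::create(Ctx, Name),
                                       AddrSpace);
      return Cache;
    }
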
@@ -112,37 +118,64 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
-CGOpenCLRuntime::EnqueuedBlockInfo
-CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
- // The block literal may be assigned to a const variable. Chasing down
- // to get the block literal.
+// Get the block literal from an expression derived from the block expression.
+// OpenCL v2.0 s6.12.5:
+// Block variable declarations are implicitly qualified with const. Therefore
+// all block variables must be initialized at declaration time and may not be
+// reassigned.
+static const BlockExpr *getBlockExpr(const Expr *E) {
+ if (auto Cast = dyn_cast<CastExpr>(E)) {
+ E = Cast->getSubExpr();
+ }
if (auto DR = dyn_cast<DeclRefExpr>(E)) {
E = cast<VarDecl>(DR->getDecl())->getInit();
}
+ E = E->IgnoreImplicit();
if (auto Cast = dyn_cast<CastExpr>(E)) {
E = Cast->getSubExpr();
}
- auto *Block = cast<BlockExpr>(E);
+ return cast<BlockExpr>(E);
+}
+
+/// Record emitted llvm invoke function and llvm block literal for the
+/// corresponding block expression.
+void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
+ llvm::Function *InvokeF,
+ llvm::Value *Block) {
+ assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() &&
+ "Block expression emitted twice");
+ assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
+ assert(Block->getType()->isPointerTy() && "Invalid block literal type");
+ EnqueuedBlockMap[E].InvokeFunc = InvokeF;
+ EnqueuedBlockMap[E].BlockArg = Block;
+ EnqueuedBlockMap[E].Kernel = nullptr;
+}
+
+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
+}
+
+CGOpenCLRuntime::EnqueuedBlockInfo
+CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
+ CGF.EmitScalarExpr(E);
+
+ const BlockExpr *Block = getBlockExpr(E);
+ assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
+ "Block expression not emitted");
- // The same block literal may be enqueued multiple times. Cache it if
- // possible.
- auto Loc = EnqueuedBlockMap.find(Block);
- if (Loc != EnqueuedBlockMap.end()) {
- return Loc->second;
+ // Do not emit the block wrapper again if it has been emitted.
+ if (EnqueuedBlockMap[Block].Kernel) {
+ return EnqueuedBlockMap[Block];
}
- // Emit block literal as a common block expression and get the block invoke
- // function.
- llvm::Function *Invoke;
- auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke);
auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel(
- CGF, Invoke, V->stripPointerCasts());
+ CGF, EnqueuedBlockMap[Block].InvokeFunc,
+ EnqueuedBlockMap[Block].BlockArg->stripPointerCasts());
// The common part of the post-processing of the kernel goes here.
F->addFnAttr(llvm::Attribute::NoUnwind);
F->setCallingConv(
CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
- EnqueuedBlockInfo Info{F, V};
- EnqueuedBlockMap[Block] = Info;
- return Info;
+ EnqueuedBlockMap[Block].Kernel = F;
+ return EnqueuedBlockMap[Block];
}
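
[Annotation] The restructuring above splits enqueued-block handling into two phases: normal block codegen records the invoke function and block literal via recordBlockInfo(), and the wrapper kernel is built lazily, at most once per block expression. A self-contained sketch of that memoization shape, with void* placeholders for the LLVM values:

    #include <unordered_map>

    struct BlockInfo {
      void *InvokeFunc = nullptr; // recorded during normal block codegen
      void *BlockArg = nullptr;   // recorded during normal block codegen
      void *Kernel = nullptr;     // created lazily on first enqueue
    };

    // Mirrors emitOpenCLEnqueuedBlock's "do not emit the wrapper again"
    // check: build once per block expression, then return the cached entry.
    BlockInfo &getOrBuildKernel(
        std::unordered_map<const void *, BlockInfo> &Map, const void *Block,
        void *(*buildWrapper)(void *invoke, void *blockArg)) {
      BlockInfo &Info = Map[Block];
      if (!Info.Kernel)
        Info.Kernel = buildWrapper(Info.InvokeFunc, Info.BlockArg);
      return Info;
    }
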
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index ead303d1d0d5..a513340827a8 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -23,6 +23,7 @@
namespace clang {
+class BlockExpr;
class Expr;
class VarDecl;
@@ -34,20 +35,25 @@ class CodeGenModule;
class CGOpenCLRuntime {
protected:
CodeGenModule &CGM;
- llvm::Type *PipeTy;
+ llvm::Type *PipeROTy;
+ llvm::Type *PipeWOTy;
llvm::PointerType *SamplerTy;
/// Structure for enqueued block information.
struct EnqueuedBlockInfo {
- llvm::Function *Kernel; /// Enqueued block kernel.
- llvm::Value *BlockArg; /// The first argument to enqueued block kernel.
+ llvm::Function *InvokeFunc; /// Block invoke function.
+ llvm::Function *Kernel; /// Enqueued block kernel.
+ llvm::Value *BlockArg; /// The first argument to enqueued block kernel.
};
/// Maps block expression to block information.
llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
+ virtual llvm::Type *getPipeType(const PipeType *T, StringRef Name,
+ llvm::Type *&PipeTy);
+
public:
- CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr),
- SamplerTy(nullptr) {}
+ CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM),
+ PipeROTy(nullptr), PipeWOTy(nullptr), SamplerTy(nullptr) {}
virtual ~CGOpenCLRuntime();
/// Emit the IR required for a work-group-local variable declaration, and add
@@ -62,11 +68,11 @@ public:
llvm::PointerType *getSamplerType(const Type *T);
- // \brief Returnes a value which indicates the size in bytes of the pipe
+ // Returns a value which indicates the size in bytes of the pipe
// element.
virtual llvm::Value *getPipeElemSize(const Expr *PipeArg);
- // \brief Returnes a value which indicates the alignment in bytes of the pipe
+ // Returns a value which indicates the alignment in bytes of the pipe
// element.
virtual llvm::Value *getPipeElemAlign(const Expr *PipeArg);
@@ -76,6 +82,19 @@ public:
/// \return enqueued block information for enqueued block.
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
const Expr *E);
+
+ /// Record invoke function and block literal emitted during normal
+ /// codegen for a block expression. The information is used by
+ /// emitOpenCLEnqueuedBlock to emit the wrapper kernel.
+ ///
+ /// \param InvokeF invoke function emitted for the block expression.
+ /// \param Block block literal emitted for the block expression.
+ void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
+ llvm::Value *Block);
+
+ /// \return LLVM block invoke function emitted for an expression derived from
+ /// the block expression.
+ llvm::Function *getInvokeFunction(const Expr *E);
};
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index fa38ee80bf41..3730b9af12fa 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -14,12 +14,13 @@
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
+#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
@@ -33,20 +34,20 @@ using namespace clang;
using namespace CodeGen;
namespace {
-/// \brief Base class for handling code generation inside OpenMP regions.
+/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
- /// \brief Kinds of OpenMP regions used in codegen.
+ /// Kinds of OpenMP regions used in codegen.
enum CGOpenMPRegionKind {
- /// \brief Region with outlined function for standalone 'parallel'
+ /// Region with outlined function for standalone 'parallel'
/// directive.
ParallelOutlinedRegion,
- /// \brief Region with outlined function for standalone 'task' directive.
+ /// Region with outlined function for standalone 'task' directive.
TaskOutlinedRegion,
- /// \brief Region for constructs that do not require function outlining,
+ /// Region for constructs that do not require function outlining,
/// like 'for', 'sections', 'atomic' etc. directives.
InlinedRegion,
- /// \brief Region with outlined function for standalone 'target' directive.
+ /// Region with outlined function for standalone 'target' directive.
TargetRegion,
};
@@ -63,14 +64,14 @@ public:
: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
Kind(Kind), HasCancel(HasCancel) {}
- /// \brief Get a variable or parameter for storing global thread id
+ /// Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
virtual const VarDecl *getThreadIDVariable() const = 0;
- /// \brief Emit the captured statement body.
+ /// Emit the captured statement body.
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
- /// \brief Get an LValue for the current ThreadID variable.
+ /// Get an LValue for the current ThreadID variable.
/// \return LValue for thread id variable. This LValue always has type int32*.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
@@ -95,7 +96,7 @@ protected:
bool HasCancel;
};
-/// \brief API for captured statement code generation in OpenMP constructs.
+/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
@@ -108,11 +109,11 @@ public:
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
- /// \brief Get a variable or parameter for storing global thread id
+ /// Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
- /// \brief Get the name of the capture helper.
+ /// Get the name of the capture helper.
StringRef getHelperName() const override { return HelperName; }
static bool classof(const CGCapturedStmtInfo *Info) {
@@ -122,13 +123,13 @@ public:
}
private:
- /// \brief A variable or parameter storing global thread id for OpenMP
+ /// A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
StringRef HelperName;
};
-/// \brief API for captured statement code generation in OpenMP constructs.
+/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
class UntiedTaskActionTy final : public PrePostActionTy {
@@ -144,11 +145,12 @@ public:
void Enter(CodeGenFunction &CGF) override {
if (Untied) {
// Emit task switching point.
- auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+ LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
- auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
- auto *DoneBB = CGF.createBasicBlock(".untied.done.");
+ llvm::Value *Res =
+ CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
+ llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
CGF.EmitBlock(DoneBB);
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
@@ -160,7 +162,7 @@ public:
}
void emitUntiedSwitch(CodeGenFunction &CGF) const {
if (Untied) {
- auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+ LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
@@ -188,14 +190,14 @@ public:
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
- /// \brief Get a variable or parameter for storing global thread id
+ /// Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
- /// \brief Get an LValue for the current ThreadID variable.
+ /// Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
- /// \brief Get the name of the capture helper.
+ /// Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
void emitUntiedSwitch(CodeGenFunction &CGF) override {
@@ -209,14 +211,14 @@ public:
}
private:
- /// \brief A variable or parameter storing global thread id for OpenMP
+ /// A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// Action for emitting code for untied tasks.
const UntiedTaskActionTy &Action;
};
-/// \brief API for inlined captured statement code generation in OpenMP
+/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
@@ -227,7 +229,7 @@ public:
OldCSI(OldCSI),
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
- // \brief Retrieve the value of the context parameter.
+ // Retrieve the value of the context parameter.
llvm::Value *getContextValue() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getContextValue();
@@ -242,7 +244,7 @@ public:
llvm_unreachable("No context value for inlined OpenMP region");
}
- /// \brief Lookup the captured field decl for a variable.
+ /// Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
if (OuterRegionInfo)
return OuterRegionInfo->lookup(VD);
@@ -257,7 +259,7 @@ public:
return nullptr;
}
- /// \brief Get a variable or parameter for storing global thread id
+ /// Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
if (OuterRegionInfo)
@@ -265,14 +267,14 @@ public:
return nullptr;
}
- /// \brief Get an LValue for the current ThreadID variable.
+ /// Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
if (OuterRegionInfo)
return OuterRegionInfo->getThreadIDVariableLValue(CGF);
llvm_unreachable("No LValue for inlined OpenMP construct");
}
- /// \brief Get the name of the capture helper.
+ /// Get the name of the capture helper.
StringRef getHelperName() const override {
if (auto *OuterRegionInfo = getOldCSI())
return OuterRegionInfo->getHelperName();
@@ -294,12 +296,12 @@ public:
~CGOpenMPInlinedRegionInfo() override = default;
private:
- /// \brief CodeGen info about outer OpenMP region.
+ /// CodeGen info about outer OpenMP region.
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
CGOpenMPRegionInfo *OuterRegionInfo;
};
-/// \brief API for captured statement code generation in OpenMP target
+/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
@@ -312,11 +314,11 @@ public:
/*HasCancel=*/false),
HelperName(HelperName) {}
- /// \brief This is unused for target regions because each starts executing
+ /// This is unused for target regions because each starts executing
/// with a single thread.
const VarDecl *getThreadIDVariable() const override { return nullptr; }
- /// \brief Get the name of the capture helper.
+ /// Get the name of the capture helper.
StringRef getHelperName() const override { return HelperName; }
static bool classof(const CGCapturedStmtInfo *Info) {
@@ -331,7 +333,7 @@ private:
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
llvm_unreachable("No codegen for expressions");
}
-/// \brief API for generation of expressions captured in a innermost OpenMP
+/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
@@ -343,7 +345,7 @@ public:
// Make sure the globals captured in the provided statement are local by
// using the privatization logic. We assume the same variable is not
// captured more than once.
- for (auto &C : CS.captures()) {
+ for (const auto &C : CS.captures()) {
if (!C.capturesVariable() && !C.capturesVariableByCopy())
continue;
@@ -354,33 +356,32 @@ public:
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
/*RefersToEnclosingVariableOrCapture=*/false,
VD->getType().getNonReferenceType(), VK_LValue,
- SourceLocation());
- PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
- return CGF.EmitLValue(&DRE).getAddress();
- });
+ C.getLocation());
+ PrivScope.addPrivate(
+ VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
}
(void)PrivScope.Privatize();
}
- /// \brief Lookup the captured field decl for a variable.
+ /// Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
- if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
+ if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
return FD;
return nullptr;
}
- /// \brief Emit the captured statement body.
+ /// Emit the captured statement body.
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
llvm_unreachable("No body for expressions");
}
- /// \brief Get a variable or parameter for storing global thread id
+ /// Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
llvm_unreachable("No thread id for expressions");
}
- /// \brief Get the name of the capture helper.
+ /// Get the name of the capture helper.
StringRef getHelperName() const override {
llvm_unreachable("No helper name for expressions");
}
@@ -392,14 +393,15 @@ private:
CodeGenFunction::OMPPrivateScope PrivScope;
};
-/// \brief RAII for emitting code of OpenMP constructs.
+/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
+ const CodeGen::CGBlockInfo *BlockInfo = nullptr;
public:
- /// \brief Constructs region for combined constructs.
+ /// Constructs region for combined constructs.
/// \param CodeGen Code generation sequence for combined directives. Includes
/// a list of functions used for code generation of implicitly inlined
/// regions.
@@ -412,6 +414,8 @@ public:
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
LambdaThisCaptureField = CGF.LambdaThisCaptureField;
CGF.LambdaThisCaptureField = nullptr;
+ BlockInfo = CGF.BlockInfo;
+ CGF.BlockInfo = nullptr;
}
~InlinedOpenMPRegionRAII() {
@@ -422,28 +426,29 @@ public:
CGF.CapturedStmtInfo = OldCSI;
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
CGF.LambdaThisCaptureField = LambdaThisCaptureField;
+ CGF.BlockInfo = BlockInfo;
}
};
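
For orientation, a minimal standalone sketch (not part of the patch) of the save/clear/restore pattern the RAII above follows, now extended to BlockInfo; `State` stands in for the CodeGenFunction fields and all names here are illustrative:

#include <utility>

struct State {
  int LambdaCaptureFields = 42;               // stand-in for the real DenseMap
  const void *BlockInfo = &LambdaCaptureFields;
};

class ScopedRegion {
  State &S;
  int SavedFields = 0;
  const void *SavedBlockInfo = nullptr;

public:
  explicit ScopedRegion(State &St) : S(St) {
    std::swap(S.LambdaCaptureFields, SavedFields); // stash and zero the captures
    SavedBlockInfo = S.BlockInfo;
    S.BlockInfo = nullptr; // the inlined region must not see enclosing block captures
  }
  ~ScopedRegion() {
    std::swap(S.LambdaCaptureFields, SavedFields); // restore on scope exit
    S.BlockInfo = SavedBlockInfo;
  }
};
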
-/// \brief Values for bit flags used in the ident_t to describe the fields.
+/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
- /// \brief Use trampoline for internal microtask.
+ /// Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
- /// \brief Use c-style ident structure.
+ /// Use c-style ident structure.
OMP_IDENT_KMPC = 0x02,
- /// \brief Atomic reduction option for kmpc_reduce.
+ /// Atomic reduction option for kmpc_reduce.
OMP_ATOMIC_REDUCE = 0x10,
- /// \brief Explicit 'barrier' directive.
+ /// Explicit 'barrier' directive.
OMP_IDENT_BARRIER_EXPL = 0x20,
- /// \brief Implicit barrier in code.
+ /// Implicit barrier in code.
OMP_IDENT_BARRIER_IMPL = 0x40,
- /// \brief Implicit barrier in 'for' directive.
+ /// Implicit barrier in 'for' directive.
OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
- /// \brief Implicit barrier in 'sections' directive.
+ /// Implicit barrier in 'sections' directive.
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
- /// \brief Implicit barrier in 'single' directive.
+ /// Implicit barrier in 'single' directive.
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
/// Call of __kmp_for_static_init for static loop.
OMP_IDENT_WORK_LOOP = 0x200,
@@ -454,7 +459,7 @@ enum OpenMPLocationFlags : unsigned {
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
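
Note that OMP_IDENT_BARRIER_IMPL_FOR shares the value 0x40 with OMP_IDENT_BARRIER_IMPL, while the 'sections' and 'single' variants layer extra bits on top of it. A small self-contained example (ours) of how the flags compose, using values copied from the enum above:

#include <cstdio>

enum : unsigned {
  OMP_IDENT_KMPC = 0x02,
  OMP_IDENT_BARRIER_IMPL = 0x40,
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
};

int main() {
  // An implicit barrier in a 'sections' construct, with the c-style ident bit set:
  unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_SECTIONS;
  std::printf("0x%X\n", Flags); // prints 0xC2
  // The sections variant still tests positive as an implicit barrier:
  std::printf("%d\n", (Flags & OMP_IDENT_BARRIER_IMPL) != 0); // prints 1
  return 0;
}
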
-/// \brief Describes ident structure that describes a source location.
+/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
@@ -481,24 +486,24 @@ enum OpenMPLocationFlags : unsigned {
/// */
/// } ident_t;
enum IdentFieldIndex {
- /// \brief might be used in Fortran
+ /// might be used in Fortran
IdentField_Reserved_1,
- /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
+ /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
IdentField_Flags,
- /// \brief Not really used in Fortran any more
+ /// Not really used in Fortran any more
IdentField_Reserved_2,
- /// \brief Source[4] in Fortran, do not use for C++
+ /// Source[4] in Fortran, do not use for C++
IdentField_Reserved_3,
- /// \brief String describing the source location. The string is composed of
+ /// String describing the source location. The string is composed of
/// semi-colon separated fields which describe the source file, the function
/// and a pair of line numbers that delimit the construct.
IdentField_PSource
};
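
Spelled as a plain C++ struct for orientation, the indices above address fields laid out as follows (names follow the kmp.h description quoted earlier; this rendering is ours):

#include <cstdint>

struct ident_t {
  std::int32_t reserved_1; // IdentField_Reserved_1: might be used in Fortran
  std::int32_t flags;      // IdentField_Flags: OMP_IDENT_xxx bits, OMP_IDENT_KMPC set
  std::int32_t reserved_2; // IdentField_Reserved_2
  std::int32_t reserved_3; // IdentField_Reserved_3: Source[4] in Fortran
  const char *psource;     // IdentField_PSource: ";file;function;line;column;;"
};
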
-/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
+/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
- /// \brief Lower bound for default (unordered) versions.
+ /// Lower bound for default (unordered) versions.
OMP_sch_lower = 32,
OMP_sch_static_chunked = 33,
OMP_sch_static = 34,
@@ -508,7 +513,7 @@ enum OpenMPSchedType {
OMP_sch_auto = 38,
/// static with chunk adjustment (e.g., simd)
OMP_sch_static_balanced_chunked = 45,
- /// \brief Lower bound for 'ordered' versions.
+ /// Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
OMP_ord_static_chunked = 65,
OMP_ord_static = 66,
@@ -517,7 +522,7 @@ enum OpenMPSchedType {
OMP_ord_runtime = 69,
OMP_ord_auto = 70,
OMP_sch_default = OMP_sch_static,
- /// \brief dist_schedule types
+ /// dist_schedule types
OMP_dist_sch_static_chunked = 91,
OMP_dist_sch_static = 92,
/// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
@@ -528,13 +533,13 @@ enum OpenMPSchedType {
};
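
One relation worth noting in the table above: each 'ordered' schedule sits exactly 32 above its unordered counterpart (33 -> 65, 34 -> 66). A hypothetical helper (ours, not part of the patch) makes this explicit:

enum { OMP_sch_lower = 32, OMP_ord_lower = 64 };

constexpr int toOrderedSchedule(int Sched) {
  return Sched - OMP_sch_lower + OMP_ord_lower;
}
static_assert(toOrderedSchedule(33) == 65,
              "OMP_sch_static_chunked -> OMP_ord_static_chunked");
static_assert(toOrderedSchedule(34) == 66, "OMP_sch_static -> OMP_ord_static");
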
enum OpenMPRTLFunction {
- /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
+ /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
/// kmpc_micro microtask, ...);
OMPRTL__kmpc_fork_call,
- /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
+ /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
/// kmp_int32 global_tid, void *data, size_t size, void ***cache);
OMPRTL__kmpc_threadprivate_cached,
- /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
+ /// Call to void __kmpc_threadprivate_register( ident_t *,
/// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
OMPRTL__kmpc_threadprivate_register,
// Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
@@ -742,11 +747,11 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
- if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
- if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
- if (auto *DRE =
+ if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
+ if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+ if (const auto *DRE =
dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
- if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
+ if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
return DRD;
return nullptr;
}
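
For orientation, the expression shape probed above originates in a user-defined reduction; a minimal source example (ours, not from the patch) whose reduction(merge : V) clause yields the CallExpr-over-OpaqueValueExpr chain referencing the OMPDeclareReductionDecl:

#pragma omp declare reduction(merge : int : omp_out += omp_in) \
    initializer(omp_priv = 0)

int sum(const int *A, int N) {
  int V = 0;
#pragma omp parallel for reduction(merge : V)
  for (int I = 0; I < N; ++I)
    V += A[I];
  return V;
}
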
@@ -759,48 +764,51 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
if (DRD->getInitializer()) {
std::pair<llvm::Function *, llvm::Function *> Reduction =
CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
- auto *CE = cast<CallExpr>(InitOp);
- auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
+ const auto *CE = cast<CallExpr>(InitOp);
+ const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
- auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
- auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
+ const auto *LHSDRE =
+ cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
+ const auto *RHSDRE =
+ cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
- [=]() -> Address { return Private; });
+ [=]() { return Private; });
PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
- [=]() -> Address { return Original; });
+ [=]() { return Original; });
(void)PrivateScope.Privatize();
RValue Func = RValue::get(Reduction.second);
CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
CGF.EmitIgnoredExpr(InitOp);
} else {
llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
+ std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
auto *GV = new llvm::GlobalVariable(
CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage, Init, ".init");
+ llvm::GlobalValue::PrivateLinkage, Init, Name);
LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
RValue InitRVal;
switch (CGF.getEvaluationKind(Ty)) {
case TEK_Scalar:
- InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
+ InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
break;
case TEK_Complex:
InitRVal =
- RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
+ RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
break;
case TEK_Aggregate:
InitRVal = RValue::getAggregate(LV.getAddress());
break;
}
- OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
+ OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
/*IsInitializer=*/false);
}
}
-/// \brief Emit initialization of arrays of complex types.
+/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
@@ -814,8 +822,8 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
QualType ElementTy;
// Drill down to the base element type on both arrays.
- auto ArrayTy = Type->getAsArrayTypeUnsafe();
- auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
+ const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
+ llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
DestAddr =
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
if (DRD)
@@ -825,18 +833,18 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
llvm::Value *SrcBegin = nullptr;
if (DRD)
SrcBegin = SrcAddr.getPointer();
- auto DestBegin = DestAddr.getPointer();
+ llvm::Value *DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
- auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
+ llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
// The basic structure here is a while-do loop.
- auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
- auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
- auto IsEmpty =
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
+ llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
+ llvm::Value *IsEmpty =
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
- auto EntryBB = CGF.Builder.GetInsertBlock();
+ llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(BodyBB);
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
@@ -871,16 +879,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
if (DRD) {
// Shift the address forward by one element.
- auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
+ llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the address forward by one element.
- auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
+ llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
// Check whether we've reached the end.
- auto Done =
+ llvm::Value *Done =
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
@@ -889,6 +897,25 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
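
The control flow emitted above, rewritten as plain C++ for orientation (comments name the corresponding basic blocks; element type and init value are placeholders):

void arrayInitShape(int *DestBegin, long NumElements) {
  int *DestEnd = DestBegin + NumElements;
  if (DestBegin == DestEnd) // omp.arrayinit.isempty
    return;                 // branch straight to omp.arrayinit.done
  int *DestElement = DestBegin; // the PHI entering omp.arrayinit.body
  do {
    *DestElement = 0;             // per-element init: default or UDR initializer
    ++DestElement;                // omp.arraycpy.dest.element
  } while (DestElement != DestEnd); // omp.arraycpy.done check
}
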
+static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
+isDeclareTargetDeclaration(const ValueDecl *VD) {
+ for (const Decl *D : VD->redecls()) {
+ if (!D->hasAttrs())
+ continue;
+ if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
+ return Attr->getMapType();
+ }
+ if (const auto *V = dyn_cast<VarDecl>(VD)) {
+ if (const VarDecl *TD = V->getTemplateInstantiationPattern())
+ return isDeclareTargetDeclaration(TD);
+ } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
+ if (const auto *TD = FD->getTemplateInstantiationPattern())
+ return isDeclareTargetDeclaration(TD);
+ }
+
+ return llvm::None;
+}
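
The new helper classifies declarations like the following (a short example of ours): any redeclaration carrying the attribute, or the template pattern behind an instantiation, yields its map type.

#pragma omp declare target
int DeviceCounter;                  // variable mapped to the device
int square(int X) { return X * X; } // function compiled for the device
#pragma omp end declare target
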
+
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
return CGF.EmitOMPSharedLValue(E);
}
@@ -906,7 +933,7 @@ void ReductionCodeGen::emitAggregateInitialization(
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
// captured region.
- auto *PrivateVD =
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
bool EmitDeclareReductionInit =
DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
@@ -926,7 +953,7 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
BaseDecls.reserve(Shareds.size());
auto IPriv = Privates.begin();
auto IRed = ReductionOps.begin();
- for (const auto *Ref : Shareds) {
+ for (const Expr *Ref : Shareds) {
ClausesData.emplace_back(Ref, *IPriv, *IRed);
std::advance(IPriv, 1);
std::advance(IRed, 1);
@@ -942,7 +969,7 @@ void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
- auto *PrivateVD =
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
@@ -955,7 +982,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
}
llvm::Value *Size;
llvm::Value *SizeInChars;
- llvm::Type *ElemType =
+ auto *ElemType =
cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
@@ -981,7 +1008,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
llvm::Value *Size) {
- auto *PrivateVD =
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
if (!PrivateType->isVariablyModifiedType()) {
@@ -1002,9 +1029,10 @@ void ReductionCodeGen::emitInitialization(
CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
assert(SharedAddresses.size() > N && "No variable was generated");
- auto *PrivateVD =
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ const OMPDeclareReductionDecl *DRD =
+ getReductionInit(ClausesData[N].ReductionOp);
QualType PrivateType = PrivateVD->getType();
PrivateAddr = CGF.Builder.CreateElementBitCast(
PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
@@ -1029,7 +1057,7 @@ void ReductionCodeGen::emitInitialization(
}
bool ReductionCodeGen::needCleanups(unsigned N) {
- auto *PrivateVD =
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
@@ -1038,7 +1066,7 @@ bool ReductionCodeGen::needCleanups(unsigned N) {
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr) {
- auto *PrivateVD =
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
@@ -1054,9 +1082,9 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
BaseTy = BaseTy.getNonReferenceType();
while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
!CGF.getContext().hasSameType(BaseTy, ElTy)) {
- if (auto *PtrTy = BaseTy->getAs<PointerType>())
+ if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
- else {
+ } else {
LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
}
@@ -1097,28 +1125,32 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
return Address(Addr, BaseLVAlignment);
}
-Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
- Address PrivateAddr) {
- const DeclRefExpr *DE;
+static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
const VarDecl *OrigVD = nullptr;
- if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
- auto *Base = OASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
+ const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
Base = TempOASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
Base = TempASE->getBase()->IgnoreParenImpCasts();
DE = cast<DeclRefExpr>(Base);
OrigVD = cast<VarDecl>(DE->getDecl());
- } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
- auto *Base = ASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
+ const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
Base = TempASE->getBase()->IgnoreParenImpCasts();
DE = cast<DeclRefExpr>(Base);
OrigVD = cast<VarDecl>(DE->getDecl());
}
- if (OrigVD) {
+ return OrigVD;
+}
+
+Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr) {
+ const DeclRefExpr *DE;
+ if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
BaseDecls.emplace_back(OrigVD);
- auto OriginalBaseLValue = CGF.EmitLValue(DE);
+ LValue OriginalBaseLValue = CGF.EmitLValue(DE);
LValue BaseLValue =
loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
OriginalBaseLValue);
@@ -1140,7 +1172,8 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
- auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ const OMPDeclareReductionDecl *DRD =
+ getReductionInit(ClausesData[N].ReductionOp);
return DRD && DRD->getInitializer();
}
@@ -1170,12 +1203,38 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
AlignmentSource::Decl);
}
-CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
- : CGM(CGM), OffloadEntriesInfoManager(CGM) {
- IdentTy = llvm::StructType::create(
- "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
- CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
- CGM.Int8PtrTy /* psource */);
+static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+ QualType FieldTy) {
+ auto *Field = FieldDecl::Create(
+ C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
+ C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ DC->addDecl(Field);
+ return Field;
+}
+
+CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
+ StringRef Separator)
+ : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
+ OffloadEntriesInfoManager(CGM) {
+ ASTContext &C = CGM.getContext();
+ RecordDecl *RD = C.buildImplicitRecord("ident_t");
+ QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ RD->startDefinition();
+ // reserved_1
+ addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ // flags
+ addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ // reserved_2
+ addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ // reserved_3
+ addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ // psource
+ addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ RD->completeDefinition();
+ IdentQTy = C.getRecordType(RD);
+ IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
loadOffloadInfoMetadata();
@@ -1185,12 +1244,23 @@ void CGOpenMPRuntime::clear() {
InternalVars.clear();
}
+std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
+ SmallString<128> Buffer;
+ llvm::raw_svector_ostream OS(Buffer);
+ StringRef Sep = FirstSeparator;
+ for (StringRef Part : Parts) {
+ OS << Sep << Part;
+ Sep = Separator;
+ }
+ return OS.str();
+}
+
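A standalone model (ours) of getName(): the first part is prefixed with FirstSeparator, every later part with Separator. Assuming the host runtime passes "." for both (offload runtimes presumably pass different separators), getName({"init"}) yields the ".init" used earlier, and getName({"omp_combiner", ""}) yields ".omp_combiner." as used below:

#include <string>
#include <vector>

std::string joinName(const std::vector<std::string> &Parts,
                     const std::string &FirstSeparator,
                     const std::string &Separator) {
  std::string Result;
  const std::string *Sep = &FirstSeparator; // first part gets FirstSeparator
  for (const std::string &Part : Parts) {
    Result += *Sep + Part;
    Sep = &Separator;                       // later parts get Separator
  }
  return Result;
}

// joinName({"init"}, ".", ".")             == ".init"
// joinName({"omp_combiner", ""}, ".", ".") == ".omp_combiner."
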
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
const Expr *CombinerInitializer, const VarDecl *In,
const VarDecl *Out, bool IsCombiner) {
// void .omp_combiner.(Ty *in, Ty *out);
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
@@ -1199,28 +1269,30 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
/*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
- auto &FnInfo =
+ const CGFunctionInfo &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
- auto *Fn = llvm::Function::Create(
- FnTy, llvm::GlobalValue::InternalLinkage,
- IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ std::string Name = CGM.getOpenMPRuntime().getName(
+ {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
+ Out->getLocation());
CodeGenFunction::OMPPrivateScope Scope(CGF);
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
- Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
+ Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
.getAddress();
});
Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
- Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
+ Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
.getAddress();
});
@@ -1242,7 +1314,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
if (UDRMap.count(D) > 0)
return;
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
if (!In || !Out) {
In = &C.Idents.get("omp_in");
Out = &C.Idents.get("omp_out");
@@ -1252,7 +1324,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
cast<VarDecl>(D->lookup(Out).front()),
/*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
- if (auto *Init = D->getInitializer()) {
+ if (const Expr *Init = D->getInitializer()) {
if (!Priv || !Orig) {
Priv = &C.Idents.get("omp_priv");
Orig = &C.Idents.get("omp_orig");
@@ -1265,7 +1337,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
cast<VarDecl>(D->lookup(Priv).front()),
/*IsCombiner=*/false);
}
- UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
+ UDRMap.try_emplace(D, Combiner, Initializer);
if (CGF) {
auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
Decls.second.push_back(D);
@@ -1281,25 +1353,6 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
return UDRMap.lookup(D);
}
-// Layout information for ident_t.
-static CharUnits getIdentAlign(CodeGenModule &CGM) {
- return CGM.getPointerAlign();
-}
-static CharUnits getIdentSize(CodeGenModule &CGM) {
- assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
- return CharUnits::fromQuantity(16) + CGM.getPointerSize();
-}
-static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
- // All the fields except the last are i32, so this works beautifully.
- return unsigned(Field) * CharUnits::fromQuantity(4);
-}
-static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
- IdentFieldIndex Field,
- const llvm::Twine &Name = "") {
- auto Offset = getOffsetOfIdentField(Field);
- return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
-}
-
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
@@ -1308,19 +1361,20 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction(
"thread id variable must be of type kmp_int32 *");
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
- if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
+ if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
HasCancel = OPD->hasCancel();
- else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
+ else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
HasCancel = OPSD->hasCancel();
- else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
+ else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
- else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
+ else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
- else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
+ else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
- else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
+ else if (const auto *OPFD =
+ dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
- else if (auto *OPFD =
+ else if (const auto *OPFD =
dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
@@ -1352,8 +1406,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
bool Tied, unsigned &NumberOfParts) {
auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
PrePostActionTy &) {
- auto *ThreadID = getThreadID(CGF, D.getLocStart());
- auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
+ llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart());
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
llvm::Value *TaskArgs[] = {
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
@@ -1366,21 +1420,69 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
CodeGen.setAction(Action);
assert(!ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 for tasks");
- auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
- auto *TD = dyn_cast<OMPTaskDirective>(&D);
+ const OpenMPDirectiveKind Region =
+ isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
+ : OMPD_task;
+ const CapturedStmt *CS = D.getCapturedStmt(Region);
+ const auto *TD = dyn_cast<OMPTaskDirective>(&D);
CodeGenFunction CGF(CGM, true);
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
InnermostKind,
TD ? TD->hasCancel() : false, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
+ llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
if (!Tied)
NumberOfParts = Action.getNumberOfParts();
return Res;
}
+static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
+ const RecordDecl *RD, const CGRecordLayout &RL,
+ ArrayRef<llvm::Constant *> Data) {
+ llvm::StructType *StructTy = RL.getLLVMType();
+ unsigned PrevIdx = 0;
+ ConstantInitBuilder CIBuilder(CGM);
+ auto DI = Data.begin();
+ for (const FieldDecl *FD : RD->fields()) {
+ unsigned Idx = RL.getLLVMFieldNo(FD);
+ // Fill the alignment.
+ for (unsigned I = PrevIdx; I < Idx; ++I)
+ Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
+ PrevIdx = Idx + 1;
+ Fields.add(*DI);
+ ++DI;
+ }
+}
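
What the inner loop above does, modeled standalone (index model is ours): each constant lands at its LLVM element index, and any gap left by record layout is filled with null padding constants first.

#include <cstdio>
#include <vector>

// FieldIdx[i] is the LLVM element index assigned to AST field i.
void modelBuildStructValue(const std::vector<unsigned> &FieldIdx) {
  unsigned PrevIdx = 0;
  for (unsigned Idx : FieldIdx) {
    for (unsigned I = PrevIdx; I < Idx; ++I)
      std::printf("element %u <- zeroinitializer (padding)\n", I);
    std::printf("element %u <- data constant\n", Idx);
    PrevIdx = Idx + 1;
  }
}

int main() {
  modelBuildStructValue({0, 2}); // element 1 is a padding slot
  return 0;
}
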
+
+template <class... As>
+static llvm::GlobalVariable *
+createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty,
+ ArrayRef<llvm::Constant *> Data, const Twine &Name,
+ As &&... Args) {
+ const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
+ const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
+ ConstantInitBuilder CIBuilder(CGM);
+ ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
+ buildStructValue(Fields, CGM, RD, RL, Data);
+ return Fields.finishAndCreateGlobal(
+ Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty),
+ /*isConstant=*/true, std::forward<As>(Args)...);
+}
+
+template <typename T>
+static void
+createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
+ ArrayRef<llvm::Constant *> Data,
+ T &Parent) {
+ const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
+ const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
+ ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
+ buildStructValue(Fields, CGM, RD, RL, Data);
+ Fields.finishAndAddTo(Parent);
+}
+
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
- CharUnits Align = getIdentAlign(CGM);
+ CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
if (!Entry) {
if (!DefaultOpenMPPSource) {
@@ -1394,17 +1496,15 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
- ConstantInitBuilder builder(CGM);
- auto fields = builder.beginStruct(IdentTy);
- fields.addInt(CGM.Int32Ty, 0);
- fields.addInt(CGM.Int32Ty, Flags);
- fields.addInt(CGM.Int32Ty, 0);
- fields.addInt(CGM.Int32Ty, 0);
- fields.add(DefaultOpenMPPSource);
- auto DefaultOpenMPLocation =
- fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
- llvm::GlobalValue::PrivateLinkage);
- DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
+ llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty),
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty),
+ DefaultOpenMPPSource};
+ llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct(
+ CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage);
+ DefaultOpenMPLocation->setUnnamedAddr(
+ llvm::GlobalValue::UnnamedAddr::Global);
OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
}
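
The default constant built above, spelled in C-style C++ for orientation; the psource payload is the shared ";unknown;unknown;0;0;;" string created just before this hunk, and the flags shown assume the common OMP_IDENT_KMPC case (this rendering is ours):

static const char DefaultPSource[] = ";unknown;unknown;0;0;;";

static const struct {
  int reserved_1, flags, reserved_2, reserved_3;
  const char *psource;
} DefaultLoc = {0, /*OMP_IDENT_KMPC*/ 0x02, 0, 0, DefaultPSource};
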
@@ -1422,17 +1522,17 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
assert(CGF.CurFn && "No function in current CodeGenFunction.");
+ CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
Address LocValue = Address::invalid();
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end())
- LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
+ LocValue = Address(I->second.DebugLoc, Align);
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
// GetOpenMPThreadID was called before this routine.
if (!LocValue.isValid()) {
// Generate "ident_t .kmpc_loc.addr;"
- Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
- ".kmpc_loc.addr");
+ Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
@@ -1440,29 +1540,30 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
- CGM.getSize(getIdentSize(CGF.CGM)));
+ CGF.getTypeSize(IdentQTy));
}
// char **psource = &.kmpc_loc_<flags>.addr.psource;
- Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
+ LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
+ auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
+ LValue PSource =
+ CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
- auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
+ llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
if (OMPDebugLoc == nullptr) {
SmallString<128> Buffer2;
llvm::raw_svector_ostream OS2(Buffer2);
// Build debug location
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
OS2 << ";" << PLoc.getFilename() << ";";
- if (const FunctionDecl *FD =
- dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
+ if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
OS2 << FD->getQualifiedNameAsString();
- }
OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
}
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
- CGF.Builder.CreateStore(OMPDebugLoc, PSource);
+ CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
// Our callers always pass this to a runtime function, so for
// convenience, go ahead and return a naked pointer.
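
A standalone rendering (helper name is ours) of the ";file;function;line;column;;" format the stream above builds into psource:

#include <cstdio>

void printPSource(const char *File, const char *Function, int Line, int Column) {
  std::printf(";%s;%s;%d;%d;;\n", File, Function, Line, Column);
}

// printPSource("t.c", "foo", 10, 3) emits ";t.c;foo;10;3;;"
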
@@ -1490,8 +1591,8 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (OMPRegionInfo->getThreadIDVariable()) {
// Check if this an outlined function with thread id passed as argument.
- auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
- ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+ LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
// If value loaded in entry block, cache it and use it everywhere in
// function.
if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
@@ -1509,7 +1610,7 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
// function.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
- auto *Call = CGF.Builder.CreateCall(
+ llvm::CallInst *Call = CGF.Builder.CreateCall(
createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
@@ -1523,17 +1624,14 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
if (OpenMPLocThreadIDMap.count(CGF.CurFn))
OpenMPLocThreadIDMap.erase(CGF.CurFn);
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
- for(auto *D : FunctionUDRMap[CGF.CurFn]) {
+    for (auto *D : FunctionUDRMap[CGF.CurFn])
UDRMap.erase(D);
- }
FunctionUDRMap.erase(CGF.CurFn);
}
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
- if (!IdentTy) {
- }
- return llvm::PointerType::getUnqual(IdentTy);
+ return IdentTy->getPointerTo();
}
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
@@ -1555,7 +1653,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// microtask, ...);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
getKmpc_MicroPointerTy()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
break;
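
The C-level prototype this case materializes, per the comment above (sketched by us with int standing in for kmp_int32): the microtask receives the global and bound thread ids by pointer, followed by the forwarded shared arguments.

struct ident_t; // layout sketched earlier

typedef void (*kmpc_micro)(int *global_tid, int *bound_tid, ...);

extern "C" void __kmpc_fork_call(ident_t *loc, int argc, kmpc_micro microtask,
                                 ...);
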
@@ -1563,7 +1661,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_global_thread_num: {
// Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
break;
@@ -1574,7 +1672,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy, CGM.SizeTy,
CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
break;
@@ -1585,7 +1683,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
break;
@@ -1596,7 +1694,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy),
CGM.IntPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
break;
@@ -1605,21 +1703,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build void __kmpc_threadprivate_register(ident_t *, void *data,
// kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
// typedef void *(*kmpc_ctor)(void *);
- auto KmpcCtorTy =
+ auto *KmpcCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
/*isVarArg*/ false)->getPointerTo();
// typedef void *(*kmpc_cctor)(void *, void *);
llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto KmpcCopyCtorTy =
+ auto *KmpcCopyCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
- /*isVarArg*/ false)->getPointerTo();
+ /*isVarArg*/ false)
+ ->getPointerTo();
// typedef void (*kmpc_dtor)(void *);
- auto KmpcDtorTy =
+ auto *KmpcDtorTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
->getPointerTo();
llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
KmpcCopyCtorTy, KmpcDtorTy};
- auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
+ auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
/*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
break;
@@ -1630,7 +1729,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
break;
@@ -1639,7 +1738,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
break;
@@ -1647,7 +1746,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_barrier: {
// Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
break;
@@ -1655,7 +1754,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_for_static_fini: {
// Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
break;
@@ -1665,7 +1764,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// kmp_int32 num_threads)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
break;
@@ -1674,7 +1773,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
break;
@@ -1683,7 +1782,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
@@ -1691,7 +1790,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_flush: {
// Build void __kmpc_flush(ident_t *loc);
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
break;
@@ -1699,7 +1798,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_master: {
// Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
break;
@@ -1707,7 +1806,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_end_master: {
// Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
break;
@@ -1716,7 +1815,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
// int end_part);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
break;
@@ -1724,7 +1823,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_single: {
// Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
break;
@@ -1732,7 +1831,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_end_single: {
// Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
break;
@@ -1746,7 +1845,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
// Return void * and then cast to particular kmp_task_t type.
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
break;
@@ -1756,7 +1855,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
break;
@@ -1771,7 +1870,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
break;
@@ -1787,7 +1886,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
break;
@@ -1804,7 +1903,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
break;
@@ -1815,7 +1914,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
break;
@@ -1826,7 +1925,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
@@ -1837,7 +1936,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
@@ -1848,7 +1947,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy,
/*Name=*/"__kmpc_omp_task_complete_if0");
@@ -1857,7 +1956,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_ordered: {
// Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
break;
@@ -1865,7 +1964,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_end_ordered: {
// Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
break;
@@ -1873,7 +1972,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_omp_taskwait: {
// Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
break;
@@ -1881,7 +1980,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_taskgroup: {
// Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
break;
@@ -1889,7 +1988,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_end_taskgroup: {
// Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
break;
@@ -1898,7 +1997,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
// int proc_bind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
break;
@@ -1910,7 +2009,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
@@ -1923,7 +2022,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int32Ty, CGM.VoidPtrTy,
CGM.Int32Ty, CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
break;
@@ -1932,7 +2031,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
// global_tid, kmp_int32 cncl_kind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
break;
@@ -1941,7 +2040,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
break;
@@ -1951,7 +2050,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// kmp_int32 num_teams, kmp_int32 num_threads)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
break;
@@ -1961,7 +2060,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// microtask, ...);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
getKmpc_MicroPointerTy()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
break;
@@ -1981,7 +2080,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.IntTy,
CGM.Int64Ty,
CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
break;
@@ -1993,7 +2092,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
break;
@@ -2001,7 +2100,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
case OMPRTL__kmpc_doacross_fini: {
// Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
break;
@@ -2011,7 +2110,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// *vec);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
break;
@@ -2021,7 +2120,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// *vec);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
break;
@@ -2030,7 +2129,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
// *data);
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
@@ -2040,7 +2139,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
// Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
@@ -2057,7 +2156,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
@@ -2073,7 +2172,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
break;
@@ -2091,7 +2190,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.Int64Ty->getPointerTo(),
CGM.Int32Ty,
CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
break;
@@ -2109,7 +2208,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.Int64Ty->getPointerTo(),
CGM.Int32Ty,
CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
break;
@@ -2119,7 +2218,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
QualType ParamTy =
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
break;
@@ -2129,7 +2228,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
QualType ParamTy =
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
break;
@@ -2143,7 +2242,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
break;
@@ -2172,7 +2271,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
break;
@@ -2201,7 +2300,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int64Ty->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
break;
@@ -2230,12 +2329,12 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
- auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
- : "__kmpc_for_static_init_4u")
- : (IVSigned ? "__kmpc_for_static_init_8"
- : "__kmpc_for_static_init_8u");
- auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
- auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
+ : "__kmpc_for_static_init_4u")
+ : (IVSigned ? "__kmpc_for_static_init_8"
+ : "__kmpc_for_static_init_8u");
+ llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+ auto *PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
@@ -2247,7 +2346,7 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
ITy, // incr
ITy // chunk
};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
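// For reference, the selection above resolves to one of four runtime entry
// points; the mapping, as derived from the code (illustrative layout):
//   IVSize == 32, IVSigned == true  -> __kmpc_for_static_init_4
//   IVSize == 32, IVSigned == false -> __kmpc_for_static_init_4u
//   IVSize == 64, IVSigned == true  -> __kmpc_for_static_init_8
//   IVSize == 64, IVSigned == false -> __kmpc_for_static_init_8u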
@@ -2256,11 +2355,11 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
- auto Name =
+ StringRef Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
: (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
- auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+ llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
@@ -2269,7 +2368,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
ITy, // stride
ITy // chunk
};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
@@ -2278,7 +2377,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
- auto Name =
+ StringRef Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
: (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
@@ -2286,7 +2385,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
@@ -2295,12 +2394,12 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
- auto Name =
+ StringRef Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
: (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
- auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
- auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+ auto *PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
@@ -2309,18 +2408,48 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
PtrTy, // p_upper
PtrTy // p_stride
};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
+Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
+ if (CGM.getLangOpts().OpenMPSimd)
+ return Address::invalid();
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ isDeclareTargetDeclaration(VD);
+ if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
+ SmallString<64> PtrName;
+ {
+ llvm::raw_svector_ostream OS(PtrName);
+ OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
+ }
+ llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
+ if (!Ptr) {
+ QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
+ Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
+ PtrName);
+ if (!CGM.getLangOpts().OpenMPIsDevice) {
+ auto *GV = cast<llvm::GlobalVariable>(Ptr);
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ GV->setInitializer(CGM.GetAddrOfGlobal(VD));
+ }
+ CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
+ registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
+ }
+ return Address(Ptr, CGM.getContext().getDeclAlign(VD));
+ }
+ return Address::invalid();
+}
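// A rough sketch of what getAddrOfDeclareTargetLink produces on the host for
// `int X;` under `#pragma omp declare target link(X)` (C-level picture only;
// the suffix comes from the code above):
//   int X;                          // original definition
//   int *X_decl_tgt_link_ptr = &X;  // emitted global, kept alive via llvm.used
// Accesses to X in target regions are then routed through this pointer so the
// runtime can redirect them to the device copy.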
+
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
assert(!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported());
// Look up the entry, lazily creating it if necessary.
- return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
- Twine(CGM.getMangledName(VD)) + ".cache.");
+ std::string Suffix = getName({"cache", ""});
+ return getOrCreateInternalVariable(
+ CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
@@ -2331,7 +2460,7 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
CGM.getContext().getTargetInfo().isTLSSupported())
return VDAddr;
- auto VarTy = VDAddr.getElementType();
+ llvm::Type *VarTy = VDAddr.getElementType();
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
CGM.Int8PtrTy),
@@ -2347,15 +2476,14 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
// Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
// runtime library.
- auto OMPLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
OMPLoc);
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
// to register constructor/destructor for variable.
- llvm::Value *Args[] = {OMPLoc,
- CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
- CGM.VoidPtrTy),
- Ctor, CopyCtor, Dtor};
+ llvm::Value *Args[] = {
+ OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
+ Ctor, CopyCtor, Dtor};
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}
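// Net effect of the two calls emitted above, in C terms (following the
// comments in the code; argument lists abbreviated):
//   __kmpc_global_thread_num(&loc);               // bring up the runtime
//   __kmpc_threadprivate_register(&loc, (void *)&var,
//                                 ctor, /*cctor=*/NULL, dtor);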
@@ -2373,29 +2501,31 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
- auto Init = VD->getAnyInitializer();
+ const Expr *Init = VD->getAnyInitializer();
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
// Generate function that re-emits the declaration's initializer into the
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
- auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
+ const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidPtrTy, Args);
- auto FTy = CGM.getTypes().GetFunctionType(FI);
- auto Fn = CGM.CreateGlobalInitOrDestructFunction(
- FTy, ".__kmpc_global_ctor_.", FI, Loc);
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ std::string Name = getName({"__kmpc_global_ctor_", ""});
+ llvm::Function *Fn =
+ CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
- Args, SourceLocation());
- auto ArgVal = CtorCGF.EmitLoadOfScalar(
+ Args, Loc, Loc);
+ llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
Address Arg = Address(ArgVal, VDAddr.getAlignment());
- Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
- CtorCGF.ConvertTypeForMem(ASTTy));
+ Arg = CtorCGF.Builder.CreateElementBitCast(
+ Arg, CtorCGF.ConvertTypeForMem(ASTTy));
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
/*IsInitializer=*/true);
ArgVal = CtorCGF.EmitLoadOfScalar(
@@ -2410,21 +2540,23 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
- auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
+ const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidTy, Args);
- auto FTy = CGM.getTypes().GetFunctionType(FI);
- auto Fn = CGM.CreateGlobalInitOrDestructFunction(
- FTy, ".__kmpc_global_dtor_.", FI, Loc);
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ std::string Name = getName({"__kmpc_global_dtor_", ""});
+ llvm::Function *Fn =
+ CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
- SourceLocation());
+ Loc, Loc);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
- auto ArgVal = DtorCGF.EmitLoadOfScalar(
+ llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
@@ -2438,34 +2570,36 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto CopyCtorTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
- /*isVarArg=*/false)->getPointerTo();
+ auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
+ /*isVarArg=*/false)
+ ->getPointerTo();
// Copying constructor for the threadprivate variable.
// Must be NULL: reserved by the runtime, which currently requires that this
// parameter always be NULL. Otherwise it fires an assertion.
CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
if (Ctor == nullptr) {
- auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
- /*isVarArg=*/false)->getPointerTo();
+ auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
+ /*isVarArg=*/false)
+ ->getPointerTo();
Ctor = llvm::Constant::getNullValue(CtorTy);
}
if (Dtor == nullptr) {
- auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
- /*isVarArg=*/false)->getPointerTo();
+ auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
+ /*isVarArg=*/false)
+ ->getPointerTo();
Dtor = llvm::Constant::getNullValue(DtorTy);
}
if (!CGF) {
- auto InitFunctionTy =
+ auto *InitFunctionTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
- auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
- InitFunctionTy, ".__omp_threadprivate_init_.",
- CGM.getTypes().arrangeNullaryFunction());
+ std::string Name = getName({"__omp_threadprivate_init_", ""});
+ llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
+ InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
CodeGenFunction InitCGF(CGM);
FunctionArgList ArgList;
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
CGM.getTypes().arrangeNullaryFunction(), ArgList,
- Loc);
+ Loc, Loc);
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
InitCGF.FinishFunction();
return InitFunction;
@@ -2475,19 +2609,156 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
+/// Obtain information that uniquely identifies a target entry. This
+/// consists of the file and device IDs as well as the line number associated
+/// with the relevant entry source location.
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
+ unsigned &DeviceID, unsigned &FileID,
+ unsigned &LineNum) {
+ SourceManager &SM = C.getSourceManager();
+
+ // The loc should always be valid and have a file ID (the user cannot use
+ // #pragma directives in macros).
+
+ assert(Loc.isValid() && "Source location is expected to always be valid.");
+
+ PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+ assert(PLoc.isValid() && "Source location is expected to always be valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ SM.getDiagnostics().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message();
+
+ DeviceID = ID.getDevice();
+ FileID = ID.getFile();
+ LineNum = PLoc.getLine();
+}
+
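// Sketch of the triple this helper yields for a declaration at /tmp/t.c:42
// (values illustrative; DeviceID/FileID come from the file's unique ID, i.e.
// roughly its st_dev/st_ino, and LineNum from the presumed location):
//   DeviceID = <st_dev of /tmp/t.c>, FileID = <st_ino of /tmp/t.c>, LineNum = 42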
+bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
+ llvm::GlobalVariable *Addr,
+ bool PerformInit) {
+ Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ isDeclareTargetDeclaration(VD);
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
+ return false;
+ VD = VD->getDefinition(CGM.getContext());
+ if (VD && !DeclareTargetWithDefinition.insert(VD).second)
+ return CGM.getLangOpts().OpenMPIsDevice;
+
+ QualType ASTTy = VD->getType();
+
+ SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
+ // Produce the unique prefix to identify the new target regions. We use
+ // the source location of the variable declaration, which we know does not
+ // conflict with any target region.
+ unsigned DeviceID;
+ unsigned FileID;
+ unsigned Line;
+ getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
+ SmallString<128> Buffer, Out;
+ {
+ llvm::raw_svector_ostream OS(Buffer);
+ OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
+ << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
+ }
+
+ const Expr *Init = VD->getAnyInitializer();
+ if (CGM.getLangOpts().CPlusPlus && PerformInit) {
+ llvm::Constant *Ctor;
+ llvm::Constant *ID;
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ // Generate a function that re-emits the declaration's initializer into
+ // the device copy of the variable VD.
+ CodeGenFunction CtorCGF(CGM);
+
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
+ FTy, Twine(Buffer, "_ctor"), FI, Loc);
+ auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
+ CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
+ FunctionArgList(), Loc, Loc);
+ auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
+ CtorCGF.EmitAnyExprToMem(Init,
+ Address(Addr, CGM.getContext().getDeclAlign(VD)),
+ Init->getType().getQualifiers(),
+ /*IsInitializer=*/true);
+ CtorCGF.FinishFunction();
+ Ctor = Fn;
+ ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
+ CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
+ } else {
+ Ctor = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage,
+ llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
+ ID = Ctor;
+ }
+
+ // Register the information for the entry associated with the constructor.
+ Out.clear();
+ OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
+ DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
+ ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
+ }
+ if (VD->getType().isDestructedType() != QualType::DK_none) {
+ llvm::Constant *Dtor;
+ llvm::Constant *ID;
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ // Generate a function that emits the destructor call for the device
+ // copy of the variable VD.
+ CodeGenFunction DtorCGF(CGM);
+
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
+ FTy, Twine(Buffer, "_dtor"), FI, Loc);
+ auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
+ DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
+ FunctionArgList(), Loc, Loc);
+ // Create a scope with an artificial location for the body of this
+ // function.
+ auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
+ DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
+ ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
+ DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
+ DtorCGF.FinishFunction();
+ Dtor = Fn;
+ ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
+ CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
+ } else {
+ Dtor = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage,
+ llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
+ ID = Dtor;
+ }
+ // Register the information for the entry associated with the destructor.
+ Out.clear();
+ OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
+ DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
+ ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
+ }
+ return CGM.getLangOpts().OpenMPIsDevice;
+}
+
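// For a declare target variable G defined at line 10 of the current file, the
// registration above yields entries named roughly (hex IDs illustrative):
//   __omp_offloading_<device-id>_<file-id>_G_l10_ctor  // runs G's initializer
//   __omp_offloading_<device-id>_<file-id>_G_l10_dtor  // runs G's destructor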
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
- llvm::Twine VarName(Name, ".artificial.");
+ std::string Suffix = getName({"artificial", ""});
+ std::string CacheSuffix = getName({"cache", ""});
llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
- llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
+ llvm::Value *GAddr =
+ getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
llvm::Value *Args[] = {
emitUpdateLocation(CGF, SourceLocation()),
getThreadID(CGF, SourceLocation()),
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
/*IsSigned=*/false),
- getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
+ getOrCreateInternalVariable(
+ CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
return Address(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitRuntimeCall(
@@ -2496,13 +2767,6 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
CGM.getPointerAlign());
}
-/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
-/// function. Here is the logic:
-/// if (Cond) {
-/// ThenGen();
-/// } else {
-/// ElseGen();
-/// }
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
const RegionCodeGenTy &ThenGen,
const RegionCodeGenTy &ElseGen) {
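// Shape of the code this function emits, per the doc comment removed above
// (sketch):
//   if (Cond) { ThenGen(CGF); } else { ElseGen(CGF); }
// where a constant-foldable Cond collapses to just the taken branch.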
@@ -2521,9 +2785,9 @@ void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
// Otherwise, the condition did not fold, or we couldn't elide it. Just
// emit the conditional branch.
- auto ThenBlock = CGF.createBasicBlock("omp_if.then");
- auto ElseBlock = CGF.createBasicBlock("omp_if.else");
- auto ContBlock = CGF.createBasicBlock("omp_if.end");
+ llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
+ llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
+ llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
// Emit the 'then' code.
@@ -2548,11 +2812,11 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
const Expr *IfCond) {
if (!CGF.HaveInsertPoint())
return;
- auto *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
PrePostActionTy &) {
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
- auto &RT = CGF.CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
RTLoc,
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
@@ -2561,13 +2825,13 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
- auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
+ llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
};
auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
PrePostActionTy &) {
- auto &RT = CGF.CGM.getOpenMPRuntime();
- auto ThreadID = RT.getThreadID(CGF, Loc);
+ CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
+ llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
// Build calls:
// __kmpc_serialized_parallel(&Loc, GTid);
llvm::Value *Args[] = {RTLoc, ThreadID};
@@ -2575,13 +2839,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
// OutlinedFn(&GTid, &zero, CapturedStruct);
- auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
- Address ZeroAddr =
- CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ // ThreadId for serialized parallels is 0.
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -2592,9 +2855,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
EndArgs);
};
- if (IfCond)
+ if (IfCond) {
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
- else {
+ } else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
}
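// Overall shape of the two paths emitted above (pseudo-C; names taken from
// the code):
//   if (IfCond) {   // or unconditionally when there is no if clause
//     __kmpc_fork_call(&loc, <num captured vars>, OutlinedFn, var1, ..., varn);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     OutlinedFn(&zero, &zero, var1, ..., varn); // thread id is 0 when serialized
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }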
@@ -2613,10 +2876,10 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
if (OMPRegionInfo->getThreadIDVariable())
return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
- auto ThreadID = getThreadID(CGF, Loc);
- auto Int32Ty =
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ QualType Int32Ty =
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
- auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
+ Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
CGF.EmitStoreOfScalar(ThreadID,
CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
@@ -2629,8 +2892,8 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << Name;
- auto RuntimeName = Out.str();
- auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
+ StringRef RuntimeName = Out.str();
+ auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
if (Elem.second) {
assert(Elem.second->getType()->getPointerElementType() == Ty &&
"OMP internal variable has different type than requested");
@@ -2644,8 +2907,9 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
}
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
- llvm::Twine Name(".gomp_critical_user_", CriticalName);
- return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
+ std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
+ std::string Name = getName({Prefix, "var"});
+ return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
namespace {
@@ -2779,21 +3043,28 @@ static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
static llvm::Value *emitCopyprivateCopyFunction(
CodeGenModule &CGM, llvm::Type *ArgsType,
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
- ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
- auto &C = CGM.getContext();
+ ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
+ SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
// void copy_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *Fn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- ".omp.copyprivate.copy_func", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ const auto &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ std::string Name =
+ CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
+ auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
+ llvm::GlobalValue::InternalLinkage, Name,
+ &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
// Dest = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
@@ -2807,13 +3078,15 @@ static llvm::Value *emitCopyprivateCopyFunction(
// ...
// *(Typen*)Dst[n] = *(Typen*)Src[n];
for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
- auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
+ const auto *DestVar =
+ cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
- auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
+ const auto *SrcVar =
+ cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
- auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
+ const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
QualType Type = VD->getType();
CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
}
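// The generated helper therefore looks like this at the C level (sketch
// following the comments above; TypeN stands for each variable's type):
//   static void omp_copyprivate_copy_func(void *LHSArg, void *RHSArg) {
//     void **Dst = (void **)LHSArg;
//     void **Src = (void **)RHSArg;
//     *(Type0 *)Dst[0] = *(Type0 *)Src[0];
//     /* ... one assignment per copyprivate variable ... */
//   }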
@@ -2833,7 +3106,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
assert(CopyprivateVars.size() == SrcExprs.size() &&
CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size());
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
// int32 did_it = 0;
// if(__kmpc_single(ident_t *, gtid)) {
// SingleOpGen();
@@ -2846,7 +3119,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
Address DidIt = Address::invalid();
if (!CopyprivateVars.empty()) {
// int32 did_it = 0;
- auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ QualType KmpInt32Ty =
+ C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
}
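// Continuing the pseudocode in the comment above, the full emitted pattern is
// roughly (sketch):
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     SingleOpGen();
//     did_it = 1;
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, <copyprivate list>,
//                      <copy_func>, did_it);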
@@ -2866,7 +3140,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
// <copy_func>, did_it);
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
- auto CopyprivateArrayTy =
+ QualType CopyprivateArrayTy =
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
@@ -2882,14 +3156,14 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
}
// Build function that copies private values from single region to all other
// threads in the corresponding parallel region.
- auto *CpyFn = emitCopyprivateCopyFunction(
+ llvm::Value *CpyFn = emitCopyprivateCopyFunction(
CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
- CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
- auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
+ CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
+ llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
Address CL =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
CGF.VoidPtrTy);
- auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
+ llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), // ident_t *<loc>
getThreadID(CGF, Loc), // i32 <gtid>
@@ -2948,19 +3222,19 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
- auto *Result = CGF.EmitRuntimeCall(
+ llvm::Value *Result = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
if (EmitChecks) {
// if (__kmpc_cancel_barrier()) {
// exit from construct;
// }
- auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
- auto *ContBB = CGF.createBasicBlock(".cancel.continue");
- auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
+ llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
+ llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
CGF.EmitBlock(ExitBB);
// exit from construct;
- auto CancelDestination =
+ CodeGenFunction::JumpDest CancelDestination =
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
CGF.EmitBranchThroughCleanup(CancelDestination);
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
@@ -2971,7 +3245,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
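// Sketch of the cancellation-aware variant emitted above:
//   if (__kmpc_cancel_barrier(&loc, gtid)) {
//     /* branch through cleanups to the construct's cancellation exit */
//   }
//   /* .cancel.continue: */
// The non-cancelling case is a single __kmpc_barrier(&loc, gtid) call.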
-/// \brief Map the OpenMP loop schedule to the runtime enumeration.
+/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked, bool Ordered) {
switch (ScheduleKind) {
@@ -2993,7 +3267,7 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
llvm_unreachable("Unexpected runtime schedule");
}
-/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
+/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
// only static is allowed for dist_schedule
@@ -3002,19 +3276,20 @@ getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked) const {
- auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
return Schedule == OMP_sch_static;
}
bool CGOpenMPRuntime::isStaticNonchunked(
OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
- auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
return Schedule == OMP_dist_sch_static;
}
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
- auto Schedule =
+ OpenMPSchedType Schedule =
getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
return Schedule != OMP_sch_static;
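// For instance (sketch): schedule(static) with no chunk maps to
// OMP_sch_static, so isStaticNonchunked() returns true and the loop takes the
// single static-init path, while schedule(dynamic) maps to a dispatch
// schedule for which isDynamic() returns true.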
@@ -3147,12 +3422,12 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
assert(isOpenMPWorksharingDirective(DKind) &&
"Expected loop-based or sections-based directive.");
- auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
+ llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
isOpenMPLoopDirective(DKind)
? OMP_IDENT_WORK_LOOP
: OMP_IDENT_WORK_SECTIONS);
- auto *ThreadId = getThreadID(CGF, Loc);
- auto *StaticInitFunction =
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
+ llvm::Constant *StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
@@ -3164,10 +3439,10 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
const CGOpenMPRuntime::StaticRTInput &Values) {
OpenMPSchedType ScheduleNum =
getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
- auto *UpdatedLocation =
+ llvm::Value *UpdatedLocation =
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
- auto *ThreadId = getThreadID(CGF, Loc);
- auto *StaticInitFunction =
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
+ llvm::Constant *StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
@@ -3223,7 +3498,7 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
llvm::Value *Call =
CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
return CGF.EmitScalarConversion(
- Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
+ Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
CGF.getContext().BoolTy, Loc);
}
@@ -3285,13 +3560,13 @@ void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
}
namespace {
-/// \brief Indexes of fields for type kmp_task_t.
+/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
- /// \brief List of shared variables.
+ /// List of shared variables.
KmpTaskTShareds,
- /// \brief Task routine.
+ /// Task routine.
KmpTaskTRoutine,
- /// \brief Partition id for the untied tasks.
+ /// Partition id for the untied tasks.
KmpTaskTPartId,
/// Function with call of destructors for private variables.
Data1,
@@ -3311,11 +3586,11 @@ enum KmpTaskTFields {
} // anonymous namespace
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
- // FIXME: Add other entries type when they become supported.
- return OffloadEntriesTargetRegion.empty();
+ return OffloadEntriesTargetRegion.empty() &&
+ OffloadEntriesDeviceGlobalVar.empty();
}
-/// \brief Initialize target region entry.
+/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
@@ -3325,7 +3600,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
"code generation.");
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
- /*Flags=*/0);
+ OMPTargetRegionEntryTargetRegion);
++OffloadingEntriesNum;
}
@@ -3333,22 +3608,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
llvm::Constant *Addr, llvm::Constant *ID,
- int32_t Flags) {
+ OMPTargetRegionEntryKind Flags) {
// If we are emitting code for a target, the entry is already initialized,
// and only has to be registered.
if (CGM.getLangOpts().OpenMPIsDevice) {
- assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
- "Entry must exist.");
+ if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "Unable to find target region on line '%0' in the device code.");
+ CGM.getDiags().Report(DiagID) << LineNum;
+ return;
+ }
auto &Entry =
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
assert(Entry.isValid() && "Entry not initialized!");
Entry.setAddress(Addr);
Entry.setID(ID);
Entry.setFlags(Flags);
- return;
} else {
- OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
+ OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
+ ++OffloadingEntriesNum;
}
}
@@ -3376,48 +3656,69 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
const OffloadTargetRegionEntryInfoActTy &Action) {
// Scan all target region entries and perform the provided action.
- for (auto &D : OffloadEntriesTargetRegion)
- for (auto &F : D.second)
- for (auto &P : F.second)
- for (auto &L : P.second)
+ for (const auto &D : OffloadEntriesTargetRegion)
+ for (const auto &F : D.second)
+ for (const auto &P : F.second)
+ for (const auto &L : P.second)
Action(D.first, F.first, P.first(), L.first, L.second);
}
-/// \brief Create a Ctor/Dtor-like function whose body is emitted through
-/// \a Codegen. This is used to emit the two functions that register and
-/// unregister the descriptor of the current compilation unit.
-static llvm::Function *
-createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
- const RegionCodeGenTy &Codegen) {
- auto &C = CGM.getContext();
- FunctionArgList Args;
- ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- Args.push_back(&DummyPtr);
+void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
+ initializeDeviceGlobalVarEntryInfo(StringRef Name,
+ OMPTargetGlobalVarEntryKind Flags,
+ unsigned Order) {
+ assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
+ "only required for the device "
+ "code generation.");
+ OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
+ ++OffloadingEntriesNum;
+}
- CodeGenFunction CGF(CGM);
- auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto FTy = CGM.getTypes().GetFunctionType(FI);
- auto *Fn =
- CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
- Codegen(CGF);
- CGF.FinishFunction();
- return Fn;
+void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
+ registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
+ CharUnits VarSize,
+ OMPTargetGlobalVarEntryKind Flags,
+ llvm::GlobalValue::LinkageTypes Linkage) {
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
+ assert(Entry.isValid() && Entry.getFlags() == Flags &&
+ "Entry not initialized!");
+ assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
+ "Resetting with the new address.");
+ if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
+ return;
+ Entry.setAddress(Addr);
+ Entry.setVarSize(VarSize);
+ Entry.setLinkage(Linkage);
+ } else {
+ if (hasDeviceGlobalVarEntryInfo(VarName))
+ return;
+ OffloadEntriesDeviceGlobalVar.try_emplace(
+ VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
+ ++OffloadingEntriesNum;
+ }
+}
+
+void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
+ actOnDeviceGlobalVarEntriesInfo(
+ const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
+ // Scan all device global variable entries and perform the provided action.
+ for (const auto &E : OffloadEntriesDeviceGlobalVar)
+ Action(E.getKey(), E.getValue());
}
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
-
// If we don't have entries or if we are emitting code for the device, we
// don't need to do anything.
if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
return nullptr;
- auto &M = CGM.getModule();
- auto &C = CGM.getContext();
+ llvm::Module &M = CGM.getModule();
+ ASTContext &C = CGM.getContext();
// Get list of devices we care about
- auto &Devices = CGM.getLangOpts().OMPTargetTriples;
+ const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
// We should be creating an offloading descriptor only if there are devices
// specified.
@@ -3425,46 +3726,49 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
// Create the external variables that will point to the begin and end of the
// host entries section. These will be defined by the linker.
- auto *OffloadEntryTy =
+ llvm::Type *OffloadEntryTy =
CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
- llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
- M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
- ".omp_offloading.entries_begin");
- llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
+ std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
+ auto *HostEntriesBegin = new llvm::GlobalVariable(
M, OffloadEntryTy, /*isConstant=*/true,
llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
- ".omp_offloading.entries_end");
+ EntriesBeginName);
+ std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
+ auto *HostEntriesEnd =
+ new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
+ llvm::GlobalValue::ExternalLinkage,
+ /*Initializer=*/nullptr, EntriesEndName);
// Create all device images
auto *DeviceImageTy = cast<llvm::StructType>(
CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
ConstantInitBuilder DeviceImagesBuilder(CGM);
- auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
+ ConstantArrayBuilder DeviceImagesEntries =
+ DeviceImagesBuilder.beginArray(DeviceImageTy);
- for (unsigned i = 0; i < Devices.size(); ++i) {
- StringRef T = Devices[i].getTriple();
+ for (const llvm::Triple &Device : Devices) {
+ StringRef T = Device.getTriple();
+ std::string BeginName = getName({"omp_offloading", "img_start", ""});
auto *ImgBegin = new llvm::GlobalVariable(
M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr,
- Twine(".omp_offloading.img_start.") + Twine(T));
+ /*Initializer=*/nullptr, Twine(BeginName).concat(T));
+ std::string EndName = getName({"omp_offloading", "img_end", ""});
auto *ImgEnd = new llvm::GlobalVariable(
M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
+ /*Initializer=*/nullptr, Twine(EndName).concat(T));
- auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
- Dev.add(ImgBegin);
- Dev.add(ImgEnd);
- Dev.add(HostEntriesBegin);
- Dev.add(HostEntriesEnd);
- Dev.finishAndAddTo(DeviceImagesEntries);
+ llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
+ HostEntriesEnd};
+ createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
+ DeviceImagesEntries);
}
// Create device images global array.
+ std::string ImagesName = getName({"omp_offloading", "device_images"});
llvm::GlobalVariable *DeviceImages =
- DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
- CGM.getPointerAlign(),
- /*isConstant=*/true);
+ DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
+ CGM.getPointerAlign(),
+ /*isConstant=*/true);
DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// This is a Zero array to be used in the creation of the constant expressions
@@ -3472,49 +3776,64 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
llvm::Constant::getNullValue(CGM.Int32Ty)};
// Create the target region descriptor.
- auto *BinaryDescriptorTy = cast<llvm::StructType>(
- CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
- ConstantInitBuilder DescBuilder(CGM);
- auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
- DescInit.addInt(CGM.Int32Ty, Devices.size());
- DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
- DeviceImages,
- Index));
- DescInit.add(HostEntriesBegin);
- DescInit.add(HostEntriesEnd);
-
- auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
- CGM.getPointerAlign(),
- /*isConstant=*/true);
+ llvm::Constant *Data[] = {
+ llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
+ llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
+ DeviceImages, Index),
+ HostEntriesBegin, HostEntriesEnd};
+ std::string Descriptor = getName({"omp_offloading", "descriptor"});
+ llvm::GlobalVariable *Desc = createConstantGlobalStruct(
+ CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor);
// Emit code to register or unregister the descriptor at program startup or
// shutdown, respectively.
- // Create a variable to drive the registration and unregistration of the
- // descriptor, so we can reuse the logic that emits Ctors and Dtors.
- auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
- IdentInfo, C.CharTy, ImplicitParamDecl::Other);
-
- auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
- CGM, ".omp_offloading.descriptor_unreg",
- [&](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
- Desc);
- });
- auto *RegFn = createOffloadingBinaryDescriptorFunction(
- CGM, ".omp_offloading.descriptor_reg",
- [&](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib),
- Desc);
- CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
- });
+ llvm::Function *UnRegFn;
+ {
+ FunctionArgList Args;
+ ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.push_back(&DummyPtr);
+
+ CodeGenFunction CGF(CGM);
+ // Disable debug info for the global (de-)initializers because they are not
+ // part of any particular construct.
+ CGF.disableDebugInfo();
+ const auto &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
+ UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
+ Desc);
+ CGF.FinishFunction();
+ }
+ llvm::Function *RegFn;
+ {
+ CodeGenFunction CGF(CGM);
+ // Disable debug info for the global (de-)initializers because they are not
+ // part of any particular construct.
+ CGF.disableDebugInfo();
+ const auto &FI = CGM.getTypes().arrangeNullaryFunction();
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ std::string Descriptor = getName({"omp_offloading", "descriptor_reg"});
+ RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
+ // Create a variable to drive the registration and unregistration of the
+ // descriptor, so we can reuse the logic that emits Ctors and Dtors.
+ ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
+ SourceLocation(), nullptr, C.CharTy,
+ ImplicitParamDecl::Other);
+ CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
+ CGF.FinishFunction();
+ }
if (CGM.supportsCOMDAT()) {
// It is sufficient to call registration function only once, so create a
// COMDAT group for registration/unregistration functions and associated
// data. That would reduce startup time and code size. Registration
// function serves as a COMDAT group key.
- auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
+ llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
RegFn->setComdat(ComdatKey);
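// Net effect of the two helpers built above (sketch; Desc is the descriptor
// global created earlier):
//   static void omp_offloading_descriptor_unreg(void *) {
//     __tgt_unregister_lib(&Desc);
//   }
//   static void omp_offloading_descriptor_reg(void) {
//     __tgt_register_lib(&Desc);
//     /* arrange for the unregister function to run at program shutdown */
//   }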
@@ -3525,48 +3844,35 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
return RegFn;
}
-void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
- llvm::Constant *Addr, uint64_t Size,
- int32_t Flags) {
+void CGOpenMPRuntime::createOffloadEntry(
+ llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
+ llvm::GlobalValue::LinkageTypes Linkage) {
StringRef Name = Addr->getName();
- auto *TgtOffloadEntryType = cast<llvm::StructType>(
- CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
- llvm::LLVMContext &C = CGM.getModule().getContext();
llvm::Module &M = CGM.getModule();
-
- // Make sure the address has the right type.
- llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
+ llvm::LLVMContext &C = M.getContext();
// Create constant string with the name.
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
- llvm::GlobalVariable *Str =
- new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
- llvm::GlobalValue::InternalLinkage, StrPtrInit,
- ".omp_offloading.entry_name");
+ std::string StringName = getName({"omp_offloading", "entry_name"});
+ auto *Str = new llvm::GlobalVariable(
+ M, StrPtrInit->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
-
- // We can't have any padding between symbols, so we need to have 1-byte
- // alignment.
- auto Align = CharUnits::fromQuantity(1);
-
- // Create the entry struct.
- ConstantInitBuilder EntryBuilder(CGM);
- auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
- EntryInit.add(AddrPtr);
- EntryInit.add(StrPtr);
- EntryInit.addInt(CGM.SizeTy, Size);
- EntryInit.addInt(CGM.Int32Ty, Flags);
- EntryInit.addInt(CGM.Int32Ty, 0);
- llvm::GlobalVariable *Entry =
- EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
- Align,
- /*constant*/ true,
- llvm::GlobalValue::ExternalLinkage);
+
+ llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
+ llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
+ llvm::ConstantInt::get(CGM.SizeTy, Size),
+ llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+ llvm::ConstantInt::get(CGM.Int32Ty, 0)};
+ std::string EntryName = getName({"omp_offloading", "entry", ""});
+ llvm::GlobalVariable *Entry = createConstantGlobalStruct(
+ CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name),
+ llvm::GlobalValue::WeakAnyLinkage);
// The entry has to be created in the section the linker expects it to be in.
- Entry->setSection(".omp_offloading.entries");
+ std::string Section = getName({"omp_offloading", "entries"});
+ Entry->setSection(Section);
}
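// Layout of each emitted entry, as the Data initializer above suggests; the
// field names follow libomptarget's __tgt_offload_entry (sketch):
//   struct __tgt_offload_entry {
//     void    *addr;     // address of the function or variable
//     char    *name;     // mangled name string
//     size_t   size;     // 0 for functions, the variable's size otherwise
//     int32_t  flags;    // entry kind
//     int32_t  reserved; // currently always 0
//   };
// All entries land in the offloading entries section so the linker can bound
// them with the entries_begin/entries_end symbols created earlier.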
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
@@ -3579,71 +3885,142 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Right now we only generate metadata for functions that contain target
// regions.
- // If we do not have entries, we dont need to do anything.
+ // If we do not have entries, we don't need to do anything.
if (OffloadEntriesInfoManager.empty())
return;
llvm::Module &M = CGM.getModule();
llvm::LLVMContext &C = M.getContext();
- SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
+ SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
OrderedEntries(OffloadEntriesInfoManager.size());
- // Create the offloading info metadata node.
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
-
// Auxiliary methods to create metadata values and strings.
- auto getMDInt = [&](unsigned v) {
+ auto &&GetMDInt = [this](unsigned V) {
return llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
+ llvm::ConstantInt::get(CGM.Int32Ty, V));
};
- auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
+ auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
+
+ // Create the offloading info metadata node.
+ llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
// Create a function that emits metadata for each target region entry.
- auto &&TargetRegionMetadataEmitter = [&](
- unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
- OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
- llvm::SmallVector<llvm::Metadata *, 32> Ops;
- // Generate metadata for target regions. Each entry of this metadata
- // contains:
- // - Entry 0 -> Kind of this type of metadata (0).
- // - Entry 1 -> Device ID of the file where the entry was identified.
- // - Entry 2 -> File ID of the file where the entry was identified.
- // - Entry 3 -> Mangled name of the function where the entry was identified.
- // - Entry 4 -> Line in the file where the entry was identified.
- // - Entry 5 -> Order the entry was created.
- // The first element of the metadata node is the kind.
- Ops.push_back(getMDInt(E.getKind()));
- Ops.push_back(getMDInt(DeviceID));
- Ops.push_back(getMDInt(FileID));
- Ops.push_back(getMDString(ParentName));
- Ops.push_back(getMDInt(Line));
- Ops.push_back(getMDInt(E.getOrder()));
-
- // Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
-
- // Add metadata to the named metadata node.
- MD->addOperand(llvm::MDNode::get(C, Ops));
- };
+ auto &&TargetRegionMetadataEmitter =
+ [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
+ unsigned DeviceID, unsigned FileID, StringRef ParentName,
+ unsigned Line,
+ const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
+ // Generate metadata for target regions. Each entry of this metadata
+ // contains:
+ // - Entry 0 -> Kind of this type of metadata (0).
+ // - Entry 1 -> Device ID of the file where the entry was identified.
+ // - Entry 2 -> File ID of the file where the entry was identified.
+ // - Entry 3 -> Mangled name of the function where the entry was
+ // identified.
+ // - Entry 4 -> Line in the file where the entry was identified.
+ // - Entry 5 -> Order the entry was created.
+ // The first element of the metadata node is the kind.
+ llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
+ GetMDInt(FileID), GetMDString(ParentName),
+ GetMDInt(Line), GetMDInt(E.getOrder())};
+
+ // Save this entry in the right position of the ordered entries array.
+ OrderedEntries[E.getOrder()] = &E;
+
+ // Add metadata to the named metadata node.
+ MD->addOperand(llvm::MDNode::get(C, Ops));
+ };
OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
TargetRegionMetadataEmitter);
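// An emitted target region node looks roughly like this in the IR (values
// illustrative):
//   !omp_offload.info = !{!0, ...}
//   !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>",
//          i32 <line>, i32 <order>}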
- for (auto *E : OrderedEntries) {
+ // Create a function that emits metadata for each device global variable entry.
+ auto &&DeviceGlobalVarMetadataEmitter =
+ [&C, &OrderedEntries, &GetMDInt, &GetMDString,
+ MD](StringRef MangledName,
+ const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
+ &E) {
+ // Generate metadata for global variables. Each entry of this metadata
+ // contains:
+ // - Entry 0 -> Kind of this type of metadata (1).
+ // - Entry 1 -> Mangled name of the variable.
+ // - Entry 2 -> Declare target kind.
+ // - Entry 3 -> Order the entry was created.
+ // The first element of the metadata node is the kind.
+ llvm::Metadata *Ops[] = {
+ GetMDInt(E.getKind()), GetMDString(MangledName),
+ GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
+
+ // Save this entry in the right position of the ordered entries array.
+ OrderedEntries[E.getOrder()] = &E;
+
+ // Add metadata to the named metadata node.
+ MD->addOperand(llvm::MDNode::get(C, Ops));
+ };
+
+ OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
+ DeviceGlobalVarMetadataEmitter);
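// A device global variable node (kind 1) looks roughly like (values
// illustrative):
//   !1 = !{i32 1, !"<mangled-var-name>", i32 <declare-target-kind>, i32 <order>}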
+
+ for (const auto *E : OrderedEntries) {
assert(E && "All ordered entries must exist!");
- if (auto *CE =
+ if (const auto *CE =
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
E)) {
- assert(CE->getID() && CE->getAddress() &&
- "Entry ID and Addr are invalid!");
- createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
- } else
+ if (!CE->getID() || !CE->getAddress()) {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "Offloading entry for target region is incorrect: either the "
+ "address or the ID is invalid.");
+ CGM.getDiags().Report(DiagID);
+ continue;
+ }
+ createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
+ CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
+ } else if (const auto *CE =
+ dyn_cast<OffloadEntriesInfoManagerTy::
+ OffloadEntryInfoDeviceGlobalVar>(E)) {
+ OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
+ static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
+ CE->getFlags());
+ switch (Flags) {
+ case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
+ if (!CE->getAddress()) {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "Offloading entry for declare target variable is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(DiagID);
+ continue;
+ }
+ break;
+ }
+ case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
+ assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
+ (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
+ "Declaret target link address is set.");
+ if (CGM.getLangOpts().OpenMPIsDevice)
+ continue;
+ if (!CE->getAddress()) {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "Offloading entry for declare target variable is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(DiagID);
+ continue;
+ }
+ break;
+ }
+ createOffloadEntry(CE->getAddress(), CE->getAddress(),
+ CE->getVarSize().getQuantity(), Flags,
+ CE->getLinkage());
+ } else {
llvm_unreachable("Unsupported entry kind.");
+ }
}
}
-/// \brief Loads all the offload entries information from the host IR
+/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
// If we are in target mode, load the metadata from the host IR. This code has
@@ -3656,44 +4033,57 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() {
return;
auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
- if (Buf.getError())
+ if (auto EC = Buf.getError()) {
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << CGM.getLangOpts().OMPHostIRFile << EC.message();
return;
+ }
llvm::LLVMContext C;
auto ME = expectedToErrorOrAndEmitErrors(
C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
- if (ME.getError())
+ if (auto EC = ME.getError()) {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
+ CGM.getDiags().Report(DiagID)
+ << CGM.getLangOpts().OMPHostIRFile << EC.message();
return;
+ }
llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
if (!MD)
return;
- for (auto I : MD->operands()) {
- llvm::MDNode *MN = cast<llvm::MDNode>(I);
-
- auto getMDInt = [&](unsigned Idx) {
- llvm::ConstantAsMetadata *V =
- cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
+ for (llvm::MDNode *MN : MD->operands()) {
+ auto &&GetMDInt = [MN](unsigned Idx) {
+ auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
};
- auto getMDString = [&](unsigned Idx) {
- llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
+ auto &&GetMDString = [MN](unsigned Idx) {
+ auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
return V->getString();
};
- switch (getMDInt(0)) {
+ switch (GetMDInt(0)) {
default:
llvm_unreachable("Unexpected metadata!");
break;
case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
- OFFLOAD_ENTRY_INFO_TARGET_REGION:
+ OffloadingEntryInfoTargetRegion:
OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
- /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
- /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
- /*Order=*/getMDInt(5));
+ /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
+ /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
+ /*Order=*/GetMDInt(5));
+ break;
+ case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
+ OffloadingEntryInfoDeviceGlobalVar:
+ OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
+ /*MangledName=*/GetMDString(1),
+ static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
+ /*Flags=*/GetMDInt(2)),
+ /*Order=*/GetMDInt(3));
break;
}
}
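
The same records can also be walked outside of CodeGen; a standalone sketch with the plain LLVM API (the helper name is made up, and error reporting is trimmed to the bare minimum):

    #include "llvm/Bitcode/BitcodeReader.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/Casting.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"

    static void walkOffloadInfo(llvm::StringRef HostIRFile) {
      auto Buf = llvm::MemoryBuffer::getFile(HostIRFile);
      if (!Buf)
        return; // a real caller reports Buf.getError(), as the code above does
      llvm::LLVMContext Ctx;
      auto M = llvm::parseBitcodeFile((*Buf)->getMemBufferRef(), Ctx);
      if (!M) {
        llvm::consumeError(M.takeError());
        return;
      }
      llvm::NamedMDNode *Info = (*M)->getNamedMetadata("omp_offload.info");
      if (!Info)
        return;
      for (llvm::MDNode *MN : Info->operands()) {
        auto *KindC = llvm::cast<llvm::ConstantAsMetadata>(MN->getOperand(0));
        auto Kind =
            llvm::cast<llvm::ConstantInt>(KindC->getValue())->getZExtValue();
        (void)Kind; // 0 -> target region (6 operands), 1 -> global var (4)
      }
    }
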
@@ -3702,7 +4092,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() {
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
if (!KmpRoutineEntryPtrTy) {
// Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
FunctionProtoType::ExtProtoInfo EPI;
KmpRoutineEntryPtrQTy = C.getPointerType(
@@ -3711,19 +4101,7 @@ void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
}
}
-static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
- QualType FieldTy) {
- auto *Field = FieldDecl::Create(
- C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
- C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- DC->addDecl(Field);
- return Field;
-}
-
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
-
// Make sure the type of the entry is already created. This is the type we
// have to create:
// struct __tgt_offload_entry{
@@ -3736,7 +4114,7 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
// };
if (TgtOffloadEntryQTy.isNull()) {
ASTContext &C = CGM.getContext();
- auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
+ RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
RD->startDefinition();
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
@@ -3746,6 +4124,7 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
addFieldToRecordDecl(
C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
RD->completeDefinition();
+ RD->addAttr(PackedAttr::CreateImplicit(C));
TgtOffloadEntryQTy = C.getRecordType(RD);
}
return TgtOffloadEntryQTy;
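
In plain C++ terms, the record built here corresponds to the entry descriptor the offloading runtime consumes. A sketch (the size field that the hunk elides is the target's size_t; the packed attribute added by this change keeps host and device layouts in agreement):

    #include <cstddef>
    #include <cstdint>
    struct __attribute__((packed)) __tgt_offload_entry {
      void *addr;            // address of the target region or global
      char *name;            // mangled entry name
      std::size_t size;      // 0 for target regions, byte size for globals
      std::int32_t flags;    // entry flags (e.g. declare target to/link)
      std::int32_t reserved; // reserved for use by the runtime library
    };
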
@@ -3765,7 +4144,7 @@ QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
// };
if (TgtDeviceImageQTy.isNull()) {
ASTContext &C = CGM.getContext();
- auto *RD = C.buildImplicitRecord("__tgt_device_image");
+ RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
RD->startDefinition();
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
@@ -3789,7 +4168,7 @@ QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
// };
if (TgtBinaryDescriptorQTy.isNull()) {
ASTContext &C = CGM.getContext();
- auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
+ RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
RD->startDefinition();
addFieldToRecordDecl(
C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
@@ -3818,17 +4197,16 @@ typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
if (!Privates.empty()) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
// Build struct .kmp_privates_t. {
// /* private vars */
// };
- auto *RD = C.buildImplicitRecord(".kmp_privates.t");
+ RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
RD->startDefinition();
- for (auto &&Pair : Privates) {
- auto *VD = Pair.second.Original;
- auto Type = VD->getType();
- Type = Type.getNonReferenceType();
- auto *FD = addFieldToRecordDecl(C, RD, Type);
+ for (const auto &Pair : Privates) {
+ const VarDecl *VD = Pair.second.Original;
+ QualType Type = VD->getType().getNonReferenceType();
+ FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
if (VD->hasAttrs()) {
for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
E(VD->getAttrs().end());
@@ -3846,7 +4224,7 @@ static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
QualType KmpInt32Ty,
QualType KmpRoutineEntryPointerQTy) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
// Build struct kmp_task_t {
// void * shareds;
// kmp_routine_entry_t routine;
@@ -3860,13 +4238,13 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
// kmp_int32 liter;
// void * reductions;
// };
- auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
+ RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
UD->startDefinition();
addFieldToRecordDecl(C, UD, KmpInt32Ty);
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
UD->completeDefinition();
QualType KmpCmplrdataTy = C.getRecordType(UD);
- auto *RD = C.buildImplicitRecord("kmp_task_t");
+ RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
RD->startDefinition();
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
@@ -3891,22 +4269,21 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
ArrayRef<PrivateDataTy> Privates) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
// Build struct kmp_task_t_with_privates {
// kmp_task_t task_data;
// .kmp_privates_t. privates;
// };
- auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
+ RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
RD->startDefinition();
addFieldToRecordDecl(C, RD, KmpTaskTQTy);
- if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
+ if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
- }
RD->completeDefinition();
return RD;
}
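
Field for field, the records built by these helpers amount to the following picture (a sketch; the taskloop fields exist only for taskloop directives, and the privates record only when the task has privates):

    #include <cstdint>
    using kmp_int32 = std::int32_t;
    using kmp_routine_entry_t = kmp_int32 (*)(kmp_int32, void *);
    union kmp_cmplrdata_t {
      kmp_int32 priority;              // used via data2
      kmp_routine_entry_t destructors; // used via data1
    };
    struct kmp_task_t {
      void *shareds;
      kmp_routine_entry_t routine;
      kmp_int32 part_id;
      kmp_cmplrdata_t data1;
      kmp_cmplrdata_t data2;
      // Taskloop directives append:
      std::uint64_t lb, ub;
      std::int64_t st;
      kmp_int32 liter;
      void *reductions;
    };
    struct kmp_task_t_with_privates {
      kmp_task_t task_data;
      // .kmp_privates_t. privates;  // only when Privates is non-empty
    };
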
-/// \brief Emit a proxy function which accepts kmp_task_t as the second
+/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
@@ -3924,7 +4301,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
QualType SharedsPtrTy, llvm::Value *TaskFunction,
llvm::Value *TaskPrivatesMap) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
ImplicitParamDecl::Other);
@@ -3933,49 +4310,53 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
- auto &TaskEntryFnInfo =
+ const auto &TaskEntryFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
- auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
- auto *TaskEntry =
- llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
- ".omp_task_entry.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
+ llvm::FunctionType *TaskEntryTy =
+ CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+ std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
+ auto *TaskEntry = llvm::Function::Create(
+ TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
+ TaskEntry->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
+ Loc, Loc);
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
// tt,
// For taskloops:
// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
// tt->task_data.shareds);
- auto *GtidParam = CGF.EmitLoadOfScalar(
+ llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
LValue TDBase = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(&TaskTypeArg),
KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
- auto *KmpTaskTWithPrivatesQTyRD =
+ const auto *KmpTaskTWithPrivatesQTyRD =
cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
LValue Base =
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
- auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
+ const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
- auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
- auto *PartidParam = PartIdLVal.getPointer();
+ LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
+ llvm::Value *PartidParam = PartIdLVal.getPointer();
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
- auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
- auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
+ LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
+ llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(SharedsLVal, Loc),
CGF.ConvertTypeForMem(SharedsPtrTy));
auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
llvm::Value *PrivatesParam;
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
- auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
+ LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivatesLVal.getPointer(), CGF.VoidPtrTy);
- } else
+ } else {
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ }
llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
TaskPrivatesMap,
@@ -3987,20 +4368,20 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
std::end(CommonArgs));
if (isOpenMPTaskLoopDirective(Kind)) {
auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
- auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
- auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
+ LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
+ llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
- auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
- auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
+ LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
+ llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
- auto StLVal = CGF.EmitLValueForField(Base, *StFI);
- auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
+ LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
+ llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
- auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
- auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+ LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
+ llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
- auto RLVal = CGF.EmitLValueForField(Base, *RFI);
- auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
+ LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
+ llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
CallArgs.push_back(LBParam);
CallArgs.push_back(UBParam);
CallArgs.push_back(StParam);
@@ -4011,9 +4392,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
CallArgs);
- CGF.EmitStoreThroughLValue(
- RValue::get(CGF.Builder.getInt32(/*C=*/0)),
- CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
+ CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
+ CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
CGF.FinishFunction();
return TaskEntry;
}
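
Written out by hand, the proxy is a thin trampoline around the outlined task body; a sketch for the non-taskloop case (the record contents and the single private are hypothetical):

    #include <cstdint>
    using kmp_int32 = std::int32_t;
    struct kmp_task_t { void *shareds; void *routine; kmp_int32 part_id; };
    struct kmp_privates_t { int a; };
    struct kmp_task_t_with_privates {
      kmp_task_t task_data;
      kmp_privates_t privates;
    };
    // The outlined task body emitted elsewhere for this directive.
    extern void task_function(kmp_int32 gtid, kmp_int32 *part_id, void *privs,
                              void *privates_map, void *task, void *shareds);
    extern void privates_map_fn(); // stands in for the privates mapping helper

    static kmp_int32 omp_task_entry(kmp_int32 gtid,
                                    kmp_task_t_with_privates *tt) {
      task_function(gtid, &tt->task_data.part_id, &tt->privates,
                    reinterpret_cast<void *>(&privates_map_fn), tt,
                    tt->task_data.shareds);
      return 0;
    }
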
@@ -4023,7 +4403,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
QualType KmpInt32Ty,
QualType KmpTaskTWithPrivatesPtrQTy,
QualType KmpTaskTWithPrivatesQTy) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
ImplicitParamDecl::Other);
@@ -4032,30 +4412,34 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
- auto &DestructorFnInfo =
+ const auto &DestructorFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
- auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
+ llvm::FunctionType *DestructorFnTy =
+ CGM.getTypes().GetFunctionType(DestructorFnInfo);
+ std::string Name =
+ CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
auto *DestructorFn =
llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
- ".omp_task_destructor.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
DestructorFnInfo);
+ DestructorFn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
- Args);
+ Args, Loc, Loc);
LValue Base = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(&TaskTypeArg),
KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
- auto *KmpTaskTWithPrivatesQTyRD =
+ const auto *KmpTaskTWithPrivatesQTyRD =
cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
Base = CGF.EmitLValueForField(Base, *FI);
- for (auto *Field :
+ for (const auto *Field :
cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
- if (auto DtorKind = Field->getType().isDestructedType()) {
- auto FieldLValue = CGF.EmitLValueForField(Base, Field);
+ if (QualType::DestructionKind DtorKind =
+ Field->getType().isDestructedType()) {
+ LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
}
}
@@ -4063,7 +4447,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
return DestructorFn;
}
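
Its hand-written counterpart just invokes the destructor of every non-trivially destructible private field; a sketch with one hypothetical private:

    #include <cstdint>
    using kmp_int32 = std::int32_t;
    struct Priv { ~Priv(); };          // some private with a real destructor
    struct kmp_privates_t { Priv p; };
    struct kmp_task_t_with_privates {
      /* kmp_task_t task_data; */
      kmp_privates_t privates;
    };

    static kmp_int32 omp_task_destructor(kmp_int32 /*gtid*/,
                                         kmp_task_t_with_privates *tt) {
      tt->privates.p.~Priv(); // one pushDestroy per such field, in order
      return 0;
    }
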
-/// \brief Emit a privates mapping function for correct handling of private and
+/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
@@ -4080,7 +4464,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
ArrayRef<const Expr *> LastprivateVars,
QualType PrivatesQTy,
ArrayRef<PrivateDataTy> Privates) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl TaskPrivatesArg(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
@@ -4089,67 +4473,69 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
Args.push_back(&TaskPrivatesArg);
llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
unsigned Counter = 1;
- for (auto *E: PrivateVars) {
+ for (const Expr *E : PrivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
.withConst()
.withRestrict(),
ImplicitParamDecl::Other));
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
- for (auto *E : FirstprivateVars) {
+ for (const Expr *E : FirstprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
.withConst()
.withRestrict(),
ImplicitParamDecl::Other));
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
- for (auto *E: LastprivateVars) {
+ for (const Expr *E : LastprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
.withConst()
.withRestrict(),
ImplicitParamDecl::Other));
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
- auto &TaskPrivatesMapFnInfo =
+ const auto &TaskPrivatesMapFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *TaskPrivatesMapTy =
+ llvm::FunctionType *TaskPrivatesMapTy =
CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
+ std::string Name =
+ CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
auto *TaskPrivatesMap = llvm::Function::Create(
- TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
- ".omp_task_privates_map.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
+ TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
+ &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
TaskPrivatesMapFnInfo);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
- TaskPrivatesMapFnInfo, Args);
+ TaskPrivatesMapFnInfo, Args, Loc, Loc);
// *privi = &.privates.privi;
LValue Base = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
TaskPrivatesArg.getType()->castAs<PointerType>());
- auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
+ const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
Counter = 0;
- for (auto *Field : PrivatesQTyRD->fields()) {
- auto FieldLVal = CGF.EmitLValueForField(Base, Field);
- auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
- auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
- auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
+ for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
+ LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
+ const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
+ LValue RefLVal =
+ CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
+ LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
++Counter;
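
In hand-written form the mapping helper just hands out field addresses; a sketch with two hypothetical privates (the generated version also marks the arguments const and noalias, dropped here so the sketch compiles):

    struct kmp_privates_t { int a; double b; };

    static void omp_task_privates_map(kmp_privates_t *privs, int **a,
                                      double **b) {
      *a = &privs->a; // each out-parameter receives its field's address
      *b = &privs->b;
    }
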
@@ -4171,9 +4557,14 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
QualType SharedsTy, QualType SharedsPtrTy,
const OMPTaskDataTy &Data,
ArrayRef<PrivateDataTy> Privates, bool ForDup) {
- auto &C = CGF.getContext();
+ ASTContext &C = CGF.getContext();
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
+ OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
+ ? OMPD_taskloop
+ : OMPD_task;
+ const CapturedStmt &CS = *D.getCapturedStmt(Kind);
+ CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
LValue SrcBase;
bool IsTargetTask =
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
@@ -4182,40 +4573,38 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// PointersArray and SizesArray. The original variables for these arrays are
// not captured and we get their addresses explicitly.
if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
- (IsTargetTask && Data.FirstprivateVars.size() > 3)) {
+ (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
SrcBase = CGF.MakeAddrLValue(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
SharedsTy);
}
- OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
- ? OMPD_taskloop
- : OMPD_task;
- CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind));
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
- for (auto &&Pair : Privates) {
- auto *VD = Pair.second.PrivateCopy;
- auto *Init = VD->getAnyInitializer();
+ for (const PrivateDataTy &Pair : Privates) {
+ const VarDecl *VD = Pair.second.PrivateCopy;
+ const Expr *Init = VD->getAnyInitializer();
if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
!CGF.isTrivialInitializer(Init)))) {
LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
- if (auto *Elem = Pair.second.PrivateElemInit) {
- auto *OriginalVD = Pair.second.Original;
+ if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
+ const VarDecl *OriginalVD = Pair.second.Original;
// Check if the variable is the target-based BasePointersArray,
// PointersArray or SizesArray.
LValue SharedRefLValue;
QualType Type = OriginalVD->getType();
- if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) &&
- isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
- cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() ==
- 0 &&
- isa<TranslationUnitDecl>(
- cast<CapturedDecl>(OriginalVD->getDeclContext())
- ->getDeclContext())) {
+ const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
+ if (IsTargetTask && !SharedField) {
+ assert(isa<ImplicitParamDecl>(OriginalVD) &&
+ isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
+ cast<CapturedDecl>(OriginalVD->getDeclContext())
+ ->getNumParams() == 0 &&
+ isa<TranslationUnitDecl>(
+ cast<CapturedDecl>(OriginalVD->getDeclContext())
+ ->getDeclContext()) &&
+ "Expected artificial target data variable.");
SharedRefLValue =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
} else {
- auto *SharedField = CapturesInfo.lookup(OriginalVD);
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
@@ -4226,8 +4615,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// Initialize firstprivate array.
if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
// Perform simple memcpy.
- CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
- SharedRefLValue.getAddress(), Type);
+ CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
} else {
// Initialize firstprivate array using element-by-element
// initialization.
@@ -4258,8 +4646,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
CGF.EmitExprAsInit(Init, VD, PrivateLValue,
/*capturedByInit=*/false);
}
- } else
+ } else {
CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
+ }
}
++FI;
}
@@ -4269,11 +4658,13 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
static bool checkInitIsRequired(CodeGenFunction &CGF,
ArrayRef<PrivateDataTy> Privates) {
bool InitRequired = false;
- for (auto &&Pair : Privates) {
- auto *VD = Pair.second.PrivateCopy;
- auto *Init = VD->getAnyInitializer();
+ for (const PrivateDataTy &Pair : Privates) {
+ const VarDecl *VD = Pair.second.PrivateCopy;
+ const Expr *Init = VD->getAnyInitializer();
InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
!CGF.isTrivialInitializer(Init));
+ if (InitRequired)
+ break;
}
return InitRequired;
}
@@ -4297,7 +4688,7 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
QualType SharedsPtrTy, const OMPTaskDataTy &Data,
ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
KmpTaskTWithPrivatesPtrQTy,
@@ -4310,16 +4701,17 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
Args.push_back(&DstArg);
Args.push_back(&SrcArg);
Args.push_back(&LastprivArg);
- auto &TaskDupFnInfo =
+ const auto &TaskDupFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
- auto *TaskDup =
- llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
- ".omp_task_dup.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
+ llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
+ std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
+ auto *TaskDup = llvm::Function::Create(
+ TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
+ TaskDup->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
+ Loc);
LValue TDBase = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(&DstArg),
@@ -4362,9 +4754,9 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
bool NeedsCleanup = false;
- auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
- auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
- for (auto *FD : PrivateRD->fields()) {
+ auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
+ const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
+ for (const FieldDecl *FD : PrivateRD->fields()) {
NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
if (NeedsCleanup)
break;
@@ -4377,41 +4769,41 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
llvm::Value *TaskFunction, QualType SharedsTy,
Address Shareds, const OMPTaskDataTy &Data) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
llvm::SmallVector<PrivateDataTy, 4> Privates;
// Aggregate privates and sort them by the alignment.
auto I = Data.PrivateCopies.begin();
- for (auto *E : Data.PrivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- Privates.push_back(std::make_pair(
+ for (const Expr *E : Data.PrivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Privates.emplace_back(
C.getDeclAlign(VD),
PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
- /*PrivateElemInit=*/nullptr)));
+ /*PrivateElemInit=*/nullptr));
++I;
}
I = Data.FirstprivateCopies.begin();
auto IElemInitRef = Data.FirstprivateInits.begin();
- for (auto *E : Data.FirstprivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- Privates.push_back(std::make_pair(
+ for (const Expr *E : Data.FirstprivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Privates.emplace_back(
C.getDeclAlign(VD),
PrivateHelpersTy(
VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
- cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
+ cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
++I;
++IElemInitRef;
}
I = Data.LastprivateCopies.begin();
- for (auto *E : Data.LastprivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- Privates.push_back(std::make_pair(
+ for (const Expr *E : Data.LastprivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Privates.emplace_back(
C.getDeclAlign(VD),
PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
- /*PrivateElemInit=*/nullptr)));
+ /*PrivateElemInit=*/nullptr));
++I;
}
std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
- auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
// Build type kmp_task_t (if not built yet).
@@ -4432,21 +4824,23 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
}
KmpTaskTQTy = SavedKmpTaskTQTy;
}
- auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
+ const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
// Build particular struct kmp_task_t for the given task.
- auto *KmpTaskTWithPrivatesQTyRD =
+ const RecordDecl *KmpTaskTWithPrivatesQTyRD =
createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
- auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
+ QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
QualType KmpTaskTWithPrivatesPtrQTy =
C.getPointerType(KmpTaskTWithPrivatesQTy);
- auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
- auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
- auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
+ llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
+ llvm::Type *KmpTaskTWithPrivatesPtrTy =
+ KmpTaskTWithPrivatesTy->getPointerTo();
+ llvm::Value *KmpTaskTWithPrivatesTySize =
+ CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
QualType SharedsPtrTy = C.getPointerType(SharedsTy);
// Emit initial values for private copies (if any).
llvm::Value *TaskPrivatesMap = nullptr;
- auto *TaskPrivatesMapTy =
+ llvm::Type *TaskPrivatesMapTy =
std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
@@ -4461,7 +4855,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
}
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
// kmp_task_t *tt);
- auto *TaskEntry = emitProxyTaskFunction(
+ llvm::Value *TaskEntry = emitProxyTaskFunction(
CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
TaskPrivatesMap);
@@ -4487,23 +4881,24 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
}
if (Data.Priority.getInt())
Flags = Flags | PriorityFlag;
- auto *TaskFlags =
+ llvm::Value *TaskFlags =
Data.Final.getPointer()
? CGF.Builder.CreateSelect(Data.Final.getPointer(),
CGF.Builder.getInt32(FinalFlag),
CGF.Builder.getInt32(/*C=*/0))
: CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
- auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
+ llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
getThreadID(CGF, Loc), TaskFlags,
KmpTaskTWithPrivatesTySize, SharedsSize,
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskEntry, KmpRoutineEntryPtrTy)};
- auto *NewTask = CGF.EmitRuntimeCall(
+ llvm::Value *NewTask = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
- auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- NewTask, KmpTaskTWithPrivatesPtrTy);
+ llvm::Value *NewTaskNewTaskTTy =
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ NewTask, KmpTaskTWithPrivatesPtrTy);
LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
KmpTaskTWithPrivatesQTy);
LValue TDBase =
@@ -4519,7 +4914,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
KmpTaskTShareds)),
Loc),
CGF.getNaturalTypeAlignment(SharedsTy));
- CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
+ LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
+ LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
+ CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
}
// Emit initial values for private copies (if any).
TaskResultTy Result;
@@ -4539,7 +4936,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
enum { Priority = 0, Destructors = 1 };
// Provide pointer to function with destructors for privates.
auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
- auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
+ const RecordDecl *KmpCmplrdataUD =
+ (*FI)->getType()->getAsUnionType()->getDecl();
if (NeedsCleanup) {
llvm::Value *DestructorFn = emitDestructorsFunction(
CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
@@ -4582,8 +4980,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *TaskEntry = Result.TaskEntry;
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
LValue TDBase = Result.TDBase;
- RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
- auto &C = CGM.getContext();
+ const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
+ ASTContext &C = CGM.getContext();
// Process list of dependences.
Address DependenciesArray = Address::invalid();
unsigned NumDependencies = Data.Dependences.size();
@@ -4603,8 +5001,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
KmpDependInfoRD->completeDefinition();
KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
- } else
+ } else {
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+ }
CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
@@ -4613,12 +5012,13 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// kmp_depend_info[<Dependences.size()>] deps;
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
- for (unsigned i = 0; i < NumDependencies; ++i) {
- const Expr *E = Data.Dependences[i].second;
- auto Addr = CGF.EmitLValue(E);
+ for (unsigned I = 0; I < NumDependencies; ++I) {
+ const Expr *E = Data.Dependences[I].second;
+ LValue Addr = CGF.EmitLValue(E);
llvm::Value *Size;
QualType Ty = E->getType();
- if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
+ if (const auto *ASE =
+ dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
LValue UpAddrLVal =
CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
llvm::Value *UpAddr =
@@ -4627,24 +5027,25 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
- } else
+ } else {
Size = CGF.getTypeSize(Ty);
- auto Base = CGF.MakeAddrLValue(
- CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
+ }
+ LValue Base = CGF.MakeAddrLValue(
+ CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
KmpDependInfoTy);
// deps[i].base_addr = &<Dependences[i].second>;
- auto BaseAddrLVal = CGF.EmitLValueForField(
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(
CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
BaseAddrLVal);
// deps[i].len = sizeof(<Dependences[i].second>);
- auto LenLVal = CGF.EmitLValueForField(
+ LValue LenLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), Len));
CGF.EmitStoreOfScalar(Size, LenLVal);
// deps[i].flags = <Dependences[i].first>;
RTLDependenceKindTy DepKind;
- switch (Data.Dependences[i].first) {
+ switch (Data.Dependences[I].first) {
case OMPC_DEPEND_in:
DepKind = DepIn;
break;
@@ -4658,7 +5059,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
case OMPC_DEPEND_unknown:
llvm_unreachable("Unknown task dependence type");
}
- auto FlagsLVal = CGF.EmitLValueForField(
+ LValue FlagsLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
FlagsLVal);
@@ -4668,14 +5069,14 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.VoidPtrTy);
}
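
Each element written into .dep.arr.addr has this C-level shape (a sketch; the in/out flag values follow the runtime convention and are not spelled out in this hunk):

    #include <cstddef>
    #include <cstdint>
    struct kmp_depend_info {
      std::intptr_t base_addr; // address of the dependent storage
      std::size_t len;         // byte size; section length for array sections
      std::uint8_t flags;      // 0x1 for in, 0x3 for out and inout
    };
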
- // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
+ // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
// list is not empty
- auto *ThreadID = getThreadID(CGF, Loc);
- auto *UpLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
llvm::Value *DepTaskArgs[7];
if (NumDependencies) {
@@ -4692,7 +5093,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
- auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
+ LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
}
if (NumDependencies) {
@@ -4720,7 +5121,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
NumDependencies, &DepWaitTaskArgs,
Loc](CodeGenFunction &CGF, PrePostActionTy &) {
- auto &RT = CGF.CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
@@ -4750,9 +5151,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
RCG(CGF);
};
- if (IfCond)
+ if (IfCond) {
emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
- else {
+ } else {
RegionCodeGenTy ThenRCG(ThenCodeGen);
ThenRCG(CGF);
}
@@ -4768,7 +5169,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
return;
TaskResultTy Result =
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
- // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
+ // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.
// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
@@ -4779,27 +5180,28 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
if (IfCond) {
IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
/*isSigned=*/true);
- } else
+ } else {
IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
+ }
LValue LBLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
- auto *LBVar =
+ const auto *LBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
/*IsInitializer=*/true);
LValue UBLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
- auto *UBVar =
+ const auto *UBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
/*IsInitializer=*/true);
LValue StLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
- auto *StVar =
+ const auto *StVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
@@ -4807,9 +5209,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
LValue RedLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
- if (Data.Reductions)
+ if (Data.Reductions) {
CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
- else {
+ } else {
CGF.EmitNullInitialization(RedLVal.getAddress(),
CGF.getContext().VoidPtrTy);
}
@@ -4821,7 +5223,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
IfVal,
LBLVal.getPointer(),
UBLVal.getPointer(),
- CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ CGF.EmitLoadOfScalar(StLVal, Loc),
llvm::ConstantInt::getNullValue(
CGF.IntTy), // Always 0 because the taskgroup is emitted by the compiler
llvm::ConstantInt::getSigned(
@@ -4838,7 +5240,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
-/// \brief Emit reduction operation for each element of array (required for
+/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
@@ -4860,22 +5262,22 @@ static void EmitOMPAggregateReduction(
Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
// Drill down to the base element type on both arrays.
- auto ArrayTy = Type->getAsArrayTypeUnsafe();
- auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
+ const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
+ llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
- auto RHSBegin = RHSAddr.getPointer();
- auto LHSBegin = LHSAddr.getPointer();
+ llvm::Value *RHSBegin = RHSAddr.getPointer();
+ llvm::Value *LHSBegin = LHSAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
- auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
+ llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
// The basic structure here is a while-do loop.
- auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
- auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
- auto IsEmpty =
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
+ llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
+ llvm::Value *IsEmpty =
CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
- auto EntryBB = CGF.Builder.GetInsertBlock();
+ llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(BodyBB);
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
@@ -4896,19 +5298,19 @@ static void EmitOMPAggregateReduction(
// Emit copy.
CodeGenFunction::OMPPrivateScope Scope(CGF);
- Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
- Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
+ Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
+ Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
Scope.Privatize();
RedOpGen(CGF, XExpr, EExpr, UpExpr);
Scope.ForceCleanup();
// Shift the address forward by one element.
- auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
+ llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
- auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
+ llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
// Check whether we've reached the end.
- auto Done =
+ llvm::Value *Done =
CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
@@ -4923,11 +5325,12 @@ static void EmitOMPAggregateReduction(
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
const Expr *ReductionOp) {
- if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
- if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
- if (auto *DRE =
+ if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
+ if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+ if (const auto *DRE =
dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
- if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
+ if (const auto *DRD =
+ dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
std::pair<llvm::Function *, llvm::Function *> Reduction =
CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
RValue Func = RValue::get(Reduction.first);
@@ -4939,24 +5342,29 @@ static void emitReductionCombiner(CodeGenFunction &CGF,
}
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
- CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
- ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
- ArrayRef<const Expr *> ReductionOps) {
- auto &C = CGM.getContext();
+ CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
+ ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
+ ASTContext &C = CGM.getContext();
// void reduction_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *Fn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- ".omp.reduction.reduction_func", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ const auto &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ std::string Name = getName({"omp", "reduction", "reduction_func"});
+ auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
+ llvm::GlobalValue::InternalLinkage, Name,
+ &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
// Dst = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
@@ -4974,12 +5382,14 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction(
auto IPriv = Privates.begin();
unsigned Idx = 0;
for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
- auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
- Scope.addPrivate(RHSVar, [&]() -> Address {
+ const auto *RHSVar =
+ cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
+ Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
});
- auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
- Scope.addPrivate(LHSVar, [&]() -> Address {
+ const auto *LHSVar =
+ cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
+ Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
});
QualType PrivTy = (*IPriv)->getType();
@@ -4989,8 +5399,9 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction(
Address Elem =
CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
- auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
- auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
+ const VariableArrayType *VLA =
+ CGF.getContext().getAsVariableArrayType(PrivTy);
+ const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
CodeGenFunction::OpaqueValueMapping OpaqueMap(
CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
CGF.EmitVariablyModifiedType(PrivTy);
@@ -5000,19 +5411,20 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction(
IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
- for (auto *E : ReductionOps) {
+ for (const Expr *E : ReductionOps) {
if ((*IPriv)->getType()->isArrayType()) {
// Emit reduction for array section.
- auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
EmitOMPAggregateReduction(
CGF, (*IPriv)->getType(), LHSVar, RHSVar,
[=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
emitReductionCombiner(CGF, E);
});
- } else
+ } else {
// Emit reduction for array subscript or single variable.
emitReductionCombiner(CGF, E);
+ }
++IPriv;
++ILHS;
++IRHS;
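
For two hypothetical scalar reductions, an int sum and a double max, the function assembled above boils down to:

    static void omp_reduction_reduction_func(void *LHSArg, void *RHSArg) {
      void **Lhs = static_cast<void **>(LHSArg); // Dst = (void*[n])(LHSArg)
      void **Rhs = static_cast<void **>(RHSArg); // Src = (void*[n])(RHSArg)
      *static_cast<int *>(Lhs[0]) += *static_cast<int *>(Rhs[0]);
      double L = *static_cast<double *>(Lhs[1]);
      double R = *static_cast<double *>(Rhs[1]);
      *static_cast<double *>(Lhs[1]) = L > R ? L : R;
    }
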
@@ -5029,16 +5441,17 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
const DeclRefExpr *RHS) {
if (PrivateRef->getType()->isArrayType()) {
// Emit reduction for array section.
- auto *LHSVar = cast<VarDecl>(LHS->getDecl());
- auto *RHSVar = cast<VarDecl>(RHS->getDecl());
+ const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
+ const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
EmitOMPAggregateReduction(
CGF, PrivateRef->getType(), LHSVar, RHSVar,
[=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
emitReductionCombiner(CGF, ReductionOp);
});
- } else
+ } else {
// Emit reduction for array subscript or single variable.
emitReductionCombiner(CGF, ReductionOp);
+ }
}
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
@@ -5088,14 +5501,14 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
// ...
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
if (SimpleReduction) {
CodeGenFunction::RunCleanupsScope Scope(CGF);
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
- for (auto *E : ReductionOps) {
+ for (const Expr *E : ReductionOps) {
emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
cast<DeclRefExpr>(*IRHS));
++IPriv;
@@ -5108,7 +5521,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// 1. Build a list of reduction variables.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
auto Size = RHSExprs.size();
- for (auto *E : Privates) {
+ for (const Expr *E : Privates) {
if (E->getType()->isVariablyModifiedType())
// Reserve place for array size.
++Size;
@@ -5136,7 +5549,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *Size = CGF.Builder.CreateIntCast(
CGF.getVLASize(
CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
- .first,
+ .NumElts,
CGF.SizeTy, /*isSigned=*/false);
CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
Elem);
@@ -5144,19 +5557,20 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
// 2. Emit reduce_func().
- auto *ReductionFn = emitReductionFunction(
- CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
- LHSExprs, RHSExprs, ReductionOps);
+ llvm::Value *ReductionFn = emitReductionFunction(
+ CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+ Privates, LHSExprs, RHSExprs, ReductionOps);
// 3. Create static kmp_critical_name lock = { 0 };
- auto *Lock = getCriticalRegionLock(".reduction");
+ std::string Name = getName({"reduction"});
+ llvm::Value *Lock = getCriticalRegionLock(Name);
// 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
// RedList, reduce_func, &<lock>);
- auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
- auto *ThreadId = getThreadID(CGF, Loc);
- auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
- auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
+ llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
ReductionList.getPointer(), CGF.VoidPtrTy);
llvm::Value *Args[] = {
IdentTLoc, // ident_t *<loc>
@@ -5167,14 +5581,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ReductionFn, // void (*) (void *, void *) <reduce_func>
Lock // kmp_critical_name *&<lock>
};
- auto Res = CGF.EmitRuntimeCall(
+ llvm::Value *Res = CGF.EmitRuntimeCall(
createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
: OMPRTL__kmpc_reduce),
Args);
// 5. Build switch(res)
- auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
- auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
+ llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+ llvm::SwitchInst *SwInst =
+ CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
// 6. Build case 1:
// ...
@@ -5182,7 +5597,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// ...
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
// break;
- auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+ llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
CGF.EmitBlock(Case1BB);
@@ -5192,13 +5607,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ThreadId, // i32 <gtid>
Lock // kmp_critical_name *&<lock>
};
- auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
- CodeGenFunction &CGF, PrePostActionTy &Action) {
- auto &RT = CGF.CGM.getOpenMPRuntime();
+ auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
+ CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
- for (auto *E : ReductionOps) {
+ for (const Expr *E : ReductionOps) {
RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
cast<DeclRefExpr>(*IRHS));
++IPriv;
@@ -5222,44 +5637,44 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
// ...
// break;
- auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
+ llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
CGF.EmitBlock(Case2BB);
- auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
- CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
auto IPriv = Privates.begin();
- for (auto *E : ReductionOps) {
+ for (const Expr *E : ReductionOps) {
const Expr *XExpr = nullptr;
const Expr *EExpr = nullptr;
const Expr *UpExpr = nullptr;
BinaryOperatorKind BO = BO_Comma;
- if (auto *BO = dyn_cast<BinaryOperator>(E)) {
+ if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
if (BO->getOpcode() == BO_Assign) {
XExpr = BO->getLHS();
UpExpr = BO->getRHS();
}
}
// Try to emit update expression as a simple atomic.
- auto *RHSExpr = UpExpr;
+ const Expr *RHSExpr = UpExpr;
if (RHSExpr) {
// Analyze RHS part of the whole expression.
- if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
+ if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
RHSExpr->IgnoreParenImpCasts())) {
// If this is a conditional operator, analyze its condition for
// min/max reduction operator.
RHSExpr = ACO->getCond();
}
- if (auto *BORHS =
+ if (const auto *BORHS =
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
EExpr = BORHS->getRHS();
BO = BORHS->getOpcode();
}
}
if (XExpr) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
auto &&AtomicRedGen = [BO, VD,
Loc](CodeGenFunction &CGF, const Expr *XExpr,
const Expr *EExpr, const Expr *UpExpr) {
@@ -5273,7 +5688,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
[&CGF, UpExpr, VD, Loc](RValue XRValue) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(
- VD, [&CGF, VD, XRValue, Loc]() -> Address {
+ VD, [&CGF, VD, XRValue, Loc]() {
Address LHSTemp = CGF.CreateMemTemp(VD->getType());
CGF.emitOMPSimpleStore(
CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
@@ -5286,19 +5701,22 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
};
if ((*IPriv)->getType()->isArrayType()) {
// Emit atomic reduction for array section.
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ const auto *RHSVar =
+ cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
AtomicRedGen, XExpr, EExpr, UpExpr);
- } else
+ } else {
// Emit atomic reduction for array subscript or single variable.
AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
+ }
} else {
// Emit as a critical region.
auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
- const Expr *, const Expr *) {
- auto &RT = CGF.CGM.getOpenMPRuntime();
+ const Expr *, const Expr *) {
+ CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
+ std::string Name = RT.getName({"atomic_reduction"});
RT.emitCriticalRegion(
- CGF, ".atomic_reduction",
+ CGF, Name,
[=](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
emitReductionCombiner(CGF, E);
@@ -5306,12 +5724,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
Loc);
};
if ((*IPriv)->getType()->isArrayType()) {
- auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ const auto *LHSVar =
+ cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVar =
+ cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
CritRedGen);
- } else
+ } else {
CritRedGen(CGF, nullptr, nullptr, nullptr);
+ }
}
++ILHS;
++IRHS;
@@ -5331,20 +5752,29 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
EndArgs);
AtomicRCG.setAction(Action);
AtomicRCG(CGF);
- } else
+ } else {
AtomicRCG(CGF);
+ }
CGF.EmitBranch(DefaultBB);
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
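
For reference, a minimal standalone sketch of the host-side pattern this function emits, following the numbered pseudo-code comments above. The __kmpc entry points are stubbed here so the sketch compiles on its own; the reduce_func body and all variable names are illustrative, not the generated code.

#include <cstddef>

// Stand-ins for the libomp entry points named above; stubbed so this
// sketch is self-contained. The real declarations live in the OpenMP runtime.
static int kmpc_reduce_nowait_stub(void *, int, int, std::size_t, void *,
                                   void (*)(void *, void *), void *) {
  return 1; // pretend this thread performs the non-atomic combine
}
static void kmpc_end_reduce_nowait_stub(void *, int, void *) {}

// <reduce_func>: combines the RHS reduction item into the LHS one.
static void reduce_func(void *lhs, void *rhs) {
  *static_cast<long *>(lhs) += *static_cast<long *>(rhs);
}

void reduction_sketch(void *loc, int gtid, long *shared, long *priv,
                      void *lock) {
  void *red_list[1] = {priv}; // 1. list of reduction variables
  int res = kmpc_reduce_nowait_stub(loc, gtid, /*n=*/1, sizeof(red_list),
                                    red_list, reduce_func, lock); // 4.
  switch (res) { // 5. the switch built above
  case 1: // 6. lock-protected combine, then __kmpc_end_reduce{_nowait}
    *shared += *priv;
    kmpc_end_reduce_nowait_stub(loc, gtid, lock);
    break;
  case 2: // combine via atomics (GCC/Clang builtin used for brevity)
    __atomic_fetch_add(shared, *priv, __ATOMIC_SEQ_CST);
    break;
  default: // nothing to do
    break;
  }
}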
/// Generates a unique name for artificial threadprivate variables.
-/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
-static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
- unsigned N) {
+/// Format is: <Prefix> "." <Decl_mangled_name> "_" <Decl_start_loc_raw_enc>
+static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
+ const Expr *Ref) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
- Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
+ const clang::DeclRefExpr *DE;
+ const VarDecl *D = ::getBaseDecl(Ref, DE);
+ if (!D)
+ D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
+ D = D->getCanonicalDecl();
+ std::string Name = CGM.getOpenMPRuntime().getName(
+ {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
+ Out << Prefix << Name << "_"
+ << D->getCanonicalDecl()->getLocStart().getRawEncoding();
return Out.str();
}
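
As an illustration of that format, a toy reimplementation of the scheme (the "." separator is what getName produces on the host; the concrete prefix, mangled name, and raw location encoding below are invented):

#include <string>

// Toy model of the naming scheme; all inputs are hypothetical values.
std::string uniqueNameSketch(const std::string &Prefix,
                             const std::string &MangledName,
                             unsigned RawStartLoc) {
  return Prefix + "." + MangledName + "_" + std::to_string(RawStartLoc);
}

// uniqueNameSketch("reduction_size", "gvar", 1234) == "reduction_size.gvar_1234"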
@@ -5359,19 +5789,21 @@ static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
SourceLocation Loc,
ReductionCodeGen &RCG, unsigned N) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&Param);
- auto &FnInfo =
+ const auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
- ".red_init.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
Address PrivateAddr = CGF.EmitLoadOfPointer(
CGF.GetAddrOfLocalVar(&Param),
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
@@ -5381,10 +5813,9 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
if (RCG.getSizes(N).second) {
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
CGF, CGM.getContext().getSizeType(),
- generateUniqueName("reduction_size", Loc, N));
- Size =
- CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
- CGM.getContext().getSizeType(), SourceLocation());
+ generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
+ Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), Loc);
}
RCG.emitAggregateType(CGF, N, Size);
LValue SharedLVal;
@@ -5395,7 +5826,10 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
Address SharedAddr =
CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
CGF, CGM.getContext().VoidPtrTy,
- generateUniqueName("reduction", Loc, N));
+ generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
+ SharedAddr = CGF.EmitLoadOfPointer(
+ SharedAddr,
+ CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
} else {
SharedLVal = CGF.MakeNaturalAlignAddrLValue(
@@ -5427,40 +5861,42 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
const Expr *ReductionOp,
const Expr *LHS, const Expr *RHS,
const Expr *PrivateRef) {
- auto &C = CGM.getContext();
- auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
- auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
+ ASTContext &C = CGM.getContext();
+ const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
+ const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
FunctionArgList Args;
- ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&ParamInOut);
Args.emplace_back(&ParamIn);
- auto &FnInfo =
+ const auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
- ".red_comb.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
llvm::Value *Size = nullptr;
// If the size of the reduction item is non-constant, load it from global
// threadprivate variable.
if (RCG.getSizes(N).second) {
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
CGF, CGM.getContext().getSizeType(),
- generateUniqueName("reduction_size", Loc, N));
- Size =
- CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
- CGM.getContext().getSizeType(), SourceLocation());
+ generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
+ Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), Loc);
}
RCG.emitAggregateType(CGF, N, Size);
// Remap lhs and rhs variables to the addresses of the function arguments.
// %lhs = bitcast void* %arg0 to <type>*
// %rhs = bitcast void* %arg1 to <type>*
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
- PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
+ PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
// Pull out the pointer to the variable.
Address PtrAddr = CGF.EmitLoadOfPointer(
CGF.GetAddrOfLocalVar(&ParamInOut),
@@ -5468,7 +5904,7 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
return CGF.Builder.CreateElementBitCast(
PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
});
- PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
+ PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
// Pull out the pointer to the variable.
Address PtrAddr = CGF.EmitLoadOfPointer(
CGF.GetAddrOfLocalVar(&ParamIn),
@@ -5500,19 +5936,21 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
ReductionCodeGen &RCG, unsigned N) {
if (!RCG.needCleanups(N))
return nullptr;
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&Param);
- auto &FnInfo =
+ const auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
- ".red_fini.", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
Address PrivateAddr = CGF.EmitLoadOfPointer(
CGF.GetAddrOfLocalVar(&Param),
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
@@ -5522,10 +5960,9 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
if (RCG.getSizes(N).second) {
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
CGF, CGM.getContext().getSizeType(),
- generateUniqueName("reduction_size", Loc, N));
- Size =
- CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
- CGM.getContext().getSizeType(), SourceLocation());
+ generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
+ Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), Loc);
}
RCG.emitAggregateType(CGF, N, Size);
// Emit the finalizer body:
@@ -5551,7 +5988,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
// kmp_task_red_flags_t flags; // flags for additional info from compiler
// } kmp_task_red_input_t;
ASTContext &C = CGM.getContext();
- auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+ RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
RD->startDefinition();
const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
@@ -5652,14 +6089,14 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
/*isSigned=*/false);
Address SizeAddr = getAddrOfArtificialThreadPrivate(
CGF, CGM.getContext().getSizeType(),
- generateUniqueName("reduction_size", Loc, N));
+ generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
}
// Store address of the original reduction item if custom initializer is used.
if (RCG.usesReductionInitializer(N)) {
Address SharedAddr = getAddrOfArtificialThreadPrivate(
CGF, CGM.getContext().VoidPtrTy,
- generateUniqueName("reduction", Loc, N));
+ generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
@@ -5749,18 +6186,18 @@ void CGOpenMPRuntime::emitCancellationPointCall(
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
// Ignore return result until untied tasks are supported.
- auto *Result = CGF.EmitRuntimeCall(
+ llvm::Value *Result = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
// if (__kmpc_cancellationpoint()) {
// exit from construct;
// }
- auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
- auto *ContBB = CGF.createBasicBlock(".cancel.continue");
- auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
+ llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
+ llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
CGF.EmitBlock(ExitBB);
// exit from construct;
- auto CancelDest =
+ CodeGenFunction::JumpDest CancelDest =
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
CGF.EmitBranchThroughCleanup(CancelDest);
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
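
The control flow emitted here mirrors the commented pseudo-code; roughly, in source form (the runtime call is stubbed so the sketch is self-contained, and branching through cleanups is reduced to a plain early return):

// Stand-in for __kmpc_cancellationpoint; illustrative only.
static int kmpc_cancellationpoint_stub(void * /*loc*/, int /*gtid*/,
                                       int /*cancel_kind*/) {
  return 0; // 0: no cancellation requested
}

void cancellation_point_sketch(void *loc, int gtid, int kind) {
  if (kmpc_cancellationpoint_stub(loc, gtid, kind) != 0) {
    // .cancel.exit: branch through cleanups to the region's cancel
    // destination, modeled here as an early return.
    return;
  }
  // .cancel.continue: normal execution resumes.
}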
@@ -5779,70 +6216,42 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
PrePostActionTy &) {
- auto &RT = CGF.CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
// Ignore return result until untied tasks are supported.
- auto *Result = CGF.EmitRuntimeCall(
+ llvm::Value *Result = CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
// if (__kmpc_cancel()) {
// exit from construct;
// }
- auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
- auto *ContBB = CGF.createBasicBlock(".cancel.continue");
- auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
+ llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
+ llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
CGF.EmitBlock(ExitBB);
// exit from construct;
- auto CancelDest =
+ CodeGenFunction::JumpDest CancelDest =
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
CGF.EmitBranchThroughCleanup(CancelDest);
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
};
- if (IfCond)
+ if (IfCond) {
emitOMPIfClause(CGF, IfCond, ThenGen,
[](CodeGenFunction &, PrePostActionTy &) {});
- else {
+ } else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
}
}
}
-/// \brief Obtain information that uniquely identifies a target entry. This
-/// consists of the file and device IDs as well as line number associated with
-/// the relevant entry source location.
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
- unsigned &DeviceID, unsigned &FileID,
- unsigned &LineNum) {
-
- auto &SM = C.getSourceManager();
-
- // The loc should be always valid and have a file ID (the user cannot use
- // #pragma directives in macros)
-
- assert(Loc.isValid() && "Source location is expected to be always valid.");
- assert(Loc.isFileID() && "Source location is expected to refer to a file.");
-
- PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- assert(PLoc.isValid() && "Source location is expected to be always valid.");
-
- llvm::sys::fs::UniqueID ID;
- if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
- llvm_unreachable("Source file with target region no longer exists!");
-
- DeviceID = ID.getDevice();
- FileID = ID.getFile();
- LineNum = PLoc.getLine();
-}
-
void CGOpenMPRuntime::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
assert(!ParentName.empty() && "Invalid target region parent name!");
-
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
}
@@ -5872,7 +6281,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
<< llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
}
- const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
CodeGenFunction CGF(CGM, true);
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
@@ -5898,22 +6307,25 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
if (CGM.getLangOpts().OpenMPIsDevice) {
OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
- OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
- } else
+ OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+ OutlinedFn->setDSOLocal(false);
+ } else {
+ std::string Name = getName({EntryFnName, "region_id"});
OutlinedFnID = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::Constant::getNullValue(CGM.Int8Ty), Name);
+ }
// Register the information for the entry associated with this target region.
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
- /*Flags=*/0);
+ OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
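
On the host side, the region ID created above is simply a uniquely named constant byte whose address identifies the target region to the offload runtime. A minimal model of that trick (names invented):

#include <cstdio>

// A unique global byte, like the "<entry>.region_id" global created above.
static const char region_id = 0;

int main() {
  const void *outlined_fn_id = &region_id; // what the host passes as the ID
  std::printf("region id address: %p\n", outlined_fn_id);
  return 0;
}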
/// Discard all CompoundStmts intervening between two constructs.
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
- while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
+ while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
Body = CS->body_front();
return Body;
@@ -5931,12 +6343,11 @@ static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
CodeGenFunction &CGF,
const OMPExecutableDirective &D) {
-
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// If the target directive is combined with a teams directive:
// Return the value in the num_teams clause, if any.
@@ -5944,8 +6355,8 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
- auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
- /*IgnoreResultAssign*/ true);
+ llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
+ /*IgnoreResultAssign*/ true);
return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
/*IsSigned=*/true);
}
@@ -5965,12 +6376,12 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
// the expression is captured in the enclosing target environment when the
// teams directive is not combined with target.
- const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
- if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
+ if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
ignoreCompoundStmts(CS.getCapturedStmt()))) {
if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
- if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+ if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
@@ -6000,12 +6411,11 @@ static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
CodeGenFunction &CGF,
const OMPExecutableDirective &D) {
-
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
//
// If the target directive is combined with a teams directive:
@@ -6030,8 +6440,9 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
if (const auto *ThreadLimitClause =
D.getSingleClause<OMPThreadLimitClause>()) {
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
- auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
- /*IgnoreResultAssign*/ true);
+ llvm::Value *ThreadLimit =
+ CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
+ /*IgnoreResultAssign*/ true);
ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
/*IsSigned=*/true);
}
@@ -6068,12 +6479,12 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
// the expression is captured in the enclosing target environment when the
// teams directive is not combined with target.
- const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
- if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
+ if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
ignoreCompoundStmts(CS.getCapturedStmt()))) {
if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
- if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+ if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
@@ -6092,42 +6503,50 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
}
namespace {
-// \brief Utility to handle information from clauses associated with a given
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
+// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
- /// \brief Values for bit flags used to specify the mapping type for
+ /// Values for bit flags used to specify the mapping type for
/// offloading.
- enum OpenMPOffloadMappingFlags {
- /// \brief Allocate memory on the device and move data from host to device.
+ enum OpenMPOffloadMappingFlags : uint64_t {
+ /// No flags
+ OMP_MAP_NONE = 0x0,
+ /// Allocate memory on the device and move data from host to device.
OMP_MAP_TO = 0x01,
- /// \brief Allocate memory on the device and move data from device to host.
+ /// Allocate memory on the device and move data from device to host.
OMP_MAP_FROM = 0x02,
- /// \brief Always perform the requested mapping action on the element, even
+ /// Always perform the requested mapping action on the element, even
/// if it was already mapped before.
OMP_MAP_ALWAYS = 0x04,
- /// \brief Delete the element from the device environment, ignoring the
+ /// Delete the element from the device environment, ignoring the
/// current reference count associated with the element.
OMP_MAP_DELETE = 0x08,
- /// \brief The element being mapped is a pointer-pointee pair; both the
+ /// The element being mapped is a pointer-pointee pair; both the
/// pointer and the pointee should be mapped.
OMP_MAP_PTR_AND_OBJ = 0x10,
- /// \brief This flags signals that the base address of an entry should be
+    /// This flag signals that the base address of an entry should be
/// passed to the target kernel as an argument.
OMP_MAP_TARGET_PARAM = 0x20,
- /// \brief Signal that the runtime library has to return the device pointer
+ /// Signal that the runtime library has to return the device pointer
/// in the current position for the data being mapped. Used when we have the
/// use_device_ptr clause.
OMP_MAP_RETURN_PARAM = 0x40,
- /// \brief This flag signals that the reference being passed is a pointer to
+ /// This flag signals that the reference being passed is a pointer to
/// private data.
OMP_MAP_PRIVATE = 0x80,
- /// \brief Pass the element to the device by value.
+ /// Pass the element to the device by value.
OMP_MAP_LITERAL = 0x100,
/// Implicit map
OMP_MAP_IMPLICIT = 0x200,
+    /// The 16 MSBs of the flags indicate whether the entry is a member of some
+ /// struct/class.
+ OMP_MAP_MEMBER_OF = 0xffff000000000000,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
};
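
Since the enum is now a bitmask type, entries combine with the usual bitwise operators; a tofrom-mapped kernel argument, for instance, carries OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_TARGET_PARAM. A small standalone model with the same bit values (the variable names are ours):

#include <cstdint>
#include <cstdio>

// Same bit values as the enum above, in a standalone model.
constexpr uint64_t MAP_TO = 0x01, MAP_FROM = 0x02, MAP_TARGET_PARAM = 0x20,
                   MAP_IMPLICIT = 0x200,
                   MAP_MEMBER_OF = 0xffff000000000000ULL;

int main() {
  // map(tofrom: x) where x is a kernel argument:
  uint64_t flags = MAP_TO | MAP_FROM | MAP_TARGET_PARAM;
  // An implicitly captured variable additionally carries MAP_IMPLICIT:
  uint64_t implicit = flags | MAP_IMPLICIT;
  // The 16 MSBs encode "member of argument N" (see getMemberOfFlag below):
  uint64_t member_of_2 = ((uint64_t)1 + 1) << 48; // MEMBER_OF(2)
  std::printf("%#llx %#llx %#llx\n", (unsigned long long)flags,
              (unsigned long long)implicit,
              (unsigned long long)(member_of_2 & MAP_MEMBER_OF));
  return 0;
}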
/// Class that associates information with a base pointer to be passed to the
@@ -6147,21 +6566,60 @@ public:
void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
};
- typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
- typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
- typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
+ using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
+ using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
+ using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
+
+ /// Map between a struct and the its lowest & highest elements which have been
+ /// mapped.
+ /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
+ /// HE(FieldIndex, Pointer)}
+ struct StructRangeInfoTy {
+ std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
+ 0, Address::invalid()};
+ std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
+ 0, Address::invalid()};
+ Address Base = Address::invalid();
+ };
private:
- /// \brief Directive from where the map clauses were extracted.
+  /// Map information gathered for a single component list of a map-like clause.
+ struct MapInfo {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
+ OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
+ bool ReturnDevicePointer = false;
+ bool IsImplicit = false;
+
+ MapInfo() = default;
+ MapInfo(
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
+ OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ bool ReturnDevicePointer, bool IsImplicit)
+ : Components(Components), MapType(MapType),
+ MapTypeModifier(MapTypeModifier),
+ ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
+ };
+
+ /// If use_device_ptr is used on a pointer which is a struct member and there
+ /// is no map information about it, then emission of that entry is deferred
+ /// until the whole struct has been processed.
+ struct DeferredDevicePtrEntryTy {
+ const Expr *IE = nullptr;
+ const ValueDecl *VD = nullptr;
+
+ DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
+ : IE(IE), VD(VD) {}
+ };
+
+ /// Directive from where the map clauses were extracted.
const OMPExecutableDirective &CurDir;
- /// \brief Function the directive is being generated for.
+ /// Function the directive is being generated for.
CodeGenFunction &CGF;
- /// \brief Set of all first private variables in the current directive.
+ /// Set of all first private variables in the current directive.
llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
- /// Set of all reduction variables in the current directive.
- llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
@@ -6171,10 +6629,10 @@ private:
DevPointersMap;
llvm::Value *getExprTypeSize(const Expr *E) const {
- auto ExprTy = E->getType().getCanonicalType();
+ QualType ExprTy = E->getType().getCanonicalType();
// Reference types are ignored for mapping purposes.
- if (auto *RefTy = ExprTy->getAs<ReferenceType>())
+ if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
ExprTy = RefTy->getPointeeType().getCanonicalType();
// Given that an array section is considered a built-in type, we need to
@@ -6191,10 +6649,10 @@ private:
return CGF.getTypeSize(BaseTy);
llvm::Value *ElemSize;
- if (auto *PTy = BaseTy->getAs<PointerType>())
+ if (const auto *PTy = BaseTy->getAs<PointerType>()) {
ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
- else {
- auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
+ } else {
+ const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
assert(ATy && "Expecting array type if not a pointer type.");
ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
}
@@ -6204,7 +6662,7 @@ private:
if (!OAE->getLength())
return ElemSize;
- auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
+ llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
LengthVal =
CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
@@ -6212,14 +6670,16 @@ private:
return CGF.getTypeSize(ExprTy);
}
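
In effect, the size of an array-section expression is length * sizeof(element), falling back to the full type size when the whole object is mapped; for example (values illustrative):

#include <cstddef>
#include <cstdio>

int main() {
  float arr[100];
  std::size_t whole = sizeof(arr);          // map(arr)       -> 100*sizeof(float)
  std::size_t section = 23 * sizeof(float); // map(arr[1:23]) ->  23*sizeof(float)
  std::printf("%zu %zu\n", whole, section);
  return 0;
}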
- /// \brief Return the corresponding bits for a given map clause modifier. Add
+ /// Return the corresponding bits for a given map clause modifier. Add
/// a flag marking the map as a pointer if requested. Add a flag marking the
/// map as the first one of a series of maps that relate to the same map
/// expression.
- uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
- OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
- bool AddIsTargetParamFlag) const {
- uint64_t Bits = 0u;
+ OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType,
+ OpenMPMapClauseKind MapTypeModifier,
+ bool IsImplicit, bool AddPtrFlag,
+ bool AddIsTargetParamFlag) const {
+ OpenMPOffloadMappingFlags Bits =
+ IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
switch (MapType) {
case OMPC_MAP_alloc:
case OMPC_MAP_release:
@@ -6229,20 +6689,20 @@ private:
// type modifiers.
break;
case OMPC_MAP_to:
- Bits = OMP_MAP_TO;
+ Bits |= OMP_MAP_TO;
break;
case OMPC_MAP_from:
- Bits = OMP_MAP_FROM;
+ Bits |= OMP_MAP_FROM;
break;
case OMPC_MAP_tofrom:
- Bits = OMP_MAP_TO | OMP_MAP_FROM;
+ Bits |= OMP_MAP_TO | OMP_MAP_FROM;
break;
case OMPC_MAP_delete:
- Bits = OMP_MAP_DELETE;
+ Bits |= OMP_MAP_DELETE;
break;
- default:
+ case OMPC_MAP_always:
+ case OMPC_MAP_unknown:
llvm_unreachable("Unexpected map type!");
- break;
}
if (AddPtrFlag)
Bits |= OMP_MAP_PTR_AND_OBJ;
@@ -6253,10 +6713,10 @@ private:
return Bits;
}
- /// \brief Return true if the provided expression is a final array section. A
+ /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
bool isFinalArraySectionExpression(const Expr *E) const {
- auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
+ const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
// It is not an array section and therefore not a unity-size one.
if (!OASE)
@@ -6266,16 +6726,16 @@ private:
if (OASE->getColonLoc().isInvalid())
return false;
- auto *Length = OASE->getLength();
+ const Expr *Length = OASE->getLength();
// If we don't have a length we have to check if the array has size 1
// for this dimension. Also, we should always expect a length if the
// base type is pointer.
if (!Length) {
- auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
- OASE->getBase()->IgnoreParenImpCasts())
- .getCanonicalType();
- if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
+ QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
+ OASE->getBase()->IgnoreParenImpCasts())
+ .getCanonicalType();
+ if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
return ATy->getSize().getSExtValue() != 1;
// If we don't have a constant dimension length, we have to consider
// the current section as having any size, so it is not necessarily
@@ -6291,7 +6751,7 @@ private:
return ConstLength.getSExtValue() != 1;
}
- /// \brief Generate the base pointers, section pointers, sizes and map type
+ /// Generate the base pointers, section pointers, sizes and map type
/// bits for the provided map type, map modifier, and expression components.
/// \a IsFirstComponent should be set to true if the provided set of
/// components is the first associated with a capture.
@@ -6300,10 +6760,10 @@ private:
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
- bool IsFirstComponentList, bool IsImplicit) const {
-
+ StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
+ bool IsImplicit) const {
// The following summarizes what has to be generated for each map and the
- // types bellow. The generated information is expressed in this order:
+ // types below. The generated information is expressed in this order:
// base pointer, section pointer, size, flags
// (to add to the ones that come from the map type and modifier).
//
@@ -6326,96 +6786,141 @@ private:
// S2 *ps;
//
// map(d)
- // &d, &d, sizeof(double), noflags
+ // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
//
// map(i)
- // &i, &i, 100*sizeof(int), noflags
+ // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
//
// map(i[1:23])
- // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
+ // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
//
// map(p)
- // &p, &p, sizeof(float*), noflags
+ // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
//
// map(p[1:24])
- // p, &p[1], 24*sizeof(float), noflags
+ // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
//
// map(s)
- // &s, &s, sizeof(S2), noflags
+ // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
//
// map(s.i)
- // &s, &(s.i), sizeof(int), noflags
+ // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
//
// map(s.s.f)
- // &s, &(s.i.f), 50*sizeof(int), noflags
+ // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
//
// map(s.p)
- // &s, &(s.p), sizeof(double*), noflags
+ // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
//
- // map(s.p[:22], s.a s.b)
- // &s, &(s.p), sizeof(double*), noflags
- // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
+ // map(to: s.p[:22])
+ // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
+ // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
+ // &(s.p), &(s.p[0]), 22*sizeof(double),
+ // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
+  // (*) allocate space for struct members; only this entry is a target parameter
+ // (**) map the pointer (nothing to be mapped in this example) (the compiler
+ // optimizes this entry out, same in the examples below)
+ // (***) map the pointee (map: to)
//
// map(s.ps)
- // &s, &(s.ps), sizeof(S2*), noflags
+ // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
//
- // map(s.ps->s.i)
- // &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
+ // map(from: s.ps->s.i)
+ // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
+ // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
+ // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
//
- // map(s.ps->ps)
- // &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
+ // map(to: s.ps->ps)
+ // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
+ // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
//
// map(s.ps->ps->ps)
- // &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
- // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
+ // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
+ // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
+ // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
//
- // map(s.ps->ps->s.f[:22])
- // &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
- // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
+ // map(to: s.ps->ps->s.f[:22])
+ // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
+ // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
+ // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
//
// map(ps)
- // &ps, &ps, sizeof(S2*), noflags
+ // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
//
// map(ps->i)
- // ps, &(ps->i), sizeof(int), noflags
+ // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
//
// map(ps->s.f)
- // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
+ // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
//
- // map(ps->p)
- // ps, &(ps->p), sizeof(double*), noflags
+ // map(from: ps->p)
+ // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
//
- // map(ps->p[:22])
- // ps, &(ps->p), sizeof(double*), noflags
- // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
+ // map(to: ps->p[:22])
+ // ps, &(ps->p), sizeof(double*), TARGET_PARAM
+ // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
+ // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
//
// map(ps->ps)
- // ps, &(ps->ps), sizeof(S2*), noflags
+ // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
//
- // map(ps->ps->s.i)
- // ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
+ // map(from: ps->ps->s.i)
+ // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
+ // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
+ // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
//
- // map(ps->ps->ps)
- // ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
+ // map(from: ps->ps->ps)
+ // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
+ // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
//
// map(ps->ps->ps->ps)
- // ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
- // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
+ // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
+ // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
+ // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
//
- // map(ps->ps->ps->s.f[:22])
- // ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
- // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
+ // map(to: ps->ps->ps->s.f[:22])
+ // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
+ // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
+ // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
+ //
+ // map(to: s.f[:22]) map(from: s.p[:33])
+  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
+  //     sizeof(double*) (*), TARGET_PARAM
+ // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
+ // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
+ // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
+ // (*) allocate contiguous space needed to fit all mapped members even if
+ // we allocate space for members not mapped (in this example,
+ // s.f[22..49] and s.s are not mapped, yet we must allocate space for
+ // them as well because they fall between &s.f[0] and &s.p)
+ //
+ // map(from: s.f[:22]) map(to: ps->p[:33])
+ // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
+ // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
+ // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
+ // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
+ // (*) the struct this entry pertains to is the 2nd element in the list of
+ // arguments, hence MEMBER_OF(2)
+ //
+ // map(from: s.f[:22], s.s) map(to: ps->p[:33])
+ // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
+ // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
+ // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
+ // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
+ // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
+ // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
+ // (*) the struct this entry pertains to is the 4th element in the list
+ // of arguments, hence MEMBER_OF(4)
// Track if the map information being generated is the first for a capture.
bool IsCaptureFirstInfo = IsFirstComponentList;
+ bool IsLink = false; // Is this variable a "declare target link"?
// Scan the components from the base to the complete expression.
auto CI = Components.rbegin();
@@ -6425,16 +6930,25 @@ private:
// Track if the map information being generated is the first for a list of
// components.
bool IsExpressionFirstInfo = true;
- llvm::Value *BP = nullptr;
+ Address BP = Address::invalid();
- if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
+ if (isa<MemberExpr>(I->getAssociatedExpression())) {
// The base is the 'this' pointer. The content of the pointer is going
// to be the base of the field being mapped.
- BP = CGF.EmitScalarExpr(ME->getBase());
+ BP = CGF.LoadCXXThisAddress();
} else {
// The base is the reference to the variable.
// BP = &Var.
- BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
+ BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
+ if (const auto *VD =
+ dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
+ if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ isDeclareTargetDeclaration(VD))
+ if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
+ IsLink = true;
+ BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ }
+ }
// If the variable is a pointer and is being dereferenced (i.e. is not
// the last component), the base has to be the pointer itself, not its
@@ -6442,10 +6956,7 @@ private:
QualType Ty =
I->getAssociatedDeclaration()->getType().getNonReferenceType();
if (Ty->isAnyPointerType() && std::next(I) != CE) {
- auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
- BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
- Ty->castAs<PointerType>())
- .getPointer();
+ BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
// We do not need to generate individual map information for the
// pointer, it can be associated with the combined storage.
@@ -6453,8 +6964,41 @@ private:
}
}
- uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
+ // Track whether a component of the list should be marked as MEMBER_OF some
+ // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
+  // in a component list should be marked as MEMBER_OF; all subsequent entries
+ // do not belong to the base struct. E.g.
+ // struct S2 s;
+ // s.ps->ps->ps->f[:]
+ // (1) (2) (3) (4)
+ // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
+ // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
+  // is the pointee of ps(2), which is not a member of struct s, so it should not
+ // be marked as such (it is still PTR_AND_OBJ).
+ // The variable is initialized to false so that PTR_AND_OBJ entries which
+ // are not struct members are not considered (e.g. array of pointers to
+ // data).
+ bool ShouldBeMemberOf = false;
+
+ // Variable keeping track of whether or not we have encountered a component
+ // in the component list which is a member expression. Useful when we have a
+ // pointer or a final array section, in which case it is the previous
+ // component in the list which tells us whether we have a member expression.
+ // E.g. X.f[:]
+ // While processing the final array section "[:]" it is "f" which tells us
+ // whether we are dealing with a member of a declared struct.
+ const MemberExpr *EncounteredME = nullptr;
+
for (; I != CE; ++I) {
+ // If the current component is member of a struct (parent struct) mark it.
+ if (!EncounteredME) {
+ EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
+ // If we encounter a PTR_AND_OBJ entry from now on it should be marked
+ // as MEMBER_OF the parent struct.
+ if (EncounteredME)
+ ShouldBeMemberOf = true;
+ }
+
auto Next = std::next(I);
// We need to generate the addresses and sizes if this is the last
@@ -6472,14 +7016,12 @@ private:
const auto *OASE =
dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
bool IsPointer =
- (OASE &&
- OMPArraySectionExpr::getBaseOriginalType(OASE)
- .getCanonicalType()
- ->isAnyPointerType()) ||
+ (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
+ .getCanonicalType()
+ ->isAnyPointerType()) ||
I->getAssociatedExpression()->getType()->isAnyPointerType();
if (Next == CE || IsPointer || IsFinalArraySection) {
-
// If this is not the last component, we expect the pointer to be
// associated with an array expression or member expression.
assert((Next == CE ||
@@ -6488,44 +7030,68 @@ private:
isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
"Unexpected expression");
- llvm::Value *LB =
- CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
- auto *Size = getExprTypeSize(I->getAssociatedExpression());
-
- // If we have a member expression and the current component is a
- // reference, we have to map the reference too. Whenever we have a
- // reference, the section that reference refers to is going to be a
- // load instruction from the storage assigned to the reference.
- if (isa<MemberExpr>(I->getAssociatedExpression()) &&
- I->getAssociatedDeclaration()->getType()->isReferenceType()) {
- auto *LI = cast<llvm::LoadInst>(LB);
- auto *RefAddr = LI->getPointerOperand();
-
- BasePointers.push_back(BP);
- Pointers.push_back(RefAddr);
- Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
- Types.push_back(DefaultFlags |
- getMapTypeBits(
- /*MapType*/ OMPC_MAP_alloc,
- /*MapTypeModifier=*/OMPC_MAP_unknown,
- !IsExpressionFirstInfo, IsCaptureFirstInfo));
- IsExpressionFirstInfo = false;
- IsCaptureFirstInfo = false;
- // The reference will be the next base address.
- BP = RefAddr;
- }
+ Address LB =
+ CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
+ llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
+
+ // If this component is a pointer inside the base struct then we don't
+ // need to create any entry for it - it will be combined with the object
+ // it is pointing to into a single PTR_AND_OBJ entry.
+ bool IsMemberPointer =
+ IsPointer && EncounteredME &&
+ (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
+ EncounteredME);
+ if (!IsMemberPointer) {
+ BasePointers.push_back(BP.getPointer());
+ Pointers.push_back(LB.getPointer());
+ Sizes.push_back(Size);
+
+ // We need to add a pointer flag for each map that comes from the
+ // same expression except for the first one. We also need to signal
+ // this map is the first one that relates with the current capture
+ // (there is a set of entries for each capture).
+ OpenMPOffloadMappingFlags Flags = getMapTypeBits(
+ MapType, MapTypeModifier, IsImplicit,
+ !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
+
+ if (!IsExpressionFirstInfo) {
+ // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
+ // then we reset the TO/FROM/ALWAYS/DELETE flags.
+ if (IsPointer)
+ Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
+ OMP_MAP_DELETE);
+
+ if (ShouldBeMemberOf) {
+ // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
+ // should be later updated with the correct value of MEMBER_OF.
+ Flags |= OMP_MAP_MEMBER_OF;
+ // From now on, all subsequent PTR_AND_OBJ entries should not be
+ // marked as MEMBER_OF.
+ ShouldBeMemberOf = false;
+ }
+ }
- BasePointers.push_back(BP);
- Pointers.push_back(LB);
- Sizes.push_back(Size);
+ Types.push_back(Flags);
+ }
- // We need to add a pointer flag for each map that comes from the
- // same expression except for the first one. We also need to signal
- // this map is the first one that relates with the current capture
- // (there is a set of entries for each capture).
- Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier,
- !IsExpressionFirstInfo,
- IsCaptureFirstInfo));
+ // If we have encountered a member expression so far, keep track of the
+ // mapped member. If the parent is "*this", then the value declaration
+ // is nullptr.
+ if (EncounteredME) {
+ const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
+ unsigned FieldIndex = FD->getFieldIndex();
+
+ // Update info about the lowest and highest elements for this struct
+ if (!PartialStruct.Base.isValid()) {
+ PartialStruct.LowestElem = {FieldIndex, LB};
+ PartialStruct.HighestElem = {FieldIndex, LB};
+ PartialStruct.Base = BP;
+ } else if (FieldIndex < PartialStruct.LowestElem.first) {
+ PartialStruct.LowestElem = {FieldIndex, LB};
+ } else if (FieldIndex > PartialStruct.HighestElem.first) {
+ PartialStruct.HighestElem = {FieldIndex, LB};
+ }
+ }
// If we have a final array section, we are done with this expression.
if (IsFinalArraySection)
@@ -6541,11 +7107,11 @@ private:
}
}
- /// \brief Return the adjusted map modifiers if the declaration a capture
- /// refers to appears in a first-private clause. This is expected to be used
- /// only with directives that start with 'target'.
- unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
- unsigned CurrentModifiers) {
+ /// Return the adjusted map modifiers if the declaration a capture refers to
+ /// appears in a first-private clause. This is expected to be used only with
+ /// directives that start with 'target'.
+ MappableExprsHandler::OpenMPOffloadMappingFlags
+ getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
assert(Cap.capturesVariable() && "Expected capture by reference only!");
// A first private variable captured by reference will use only the
@@ -6554,15 +7120,29 @@ private:
if (FirstPrivateDecls.count(Cap.getCapturedVar()))
return MappableExprsHandler::OMP_MAP_PRIVATE |
MappableExprsHandler::OMP_MAP_TO;
- // Reduction variable will use only the 'private ptr' and 'map to_from'
- // flag.
- if (ReductionDecls.count(Cap.getCapturedVar())) {
- return MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM;
- }
+ return MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM;
+ }
+
+ static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
+    // MEMBER_OF lives in the 16 MSBs of the flag, so shift left by 48 bits.
+ return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
+ << 48);
+ }
+
+ static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
+ OpenMPOffloadMappingFlags MemberOfFlag) {
+ // If the entry is PTR_AND_OBJ but has not been marked with the special
+ // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
+ // marked as MEMBER_OF.
+ if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
+ ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
+ return;
- // We didn't modify anything.
- return CurrentModifiers;
+ // Reset the placeholder value to prepare the flag for the assignment of the
+ // proper MEMBER_OF value.
+ Flags &= ~OMP_MAP_MEMBER_OF;
+ Flags |= MemberOfFlag;
}
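
Taken together, the two helpers encode "this entry belongs to argument N" in the top 16 bits, with all-ones (0xFFFF) acting as a fill-me-in-later placeholder for PTR_AND_OBJ entries. A self-contained model of that logic (constants mirror the enum; the function names are ours):

#include <cassert>
#include <cstdint>

constexpr uint64_t PTR_AND_OBJ = 0x10;
constexpr uint64_t MEMBER_OF_MASK = 0xffff000000000000ULL; // also the placeholder

uint64_t memberOfFlag(unsigned Position) { // MEMBER_OF(Position + 1)
  return (uint64_t)(Position + 1) << 48;
}

void setCorrectMemberOf(uint64_t &Flags, uint64_t MemberOf) {
  // PTR_AND_OBJ entries without the placeholder stay MEMBER_OF-free.
  if ((Flags & PTR_AND_OBJ) && (Flags & MEMBER_OF_MASK) != MEMBER_OF_MASK)
    return;
  Flags &= ~MEMBER_OF_MASK; // drop the placeholder
  Flags |= MemberOf;        // install the real position
}

int main() {
  uint64_t marked = PTR_AND_OBJ | MEMBER_OF_MASK; // pre-marked entry
  setCorrectMemberOf(marked, memberOfFlag(0));    // -> MEMBER_OF(1)
  assert((marked & MEMBER_OF_MASK) == memberOfFlag(0));

  uint64_t unmarked = PTR_AND_OBJ;                // pointee past the struct
  setCorrectMemberOf(unmarked, memberOfFlag(0));  // left untouched
  assert((unmarked & MEMBER_OF_MASK) == 0);
  return 0;
}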
public:
@@ -6573,58 +7153,54 @@ public:
for (const auto *D : C->varlists())
FirstPrivateDecls.insert(
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
- for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
- for (const auto *D : C->varlists()) {
- ReductionDecls.insert(
- cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
- }
- }
// Extract device pointer clause information.
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
for (auto L : C->component_lists())
DevPointersMap[L.first].push_back(L.second);
}
- /// \brief Generate all the base pointers, section pointers, sizes and map
+ /// Generate code for the combined entry if we have a partially mapped struct
+ /// and take care of the mapping flags of the arguments corresponding to
+ /// individual struct members.
+ void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
+ const StructRangeInfoTy &PartialStruct) const {
+    // Base is the address of the whole struct.
+ BasePointers.push_back(PartialStruct.Base.getPointer());
+ // Pointer is the address of the lowest element
+ llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
+ Pointers.push_back(LB);
+ // Size is (addr of {highest+1} element) - (addr of lowest element)
+ llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
+ llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
+ llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
+ llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
+ llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
+    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
+                                                  /*isSigned=*/false);
+ Sizes.push_back(Size);
+ // Map type is always TARGET_PARAM
+ Types.push_back(OMP_MAP_TARGET_PARAM);
+ // Remove TARGET_PARAM flag from the first element
+ (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
+
+ // All other current entries will be MEMBER_OF the combined entry
+ // (except for PTR_AND_OBJ entries which do not have a placeholder value
+ // 0xFFFF in the MEMBER_OF field).
+ OpenMPOffloadMappingFlags MemberOfFlag =
+ getMemberOfFlag(BasePointers.size() - 1);
+ for (auto &M : CurTypes)
+ setCorrectMemberOfFlag(M, MemberOfFlag);
+ }
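For a source-level picture of what emitCombinedEntry produces, consider this sketch (struct and field names are made up):

    struct S { int a; double b; float c; } s;
    void partial() {
      // Mapping two members yields one combined TARGET_PARAM entry covering
      // the contiguous range [&s.a, &s.c + 1): base &s, pointer &s.a, and
      // size (char *)(&s.c + 1) - (char *)&s.a, exactly as computed above.
      // The per-member entries are then tagged MEMBER_OF that combined entry.
      #pragma omp target map(tofrom: s.a, s.c)
      {
        s.a += 1;
        s.c *= 2.0f;
      }
    }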
+
+ /// Generate all the base pointers, section pointers, sizes and map
/// types for the extracted mappable expressions. Also, for each item that
/// relates with a device pointer, a pair of the relevant declaration and
/// index where it occurs is appended to the device pointers info array.
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
- BasePointers.clear();
- Pointers.clear();
- Sizes.clear();
- Types.clear();
-
- struct MapInfo {
- /// Kind that defines how a device pointer has to be returned.
- enum ReturnPointerKind {
- // Don't have to return any pointer.
- RPK_None,
- // Pointer is the base of the declaration.
- RPK_Base,
- // Pointer is a member of the base declaration - 'this'
- RPK_Member,
- // Pointer is a reference and a member of the base declaration - 'this'
- RPK_MemberReference,
- };
- OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
- OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
- OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
- ReturnPointerKind ReturnDevicePointer = RPK_None;
- bool IsImplicit = false;
-
- MapInfo() = default;
- MapInfo(
- OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
- ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
- : Components(Components), MapType(MapType),
- MapTypeModifier(MapTypeModifier),
- ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
- };
-
// We have to process the component lists that relate with the same
// declaration in a single chunk so that we can generate the map flags
// correctly. Therefore, we organize all lists in a map.
@@ -6636,7 +7212,7 @@ public:
const ValueDecl *D,
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
- MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
+ bool ReturnDevicePointer, bool IsImplicit) {
const ValueDecl *VD =
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
@@ -6644,33 +7220,39 @@ public:
};
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
- for (auto L : C->component_lists()) {
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
+ for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
- MapInfo::RPK_None, C->isImplicit());
+ /*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
- for (auto L : C->component_lists()) {
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
+ for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
- MapInfo::RPK_None, C->isImplicit());
+ /*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
- for (auto L : C->component_lists()) {
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
+ for (const auto &L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
- MapInfo::RPK_None, C->isImplicit());
+ /*ReturnDevicePointer=*/false, C->isImplicit());
}
// Look at the use_device_ptr clause information and mark the existing map
// entries as such. If there is no map information for an entry in the
// use_device_ptr list, we create one with map type 'alloc' and zero size
- // section. It is the user fault if that was not mapped before.
+ // section. It is the user's fault if that was not mapped before. If there is
+ // no map information and the pointer is a struct member, then we defer the
+ // emission of that entry until the whole struct has been processed.
+ llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
+ DeferredInfo;
+
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
- for (auto L : C->component_lists()) {
+ for (const auto *C :
+ this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
+ for (const auto &L : C->component_lists()) {
assert(!L.second.empty() && "Not expecting empty list of components!");
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
VD = cast<ValueDecl>(VD->getCanonicalDecl());
- auto *IE = L.second.back().getAssociatedExpression();
+ const Expr *IE = L.second.back().getAssociatedExpression();
// If the first component is a member expression, we have to look into
// 'this', which maps to null in the map of map information. Otherwise
// look directly for the information.
@@ -6686,113 +7268,135 @@ public:
// If we found a map entry, signal that the pointer has to be returned
// and move on to the next declaration.
if (CI != It->second.end()) {
- CI->ReturnDevicePointer = isa<MemberExpr>(IE)
- ? (VD->getType()->isReferenceType()
- ? MapInfo::RPK_MemberReference
- : MapInfo::RPK_Member)
- : MapInfo::RPK_Base;
+ CI->ReturnDevicePointer = true;
continue;
}
}
// We didn't find any match in our map information - generate a zero
- // size array section.
+ // size array section - if the pointer is a struct member, we defer this
+ // action until the whole struct has been processed.
// FIXME: MSVC 2013 seems to require this-> to find member CGF.
- llvm::Value *Ptr =
- this->CGF
- .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
- .getScalarVal();
- BasePointers.push_back({Ptr, VD});
- Pointers.push_back(Ptr);
- Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
- Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+ if (isa<MemberExpr>(IE)) {
+ // Insert the pointer into Info to be processed by
+ // generateInfoForComponentList. Because it is a member pointer
+ // without a pointee, no entry will be generated for it; therefore,
+ // we need to generate one after the whole struct has been processed.
+ // Nonetheless, generateInfoForComponentList must be called to take
+ // the pointer into account for the calculation of the range of the
+ // partial struct.
+ InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
+ /*ReturnDevicePointer=*/false, C->isImplicit());
+ DeferredInfo[nullptr].emplace_back(IE, VD);
+ } else {
+ llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
+ this->CGF.EmitLValue(IE), IE->getExprLoc());
+ BasePointers.emplace_back(Ptr, VD);
+ Pointers.push_back(Ptr);
+ Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
+ Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+ }
}
+ }
- for (auto &M : Info) {
+ for (const auto &M : Info) {
// We need to know when we generate information for the first component
// associated with a capture, because the mapping flags depend on it.
bool IsFirstComponentList = true;
- for (MapInfo &L : M.second) {
+
+ // Temporary versions of the arrays.
+ MapBaseValuesArrayTy CurBasePointers;
+ MapValuesArrayTy CurPointers;
+ MapValuesArrayTy CurSizes;
+ MapFlagsArrayTy CurTypes;
+ StructRangeInfoTy PartialStruct;
+
+ for (const MapInfo &L : M.second) {
assert(!L.Components.empty() &&
"Not expecting declaration with no component lists.");
// Remember the current base pointer index.
- unsigned CurrentBasePointersIdx = BasePointers.size();
+ unsigned CurrentBasePointersIdx = CurBasePointers.size();
// FIXME: MSVC 2013 seems to require this-> to find the member method.
this->generateInfoForComponentList(
- L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
- Sizes, Types, IsFirstComponentList, L.IsImplicit);
+ L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
+ CurPointers, CurSizes, CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
- if (IsFirstComponentList &&
- L.ReturnDevicePointer != MapInfo::RPK_None) {
- // If the pointer is not the base of the map, we need to skip the
- // base. If it is a reference in a member field, we also need to skip
- // the map of the reference.
- if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
- ++CurrentBasePointersIdx;
- if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
- ++CurrentBasePointersIdx;
- }
- assert(BasePointers.size() > CurrentBasePointersIdx &&
+ if (L.ReturnDevicePointer) {
+ assert(CurBasePointers.size() > CurrentBasePointersIdx &&
"Unexpected number of mapped base pointers.");
- auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
+ const ValueDecl *RelevantVD =
+ L.Components.back().getAssociatedDeclaration();
assert(RelevantVD &&
"No relevant declaration related with device pointer??");
- BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
- Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
+ CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
+ CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
}
IsFirstComponentList = false;
}
+
+ // Append any pending zero-length pointers that are struct members and
+ // are used with use_device_ptr.
+ auto CI = DeferredInfo.find(M.first);
+ if (CI != DeferredInfo.end()) {
+ for (const DeferredDevicePtrEntryTy &L : CI->second) {
+ llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
+ llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
+ this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
+ CurBasePointers.emplace_back(BasePtr, L.VD);
+ CurPointers.push_back(Ptr);
+ CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
+ // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
+ // value MEMBER_OF=FFFF so that the entry is later updated with the
+ // correct value of MEMBER_OF.
+ CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
+ OMP_MAP_MEMBER_OF);
+ }
+ }
+
+ // If there is an entry in PartialStruct, it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid())
+ emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
+ PartialStruct);
+
+ // We need to append the results of this capture to what we already have.
+ BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+ Pointers.append(CurPointers.begin(), CurPointers.end());
+ Sizes.append(CurSizes.begin(), CurSizes.end());
+ Types.append(CurTypes.begin(), CurTypes.end());
}
}
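A hypothetical source pattern that exercises the deferred path above: a use_device_ptr list item that is a struct member with no map entry of its own.

    struct Desc { double *buf; int n; } d;
    void probe() {
      // 'd.buf' is a member expression and is not mapped itself, so its entry
      // is deferred (via DeferredInfo) until the rest of 'd' is processed and
      // is then emitted as PTR_AND_OBJ | RETURN_PARAM with the MEMBER_OF
      // placeholder that emitCombinedEntry later resolves.
      #pragma omp target data map(to: d.n) use_device_ptr(d.buf)
      {
        double *dev = d.buf; // holds the translated device address here
        (void)dev;
      }
    }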
- /// \brief Generate the base pointers, section pointers, sizes and map types
+ /// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
llvm::Value *Arg,
MapBaseValuesArrayTy &BasePointers,
MapValuesArrayTy &Pointers,
- MapValuesArrayTy &Sizes,
- MapFlagsArrayTy &Types) const {
+ MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
+ StructRangeInfoTy &PartialStruct) const {
assert(!Cap->capturesVariableArrayType() &&
"Not expecting to generate map info for a variable array type!");
- BasePointers.clear();
- Pointers.clear();
- Sizes.clear();
- Types.clear();
-
// We need to know when we generate information for the first component
// associated with a capture, because the mapping flags depend on it.
bool IsFirstComponentList = true;
- const ValueDecl *VD =
- Cap->capturesThis()
- ? nullptr
- : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
+ const ValueDecl *VD = Cap->capturesThis()
+ ? nullptr
+ : Cap->getCapturedVar()->getCanonicalDecl();
// If this declaration appears in an is_device_ptr clause, we just have to
// pass the pointer by value. If it is a reference to a declaration, we just
- // pass its value, otherwise, if it is a member expression, we need to map
- // 'to' the field.
- if (!VD) {
- auto It = DevPointersMap.find(VD);
- if (It != DevPointersMap.end()) {
- for (auto L : It->second) {
- generateInfoForComponentList(
- /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
- BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
- /*IsImplicit=*/false);
- IsFirstComponentList = false;
- }
- return;
- }
- } else if (DevPointersMap.count(VD)) {
- BasePointers.push_back({Arg, VD});
+ // pass its value.
+ if (DevPointersMap.count(VD)) {
+ BasePointers.emplace_back(Arg, VD);
Pointers.push_back(Arg);
Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
@@ -6800,35 +7404,63 @@ public:
}
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
- for (auto L : C->decl_component_lists(VD)) {
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
+ for (const auto &L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
assert(!L.second.empty() &&
"Not expecting declaration with no component lists.");
- generateInfoForComponentList(
- C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
- Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
+ generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
+ L.second, BasePointers, Pointers, Sizes,
+ Types, PartialStruct, IsFirstComponentList,
+ C->isImplicit());
IsFirstComponentList = false;
}
+ }
- return;
+ /// Generate the base pointers, section pointers, sizes and map types
+ /// associated with the declare target link variables.
+ void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ // Map other list items in the map clause which are not captured variables
+ // but "declare target link" global variables.
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
+ for (const auto &L : C->component_lists()) {
+ if (!L.first)
+ continue;
+ const auto *VD = dyn_cast<VarDecl>(L.first);
+ if (!VD)
+ continue;
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ isDeclareTargetDeclaration(VD);
+ if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
+ continue;
+ StructRangeInfoTy PartialStruct;
+ generateInfoForComponentList(
+ C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
+ Pointers, Sizes, Types, PartialStruct,
+ /*IsFirstComponentList=*/true, C->isImplicit());
+ assert(!PartialStruct.Base.isValid() &&
+ "No partial structs for declare target link expected.");
+ }
+ }
}
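A small illustration of the case this helper covers: a map-clause list item that is a "declare target link" global rather than a captured variable (names invented):

    int big_table[1 << 20];
    #pragma omp declare target link(big_table)

    void bump(int i) {
      // 'big_table' is not a capture of the region; it reaches the device
      // through its link pointer, so its map entry comes from
      // generateInfoForDeclareTargetLink instead of generateInfoForCapture.
      #pragma omp target map(tofrom: big_table)
      big_table[i] += 1;
    }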
- /// \brief Generate the default map information for a given capture \a CI,
+ /// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
const FieldDecl &RI, llvm::Value *CV,
MapBaseValuesArrayTy &CurBasePointers,
MapValuesArrayTy &CurPointers,
MapValuesArrayTy &CurSizes,
- MapFlagsArrayTy &CurMapTypes) {
-
+ MapFlagsArrayTy &CurMapTypes) const {
// Do the default mapping.
if (CI.capturesThis()) {
CurBasePointers.push_back(CV);
CurPointers.push_back(CV);
- const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
+ const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
// Default map type.
CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
@@ -6843,7 +7475,7 @@ public:
} else {
// Pointers are implicitly mapped with a zero size and no flags
// (other than first map that is added for all implicit maps).
- CurMapTypes.push_back(0u);
+ CurMapTypes.push_back(OMP_MAP_NONE);
CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
}
} else {
@@ -6851,30 +7483,30 @@ public:
CurBasePointers.push_back(CV);
CurPointers.push_back(CV);
- const ReferenceType *PtrTy =
- cast<ReferenceType>(RI.getType().getTypePtr());
+ const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
QualType ElementType = PtrTy->getPointeeType();
CurSizes.push_back(CGF.getTypeSize(ElementType));
// The default map type for a scalar/complex type is 'to' because by
// default the value doesn't have to be retrieved. For an aggregate
// type, the default is 'tofrom'.
- CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
- CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
- : OMP_MAP_TO));
+ CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
}
// Every default map produces a single argument which is a target parameter.
CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
+
+ // Add flag stating this is an implicit map.
+ CurMapTypes.back() |= OMP_MAP_IMPLICIT;
}
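Roughly, the default path above fires for captures that have no explicit map clause; a sketch (declarations invented for the example):

    struct P { int a, b; } p; // aggregate capture: default OMP_MAP_TO | OMP_MAP_FROM
    double *q;                // pointer capture: zero-size entry, OMP_MAP_NONE
    void touch() {
      // Every default entry also gets OMP_MAP_TARGET_PARAM plus, per the new
      // code above, OMP_MAP_IMPLICIT so the runtime can tell compiler-generated
      // maps apart from user-written ones.
      #pragma omp target
      {
        p.a += p.b;
        p.b = (q != nullptr);
      }
    }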
};
enum OpenMPOffloadingReservedDeviceIDs {
- /// \brief Device ID if the device was not defined, runtime should get it
+ /// Device ID if the device was not defined; the runtime should get it
/// from environment variables in the spec.
OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
-/// \brief Emit the arrays used to pass the captures and map information to the
+/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
@@ -6884,8 +7516,8 @@ emitOffloadingArrays(CodeGenFunction &CGF,
MappableExprsHandler::MapValuesArrayTy &Sizes,
MappableExprsHandler::MapFlagsArrayTy &MapTypes,
CGOpenMPRuntime::TargetDataInfo &Info) {
- auto &CGM = CGF.CGM;
- auto &Ctx = CGF.getContext();
+ CodeGenModule &CGM = CGF.CGM;
+ ASTContext &Ctx = CGF.getContext();
// Reset the array information.
Info.clearArrayInfo();
@@ -6895,7 +7527,7 @@ emitOffloadingArrays(CodeGenFunction &CGF,
// Detect if we have any capture size requiring runtime evaluation of the
// size so that a constant array could be eventually used.
bool hasRuntimeEvaluationCaptureSize = false;
- for (auto *S : Sizes)
+ for (llvm::Value *S : Sizes)
if (!isa<llvm::Constant>(S)) {
hasRuntimeEvaluationCaptureSize = true;
break;
@@ -6924,48 +7556,53 @@ emitOffloadingArrays(CodeGenFunction &CGF,
// We expect all the sizes to be constant, so we collect them to create
// a constant array.
SmallVector<llvm::Constant *, 16> ConstSizes;
- for (auto S : Sizes)
+ for (llvm::Value *S : Sizes)
ConstSizes.push_back(cast<llvm::Constant>(S));
auto *SizesArrayInit = llvm::ConstantArray::get(
llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
+ std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
auto *SizesArrayGbl = new llvm::GlobalVariable(
CGM.getModule(), SizesArrayInit->getType(),
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
- SizesArrayInit, ".offload_sizes");
+ SizesArrayInit, Name);
SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
Info.SizesArray = SizesArrayGbl;
}
// The map types are always constant so we don't need to generate code to
// fill arrays. Instead, we create an array constant.
+ SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
+ llvm::copy(MapTypes, Mapping.begin());
llvm::Constant *MapTypesArrayInit =
- llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
+ llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
+ std::string MaptypesName =
+ CGM.getOpenMPRuntime().getName({"offload_maptypes"});
auto *MapTypesArrayGbl = new llvm::GlobalVariable(
CGM.getModule(), MapTypesArrayInit->getType(),
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
- MapTypesArrayInit, ".offload_maptypes");
+ MapTypesArrayInit, MaptypesName);
MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
Info.MapTypesArray = MapTypesArrayGbl;
- for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
- llvm::Value *BPVal = *BasePointers[i];
+ for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
+ llvm::Value *BPVal = *BasePointers[I];
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.BasePointersArray, 0, i);
+ Info.BasePointersArray, 0, I);
BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(BPVal, BPAddr);
if (Info.requiresDevicePointerInfo())
- if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
- Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
+ if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
- llvm::Value *PVal = Pointers[i];
+ llvm::Value *PVal = Pointers[I];
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.PointersArray, 0, i);
+ Info.PointersArray, 0, I);
P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
@@ -6976,22 +7613,22 @@ emitOffloadingArrays(CodeGenFunction &CGF,
llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
Info.SizesArray,
/*Idx0=*/0,
- /*Idx1=*/i);
+ /*Idx1=*/I);
Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
CGF.Builder.CreateStore(
- CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
+ CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
SAddr);
}
}
}
}
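Conceptually, the function above materializes four parallel arrays, one slot per map entry, mirroring the offload_sizes/offload_maptypes globals it names. A hand-written equivalent for two captures might look like this sketch (the bit values are stand-ins for the flag enum in this file):

    #include <cstdint>
    enum : uint64_t { TO = 1, FROM = 2, TARGET_PARAM = 0x20, IMPLICIT = 0x200 };
    int x;
    struct S { int a, b; } s;
    void    *offload_baseptrs[] = { (void *)&x, (void *)&s };
    void    *offload_ptrs[]     = { (void *)&x, (void *)&s };
    uint64_t offload_sizes[]    = { sizeof x, sizeof s };
    uint64_t offload_maptypes[] = { TO | TARGET_PARAM | IMPLICIT,
                                    TO | FROM | TARGET_PARAM | IMPLICIT };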
-/// \brief Emit the arguments to be passed to the runtime library based on the
+/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
- auto &CGM = CGF.CGM;
+ CodeGenModule &CGM = CGF.CGM;
if (Info.NumberOfPtrs) {
BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
@@ -7023,86 +7660,27 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device,
- ArrayRef<llvm::Value *> CapturedVars) {
+ const Expr *IfCond, const Expr *Device) {
if (!CGF.HaveInsertPoint())
return;
assert(OutlinedFn && "Invalid outlined function!");
- // Fill up the arrays with all the captured variables.
- MappableExprsHandler::MapValuesArrayTy KernelArgs;
- MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
- MappableExprsHandler::MapValuesArrayTy Pointers;
- MappableExprsHandler::MapValuesArrayTy Sizes;
- MappableExprsHandler::MapFlagsArrayTy MapTypes;
-
- MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
- MappableExprsHandler::MapValuesArrayTy CurPointers;
- MappableExprsHandler::MapValuesArrayTy CurSizes;
- MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
-
- // Get mappable expression information.
- MappableExprsHandler MEHandler(D, CGF);
-
- const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
- auto RI = CS.getCapturedRecordDecl()->field_begin();
- auto CV = CapturedVars.begin();
- for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
- CE = CS.capture_end();
- CI != CE; ++CI, ++RI, ++CV) {
- CurBasePointers.clear();
- CurPointers.clear();
- CurSizes.clear();
- CurMapTypes.clear();
-
- // VLA sizes are passed to the outlined region by copy and do not have map
- // information associated.
- if (CI->capturesVariableArrayType()) {
- CurBasePointers.push_back(*CV);
- CurPointers.push_back(*CV);
- CurSizes.push_back(CGF.getTypeSize(RI->getType()));
- // Copy to the device as an argument. No need to retrieve it.
- CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
- MappableExprsHandler::OMP_MAP_TARGET_PARAM);
- } else {
- // If we have any information in the map clause, we use it, otherwise we
- // just do a default mapping.
- MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
- CurSizes, CurMapTypes);
- if (CurBasePointers.empty())
- MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
- CurPointers, CurSizes, CurMapTypes);
- }
- // We expect to have at least an element of information for this capture.
- assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
- assert(CurBasePointers.size() == CurPointers.size() &&
- CurBasePointers.size() == CurSizes.size() &&
- CurBasePointers.size() == CurMapTypes.size() &&
- "Inconsistent map information sizes!");
-
- // The kernel args are always the first elements of the base pointers
- // associated with a capture.
- KernelArgs.push_back(*CurBasePointers.front());
- // We need to append the results of this capture to what we already have.
- BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
- Pointers.append(CurPointers.begin(), CurPointers.end());
- Sizes.append(CurSizes.begin(), CurSizes.end());
- MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
- }
+ const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
+ auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ };
+ emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
+ CodeGenFunction::OMPTargetDataInfo InputInfo;
+ llvm::Value *MapTypesArray = nullptr;
// Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
- OutlinedFn, OutlinedFnID, &D,
- &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
- auto &RT = CGF.CGM.getOpenMPRuntime();
- // Emit the offloading arrays.
- TargetDataInfo Info;
- emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
- emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
- Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info);
-
+ auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
+ &MapTypesArray, &CS, RequiresOuterTask,
+ &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
// pointer is used by the runtime library to identify the current target
@@ -7125,13 +7703,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
}
// Emit the number of elements in the offloading arrays.
- llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
+ llvm::Value *PointerNum =
+ CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
// Return value of the runtime offloading call.
llvm::Value *Return;
- auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
- auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
+ llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
+ llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
@@ -7169,25 +7748,30 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// passed to the runtime library - a 32-bit integer with the value zero.
assert(NumThreads && "Thread limit expression should be available along "
"with number of teams.");
- llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID,
- PointerNum, Info.BasePointersArray,
- Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, NumTeams,
- NumThreads};
+ llvm::Value *OffloadingArgs[] = {DeviceID,
+ OutlinedFnID,
+ PointerNum,
+ InputInfo.BasePointersArray.getPointer(),
+ InputInfo.PointersArray.getPointer(),
+ InputInfo.SizesArray.getPointer(),
+ MapTypesArray,
+ NumTeams,
+ NumThreads};
Return = CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
- : OMPRTL__tgt_target_teams),
+ createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
+ : OMPRTL__tgt_target_teams),
OffloadingArgs);
} else {
- llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID,
- PointerNum, Info.BasePointersArray,
- Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray};
+ llvm::Value *OffloadingArgs[] = {DeviceID,
+ OutlinedFnID,
+ PointerNum,
+ InputInfo.BasePointersArray.getPointer(),
+ InputInfo.PointersArray.getPointer(),
+ InputInfo.SizesArray.getPointer(),
+ MapTypesArray};
Return = CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
- : OMPRTL__tgt_target),
+ createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
+ : OMPRTL__tgt_target),
OffloadingArgs);
}
@@ -7200,17 +7784,120 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
CGF.EmitBlock(OffloadFailedBlock);
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
+ if (RequiresOuterTask) {
+ CapturedVars.clear();
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ }
+ emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
CGF.EmitBranch(OffloadContBlock);
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
};
// Notify that the host version must be executed.
- auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
- PrePostActionTy &) {
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
- KernelArgs);
+ auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
+ RequiresOuterTask](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ if (RequiresOuterTask) {
+ CapturedVars.clear();
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ }
+ emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+ };
+
+ auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
+ &CapturedVars, RequiresOuterTask,
+ &CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ // Fill up the arrays with all the captured variables.
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
+
+ // Get mappable expression information.
+ MappableExprsHandler MEHandler(D, CGF);
+
+ auto RI = CS.getCapturedRecordDecl()->field_begin();
+ auto CV = CapturedVars.begin();
+ for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+ CE = CS.capture_end();
+ CI != CE; ++CI, ++RI, ++CV) {
+ MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
+ MappableExprsHandler::MapValuesArrayTy CurPointers;
+ MappableExprsHandler::MapValuesArrayTy CurSizes;
+ MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
+ MappableExprsHandler::StructRangeInfoTy PartialStruct;
+
+ // VLA sizes are passed to the outlined region by copy and do not have map
+ // information associated.
+ if (CI->capturesVariableArrayType()) {
+ CurBasePointers.push_back(*CV);
+ CurPointers.push_back(*CV);
+ CurSizes.push_back(CGF.getTypeSize(RI->getType()));
+ // Copy to the device as an argument. No need to retrieve it.
+ CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
+ MappableExprsHandler::OMP_MAP_TARGET_PARAM);
+ } else {
+ // If we have any information in the map clause, we use it; otherwise, we
+ // just do a default mapping.
+ MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
+ CurSizes, CurMapTypes, PartialStruct);
+ if (CurBasePointers.empty())
+ MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
+ CurPointers, CurSizes, CurMapTypes);
+ }
+ // We expect to have at least an element of information for this capture.
+ assert(!CurBasePointers.empty() &&
+ "Non-existing map pointer for capture!");
+ assert(CurBasePointers.size() == CurPointers.size() &&
+ CurBasePointers.size() == CurSizes.size() &&
+ CurBasePointers.size() == CurMapTypes.size() &&
+ "Inconsistent map information sizes!");
+
+ // If there is an entry in PartialStruct, it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid())
+ MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
+ CurMapTypes, PartialStruct);
+
+ // We need to append the results of this capture to what we already have.
+ BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+ Pointers.append(CurPointers.begin(), CurPointers.end());
+ Sizes.append(CurSizes.begin(), CurSizes.end());
+ MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
+ }
+ // Map other list items in the map clause which are not captured variables
+ // but "declare target link" global variables.
+ MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
+ MapTypes);
+
+ TargetDataInfo Info;
+ // Fill up the arrays and create the arguments.
+ emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+ emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
+ Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray, Info);
+ InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+ InputInfo.BasePointersArray =
+ Address(Info.BasePointersArray, CGM.getPointerAlign());
+ InputInfo.PointersArray =
+ Address(Info.PointersArray, CGM.getPointerAlign());
+ InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
+ MapTypesArray = Info.MapTypesArray;
+ if (RequiresOuterTask)
+ CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+ else
+ emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+ };
+
+ auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
+ CodeGenFunction &CGF, PrePostActionTy &) {
+ if (RequiresOuterTask) {
+ CodeGenFunction::OMPTargetDataInfo InputInfo;
+ CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
+ } else {
+ emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
+ }
};
// If we have a target function ID it means that we need to support
@@ -7218,14 +7905,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// regardless of the conditional in the if clause if, e.g., the user does not
// specify target triples.
if (OutlinedFnID) {
- if (IfCond)
- emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
- else {
- RegionCodeGenTy ThenRCG(ThenGen);
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
+ } else {
+ RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
}
} else {
- RegionCodeGenTy ElseRCG(ElseGen);
+ RegionCodeGenTy ElseRCG(TargetElseGen);
ElseRCG(CGF);
}
}
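The RequiresOuterTask split above is driven purely by the presence of a depend clause on the directive; e.g. this sketch takes the task-based path, while the same region without depend is emitted inline:

    int a;
    void step() {
      // depend(...) makes RequiresOuterTask true, so the offload is wrapped
      // in an outer task via EmitOMPTargetTaskBasedDirective.
      #pragma omp target nowait depend(inout: a) map(tofrom: a)
      a += 1;
      #pragma omp taskwait
    }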
@@ -7236,13 +7923,13 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
return;
// Codegen OMP target directives that offload compute to the device.
- bool requiresDeviceCodegen =
+ bool RequiresDeviceCodegen =
isa<OMPExecutableDirective>(S) &&
isOpenMPTargetExecutionDirective(
cast<OMPExecutableDirective>(S)->getDirectiveKind());
- if (requiresDeviceCodegen) {
- auto &E = *cast<OMPExecutableDirective>(S);
+ if (RequiresDeviceCodegen) {
+ const auto &E = *cast<OMPExecutableDirective>(S);
unsigned DeviceID;
unsigned FileID;
unsigned Line;
@@ -7255,66 +7942,118 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
ParentName, Line))
return;
- switch (S->getStmtClass()) {
- case Stmt::OMPTargetDirectiveClass:
- CodeGenFunction::EmitOMPTargetDeviceFunction(
- CGM, ParentName, cast<OMPTargetDirective>(*S));
+ switch (E.getDirectiveKind()) {
+ case OMPD_target:
+ CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
+ cast<OMPTargetDirective>(E));
break;
- case Stmt::OMPTargetParallelDirectiveClass:
+ case OMPD_target_parallel:
CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
- CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
+ CGM, ParentName, cast<OMPTargetParallelDirective>(E));
break;
- case Stmt::OMPTargetTeamsDirectiveClass:
+ case OMPD_target_teams:
CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
- CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
+ CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
break;
- case Stmt::OMPTargetTeamsDistributeDirectiveClass:
+ case OMPD_target_teams_distribute:
CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
- CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
+ CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
break;
- case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
+ case OMPD_target_teams_distribute_simd:
CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
- CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
+ CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
break;
- case Stmt::OMPTargetParallelForDirectiveClass:
+ case OMPD_target_parallel_for:
CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
- CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
+ CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
break;
- case Stmt::OMPTargetParallelForSimdDirectiveClass:
+ case OMPD_target_parallel_for_simd:
CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
- CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
+ CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
break;
- case Stmt::OMPTargetSimdDirectiveClass:
+ case OMPD_target_simd:
CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
- CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
+ CGM, ParentName, cast<OMPTargetSimdDirective>(E));
break;
- default:
+ case OMPD_target_teams_distribute_parallel_for:
+ CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
+ CGM, ParentName,
+ cast<OMPTargetTeamsDistributeParallelForDirective>(E));
+ break;
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ CodeGenFunction::
+ EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+ CGM, ParentName,
+ cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
+ break;
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
return;
}
- if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
- if (!E->hasAssociatedStmt())
+ if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
+ if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
return;
scanForTargetRegionsFunctions(
- cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
- ParentName);
+ E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
return;
}
// If this is a lambda function, look into its body.
- if (auto *L = dyn_cast<LambdaExpr>(S))
+ if (const auto *L = dyn_cast<LambdaExpr>(S))
S = L->getBody();
// Keep looking for target regions recursively.
- for (auto *II : S->children())
+ for (const Stmt *II : S->children())
scanForTargetRegionsFunctions(II, ParentName);
}
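The lambda hook above matters for patterns like the following sketch, where the only target region in a function lives inside a lambda body and is found via L->getBody():

    void run(int *v, int n) {
      auto kernel = [=]() {
        #pragma omp target map(tofrom: v[0:n])
        for (int i = 0; i < n; ++i)
          v[i] *= 2;
      };
      kernel();
    }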
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
- auto &FD = *cast<FunctionDecl>(GD.getDecl());
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
// If emitting code for the host, we do not process FD here. Instead we do
// the normal code generation.
@@ -7322,12 +8061,11 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
return false;
// Try to detect target regions in the function.
- scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
+ scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD));
- // We should not emit any function other that the ones created during the
- // scanning. Therefore, we signal that this function is completely dealt
- // with.
- return true;
+ // Do not emit the function if it is not marked as declare target.
+ return !isDeclareTargetDeclaration(FD) &&
+ AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
@@ -7338,33 +8076,101 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
// regions in it. We use the complete variant to produce the kernel name
// mangling.
QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
- if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
- for (auto *Ctor : RD->ctors()) {
+ if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
+ for (const CXXConstructorDecl *Ctor : RD->ctors()) {
StringRef ParentName =
CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
}
- auto *Dtor = RD->getDestructor();
- if (Dtor) {
+ if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
StringRef ParentName =
CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
}
}
- // If we are in target mode, we do not emit any global (declare target is not
- // implemented yet). Therefore we signal that GD was processed in this case.
- return true;
+ // Do not emit the variable if it is not marked as declare target.
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
+ return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
+}
+
+void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
+ llvm::Constant *Addr) {
+ if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ isDeclareTargetDeclaration(VD)) {
+ OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
+ StringRef VarName;
+ CharUnits VarSize;
+ llvm::GlobalValue::LinkageTypes Linkage;
+ switch (*Res) {
+ case OMPDeclareTargetDeclAttr::MT_To:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+ VarName = CGM.getMangledName(VD);
+ VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
+ Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ break;
+ case OMPDeclareTargetDeclAttr::MT_Link:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ VarName = Addr->getName();
+ Addr = nullptr;
+ } else {
+ VarName = getAddrOfDeclareTargetLink(VD).getName();
+ Addr =
+ cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ }
+ VarSize = CGM.getPointerSize();
+ Linkage = llvm::GlobalValue::WeakAnyLinkage;
+ break;
+ }
+ OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
+ VarName, Addr, VarSize, Flags, Linkage);
+ }
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
- auto *VD = GD.getDecl();
- if (isa<FunctionDecl>(VD))
+ if (isa<FunctionDecl>(GD.getDecl()))
return emitTargetFunctions(GD);
return emitTargetGlobalVariable(GD);
}
+CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
+ CodeGenModule &CGM)
+ : CGM(CGM) {
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
+ CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
+ }
+}
+
+CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
+ if (CGM.getLangOpts().OpenMPIsDevice)
+ CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
+}
+
+bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
+ if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
+ return true;
+
+ const auto *D = cast<FunctionDecl>(GD.getDecl());
+ const FunctionDecl *FD = D->getCanonicalDecl();
+ // Do not emit the function if it is marked as declare target, as it was
+ // already emitted.
+ if (isDeclareTargetDeclaration(D)) {
+ if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
+ if (auto *F = dyn_cast_or_null<llvm::Function>(
+ CGM.GetGlobalValue(CGM.getMangledName(GD))))
+ return !F->isDeclaration();
+ return false;
+ }
+ return true;
+ }
+
+ return !AlreadyEmittedTargetFunctions.insert(FD).second;
+}
+
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
// If we have offloading in the current module, we need to emit the entries
// now and register the offloading descriptor.
@@ -7384,7 +8190,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
- auto *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
CodeGenFunction::RunCleanupsScope Scope(CGF);
// Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
@@ -7396,7 +8202,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
- auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
+ llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
@@ -7407,16 +8213,16 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
- auto *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *NumTeamsVal =
- (NumTeams)
+ NumTeams
? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
CGF.CGM.Int32Ty, /* isSigned = */ true)
: CGF.Builder.getInt32(0);
llvm::Value *ThreadLimitVal =
- (ThreadLimit)
+ ThreadLimit
? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
CGF.CGM.Int32Ty, /* isSigned = */ true)
: CGF.Builder.getInt32(0);
@@ -7473,7 +8279,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
}
// Emit the number of elements in the offloading arrays.
- auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
+ llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
@@ -7509,7 +8315,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
}
// Emit the number of elements in the offloading arrays.
- auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
+ llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
@@ -7596,9 +8402,6 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
OpenMPRTLFunction RTLFn;
switch (D.getDirectiveKind()) {
- default:
- llvm_unreachable("Unexpected standalone target data directive.");
- break;
case OMPD_target_enter_data:
RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
: OMPRTL__tgt_target_data_begin;
@@ -7611,6 +8414,58 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
: OMPRTL__tgt_target_data_update;
break;
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_target:
+ case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected standalone target data directive.");
+ break;
}
CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
};
@@ -7644,13 +8499,13 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
if (D.hasClausesOfKind<OMPDependClause>())
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
- emitInlinedDirective(CGF, OMPD_target_update, ThenGen);
+ emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
};
- if (IfCond)
+ if (IfCond) {
emitOMPIfClause(CGF, IfCond, TargetThenGen,
[](CodeGenFunction &CGF, PrePostActionTy &) {});
- else {
+ } else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
}
@@ -7693,11 +8548,11 @@ static unsigned evaluateCDTSize(const FunctionDecl *FD,
return 0;
ASTContext &C = FD->getASTContext();
QualType CDT;
- if (!RetType.isNull() && !RetType->isVoidType())
+ if (!RetType.isNull() && !RetType->isVoidType()) {
CDT = RetType;
- else {
+ } else {
unsigned Offset = 0;
- if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
if (ParamAttrs[Offset].Kind == Vector)
CDT = C.getPointerType(C.getRecordType(MD->getParent()));
++Offset;
@@ -7755,17 +8610,18 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
Masked.push_back('M');
break;
}
- for (auto Mask : Masked) {
- for (auto &Data : ISAData) {
+ for (char Mask : Masked) {
+ for (const ISADataTy &Data : ISAData) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << "_ZGV" << Data.ISA << Mask;
if (!VLENVal) {
Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
evaluateCDTSize(FD, ParamAttrs));
- } else
+ } else {
Out << VLENVal;
- for (auto &ParamAttr : ParamAttrs) {
+ }
+ for (const ParamAttrTy &ParamAttr : ParamAttrs) {
switch (ParamAttr.Kind){
case LinearWithVarStride:
Out << 's' << ParamAttr.StrideOrArg;
@@ -7794,90 +8650,95 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
llvm::Function *Fn) {
ASTContext &C = CGM.getContext();
- FD = FD->getCanonicalDecl();
+ FD = FD->getMostRecentDecl();
// Map params to their positions in function decl.
llvm::DenseMap<const Decl *, unsigned> ParamPositions;
if (isa<CXXMethodDecl>(FD))
- ParamPositions.insert({FD, 0});
+ ParamPositions.try_emplace(FD, 0);
unsigned ParamPos = ParamPositions.size();
- for (auto *P : FD->parameters()) {
- ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
+ for (const ParmVarDecl *P : FD->parameters()) {
+ ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
++ParamPos;
}
- for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
- llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
- // Mark uniform parameters.
- for (auto *E : Attr->uniforms()) {
- E = E->IgnoreParenImpCasts();
- unsigned Pos;
- if (isa<CXXThisExpr>(E))
- Pos = ParamPositions[FD];
- else {
- auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
- ->getCanonicalDecl();
- Pos = ParamPositions[PVD];
- }
- ParamAttrs[Pos].Kind = Uniform;
- }
- // Get alignment info.
- auto NI = Attr->alignments_begin();
- for (auto *E : Attr->aligneds()) {
- E = E->IgnoreParenImpCasts();
- unsigned Pos;
- QualType ParmTy;
- if (isa<CXXThisExpr>(E)) {
- Pos = ParamPositions[FD];
- ParmTy = E->getType();
- } else {
- auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
- ->getCanonicalDecl();
- Pos = ParamPositions[PVD];
- ParmTy = PVD->getType();
+ while (FD) {
+ for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
+ llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
+ // Mark uniform parameters.
+ for (const Expr *E : Attr->uniforms()) {
+ E = E->IgnoreParenImpCasts();
+ unsigned Pos;
+ if (isa<CXXThisExpr>(E)) {
+ Pos = ParamPositions[FD];
+ } else {
+ const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+ ->getCanonicalDecl();
+ Pos = ParamPositions[PVD];
+ }
+ ParamAttrs[Pos].Kind = Uniform;
}
- ParamAttrs[Pos].Alignment =
- (*NI) ? (*NI)->EvaluateKnownConstInt(C)
+ // Get alignment info.
+ auto NI = Attr->alignments_begin();
+ for (const Expr *E : Attr->aligneds()) {
+ E = E->IgnoreParenImpCasts();
+ unsigned Pos;
+ QualType ParmTy;
+ if (isa<CXXThisExpr>(E)) {
+ Pos = ParamPositions[FD];
+ ParmTy = E->getType();
+ } else {
+ const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+ ->getCanonicalDecl();
+ Pos = ParamPositions[PVD];
+ ParmTy = PVD->getType();
+ }
+ ParamAttrs[Pos].Alignment =
+ (*NI)
+ ? (*NI)->EvaluateKnownConstInt(C)
: llvm::APSInt::getUnsigned(
C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
.getQuantity());
- ++NI;
- }
- // Mark linear parameters.
- auto SI = Attr->steps_begin();
- auto MI = Attr->modifiers_begin();
- for (auto *E : Attr->linears()) {
- E = E->IgnoreParenImpCasts();
- unsigned Pos;
- if (isa<CXXThisExpr>(E))
- Pos = ParamPositions[FD];
- else {
- auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
- ->getCanonicalDecl();
- Pos = ParamPositions[PVD];
+ ++NI;
}
- auto &ParamAttr = ParamAttrs[Pos];
- ParamAttr.Kind = Linear;
- if (*SI) {
- if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
- Expr::SE_AllowSideEffects)) {
- if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
- if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
- ParamAttr.Kind = LinearWithVarStride;
- ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
- ParamPositions[StridePVD->getCanonicalDecl()]);
+ // Mark linear parameters.
+ auto SI = Attr->steps_begin();
+ auto MI = Attr->modifiers_begin();
+ for (const Expr *E : Attr->linears()) {
+ E = E->IgnoreParenImpCasts();
+ unsigned Pos;
+ if (isa<CXXThisExpr>(E)) {
+ Pos = ParamPositions[FD];
+ } else {
+ const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+ ->getCanonicalDecl();
+ Pos = ParamPositions[PVD];
+ }
+ ParamAttrTy &ParamAttr = ParamAttrs[Pos];
+ ParamAttr.Kind = Linear;
+ if (*SI) {
+ if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
+ Expr::SE_AllowSideEffects)) {
+ if (const auto *DRE =
+ cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
+ if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
+ ParamAttr.Kind = LinearWithVarStride;
+ ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
+ ParamPositions[StridePVD->getCanonicalDecl()]);
+ }
}
}
}
+ ++SI;
+ ++MI;
}
- ++SI;
- ++MI;
+ llvm::APSInt VLENVal;
+ if (const Expr *VLEN = Attr->getSimdlen())
+ VLENVal = VLEN->EvaluateKnownConstInt(C);
+ OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
+ if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
+ CGM.getTriple().getArch() == llvm::Triple::x86_64)
+ emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
}
- llvm::APSInt VLENVal;
- if (const Expr *VLEN = Attr->getSimdlen())
- VLENVal = VLEN->EvaluateKnownConstInt(C);
- OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
- if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
- CGM.getTriple().getArch() == llvm::Triple::x86_64)
- emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
+ FD = FD->getPreviousDecl();
}
}
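To make the mangling concrete: assuming the x86 ISA table pairs 'b'/'c'/'d'/'e' with 128/256/256/512-bit vector registers (as the emitX86DeclareSimdFunction path suggests), a declaration like this sketch would get vector variants named through the "_ZGV" scheme above:

    #pragma omp declare simd notinbranch
    double f(double x);
    // The characteristic data type is 'double' (64 bits), so VLEN is
    // VecRegSize / 64, giving roughly: _ZGVbN2v_f, _ZGVcN4v_f, _ZGVdN4v_f,
    // _ZGVeN8v_f ('N' = notinbranch, digit = VLEN, 'v' = vector parameter).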
@@ -7926,8 +8787,9 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
addFieldToRecordDecl(C, RD, Int64Ty);
RD->completeDefinition();
KmpDimTy = C.getRecordType(RD);
- } else
+ } else {
RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
+ }
Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
@@ -7979,18 +8841,19 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
getThreadID(CGF, C->getLocStart()),
CntAddr.getPointer()};
llvm::Value *RTLFn;
- if (C->getDependencyKind() == OMPC_DEPEND_source)
+ if (C->getDependencyKind() == OMPC_DEPEND_source) {
RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
- else {
+ } else {
assert(C->getDependencyKind() == OMPC_DEPEND_sink);
RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
}
CGF.EmitRuntimeCall(RTLFn, Args);
}
-void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
- ArrayRef<llvm::Value *> Args,
- SourceLocation Loc) const {
+void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *Callee,
+ ArrayRef<llvm::Value *> Args) const {
+ assert(Loc.isValid() && "Outlined function call location must be valid.");
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
@@ -8005,8 +8868,7 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
void CGOpenMPRuntime::emitOutlinedFunctionCall(
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args) const {
- assert(Loc.isValid() && "Outlined function call location must be valid.");
- emitCall(CGF, OutlinedFn, Args, Loc);
+ emitCall(CGF, Loc, OutlinedFn, Args);
}
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
@@ -8014,3 +8876,303 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
const VarDecl *TargetParam) const {
return CGF.GetAddrOfLocalVar(NativeParam);
}
+
+Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD) {
+ return Address::invalid();
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ bool Tied, unsigned &NumberOfParts) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars,
+ const Expr *IfCond) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitCriticalRegion(
+ CodeGenFunction &CGF, StringRef CriticalName,
+ const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
+ const Expr *Hint) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &MasterOpGen,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
+ CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitSingleRegion(
+ CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
+ SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
+ ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
+ ArrayRef<const Expr *> AssignmentOps) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &OrderedOpGen,
+ SourceLocation Loc,
+ bool IsThreads) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ OpenMPDirectiveKind Kind,
+ bool EmitChecks,
+ bool ForceSimpleCall) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForDispatchInit(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
+ bool Ordered, const DispatchRTInput &DispatchValues) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForStaticInit(
+ CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
+ const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ unsigned IVSize,
+ bool IVSigned) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ OpenMPDirectiveKind DKind) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ unsigned IVSize, bool IVSigned,
+ Address IL, Address LB,
+ Address UB, Address ST) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
+ llvm::Value *NumThreads,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
+ const VarDecl *VD,
+ Address VDAddr,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
+ const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
+ CodeGenFunction *CGF) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
+ CodeGenFunction &CGF, QualType VarType, StringRef Name) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
+ ArrayRef<const Expr *> Vars,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D,
+ llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds,
+ const Expr *IfCond,
+ const OMPTaskDataTy &Data) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskLoopCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+ llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
+ const Expr *IfCond, const OMPTaskDataTy &Data) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitReduction(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+ assert(Options.SimpleReduction && "Only simple reduction is expected.");
+ CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
+ ReductionOps, Options);
+}
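Note: only reductions that need no cross-thread combining reach this override; a source-level case that qualifies under SIMD-only compilation (illustrative):

    // In SIMD-only mode the reduction is 'simple': the private partial
    // result is combined back sequentially, with no __kmpc_reduce calls.
    double sum = 0.0;
    #pragma omp simd reduction(+ : sum)
    for (int i = 0; i < n; ++i)
      sum += a[i];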
+
+llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG,
+ unsigned N) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ llvm::Value *ReductionsPtr,
+ LValue SharedLVal) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitCancellationPointCall(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind CancelRegion) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
+ SourceLocation Loc, const Expr *IfCond,
+ OpenMPDirectiveKind CancelRegion) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ llvm::Value *OutlinedFn,
+ llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
+ return false;
+}
+
+llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
+ return nullptr;
+}
+
+void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
+ const Expr *NumTeams,
+ const Expr *ThreadLimit,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetDataCalls(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
+ const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
+ const Expr *Device) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
+ const OMPLoopDirective &D) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
+ const OMPDependClause *C) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+const VarDecl *
+CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
+ const VarDecl *NativeParam) const {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address
+CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
+ const VarDecl *NativeParam,
+ const VarDecl *TargetParam) const {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
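Note: the stubs above trap because this class backs the -fopenmp-simd driver mode, in which only simd-related constructs are lowered at all. A minimal sketch of a translation unit that stays on the supported path (assuming the -fopenmp-simd flag; names illustrative):

    // Built with: clang -fopenmp-simd -O2 scale.c
    // The simd directive is honored (loop vectorization metadata is
    // emitted); constructs that need libomp never reach these stubs.
    void scale(float *x, int n) {
    #pragma omp simd
      for (int i = 0; i < n; ++i)
        x[i] *= 2.0f;
    }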
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 94a143841373..01ff0c20fd66 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -133,7 +133,7 @@ private:
/// Base declarations for the reduction items.
SmallVector<const VarDecl *, 4> BaseDecls;
- /// Emits lvalue for shared expresion.
+ /// Emits lvalue for shared expression.
LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E);
/// Emits upper bound for shared expression (if array section).
LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E);
@@ -191,21 +191,41 @@ public:
}
/// Returns the base declaration of the reduction item.
const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; }
+ /// Returns the reference expression of the reduction item.
+ const Expr *getRefExpr(unsigned N) const { return ClausesData[N].Ref; }
/// Returns true if the initialization of the reduction item uses initializer
/// from declare reduction construct.
bool usesReductionInitializer(unsigned N) const;
};
class CGOpenMPRuntime {
+public:
+ /// Allows disabling the automatic handling of functions used in target
+ /// regions as if they were marked `omp declare target`.
+ class DisableAutoDeclareTargetRAII {
+ CodeGenModule &CGM;
+ bool SavedShouldMarkAsGlobal;
+
+ public:
+ DisableAutoDeclareTargetRAII(CodeGenModule &CGM);
+ ~DisableAutoDeclareTargetRAII();
+ };
+
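Note: a sketch of how this guard is meant to be used (the enclosing code is hypothetical):

    {
      CGOpenMPRuntime::DisableAutoDeclareTargetRAII NoAutoTarget(CGM);
      // Declarations referenced while this guard is alive are not
      // implicitly marked as 'omp declare target'.
      emitHostOnlyHelper(CGF); // hypothetical helper
    } // destructor restores the saved ShouldMarkAsGlobal state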
protected:
CodeGenModule &CGM;
+ StringRef FirstSeparator, Separator;
- /// \brief Creates offloading entry for the provided entry ID \a ID,
+ /// Constructor that allows the name separator for the variables to be
+ /// redefined.
+ explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
+ StringRef Separator);
+
+ /// Creates offloading entry for the provided entry ID \a ID,
/// address \a Addr, size \a Size, and flags \a Flags.
virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
- uint64_t Size, int32_t Flags = 0);
+ uint64_t Size, int32_t Flags,
+ llvm::GlobalValue::LinkageTypes Linkage);
- /// \brief Helper to emit outlined function for 'target' directive.
+ /// Helper to emit outlined function for 'target' directive.
/// \param D Directive to emit.
/// \param ParentName Name of the function that encloses the target region.
/// \param OutlinedFn Outlined function value to be defined by this call.
@@ -221,7 +241,7 @@ protected:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
- /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
+ /// Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
/// ThenGen();
@@ -232,52 +252,56 @@ protected:
const RegionCodeGenTy &ThenGen,
const RegionCodeGenTy &ElseGen);
- /// \brief Emits object of ident_t type with info for source location.
+ /// Emits object of ident_t type with info for source location.
/// \param Flags Flags for OpenMP location.
///
llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
unsigned Flags = 0);
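Note: the ident_t object built here mirrors the layout used by the LLVM OpenMP runtime; for reference (field names as in the runtime's kmp.h):

    typedef struct ident {
      kmp_int32 reserved_1;
      kmp_int32 flags;     // OpenMP location flags (the Flags argument above)
      kmp_int32 reserved_2;
      kmp_int32 reserved_3;
      char const *psource; // ";file;function;line;column;;" source string
    } ident_t;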
- /// \brief Returns pointer to ident_t type.
+ /// Returns pointer to ident_t type.
llvm::Type *getIdentTyPointerTy();
- /// \brief Gets thread id value for the current thread.
+ /// Gets thread id value for the current thread.
///
llvm::Value *getThreadID(CodeGenFunction &CGF, SourceLocation Loc);
- /// \brief Get the function name of an outlined region.
+ /// Get the function name of an outlined region.
// The name can be customized depending on the target.
//
virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; }
/// Emits \p Callee function call with arguments \p Args with location \p Loc.
- void emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
- ArrayRef<llvm::Value *> Args = llvm::None,
- SourceLocation Loc = SourceLocation()) const;
+ void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee,
+ ArrayRef<llvm::Value *> Args = llvm::None) const;
+
+ /// Emits address of the word in a memory where current thread id is
+ /// stored.
+ virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
private:
- /// \brief Default const ident_t object used for initialization of all other
+ /// Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource = nullptr;
- /// \brief Map of flags and corresponding default locations.
+ /// Map of flags and corresponding default locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
Address getOrCreateDefaultLocation(unsigned Flags);
+ QualType IdentQTy;
llvm::StructType *IdentTy = nullptr;
- /// \brief Map for SourceLocation and OpenMP runtime library debug locations.
+ /// Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
OpenMPDebugLocMapTy OpenMPDebugLocMap;
- /// \brief The type for a microtask which gets passed to __kmpc_fork_call().
+ /// The type for a microtask which gets passed to __kmpc_fork_call().
/// Original representation is:
/// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...);
llvm::FunctionType *Kmpc_MicroTy = nullptr;
- /// \brief Stores debug location and ThreadID for the function.
+ /// Stores debug location and ThreadID for the function.
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
llvm::Value *ThreadID;
};
- /// \brief Map of local debug location, ThreadId and functions.
+ /// Map of local debug location, ThreadId and functions.
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
OpenMPLocThreadIDMapTy;
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap;
@@ -295,20 +319,20 @@ private:
IdentifierInfo *Out = nullptr;
IdentifierInfo *Priv = nullptr;
IdentifierInfo *Orig = nullptr;
- /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32
+ /// Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
- /// \brief An ordered map of auto-generated variables to their unique names.
+ /// An ordered map of auto-generated variables to their unique names.
/// It stores variables with the following names: 1) ".gomp_critical_user_" +
/// <critical_section_name> + ".var" for "omp critical" directives; 2)
/// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
/// variables.
llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
InternalVars;
- /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
+ /// Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
llvm::Type *KmpRoutineEntryPtrTy = nullptr;
QualType KmpRoutineEntryPtrQTy;
- /// \brief Type typedef struct kmp_task {
+ /// Type typedef struct kmp_task {
/// void * shareds; /**< pointer to block of pointers to
/// shared vars */
/// kmp_routine_entry_t routine; /**< pointer to routine to call for
@@ -322,7 +346,7 @@ private:
QualType SavedKmpTaskTQTy;
/// Saved kmp_task_t for taskloop-based directive.
QualType SavedKmpTaskloopTQTy;
- /// \brief Type typedef struct kmp_depend_info {
+ /// Type typedef struct kmp_depend_info {
/// kmp_intptr_t base_addr;
/// size_t len;
/// struct {
@@ -337,7 +361,7 @@ private:
/// kmp_int64 st; // stride
/// };
QualType KmpDimTy;
- /// \brief Type struct __tgt_offload_entry{
+ /// Type struct __tgt_offload_entry{
/// void *addr; // Pointer to the offload entry info.
/// // (function or global)
/// char *name; // Name of the function or global.
@@ -365,112 +389,195 @@ private:
/// // entries (non inclusive).
/// };
QualType TgtBinaryDescriptorQTy;
- /// \brief Entity that registers the offloading constants that were emitted so
+ /// Entity that registers the offloading constants that were emitted so
/// far.
class OffloadEntriesInfoManagerTy {
CodeGenModule &CGM;
- /// \brief Number of entries registered so far.
- unsigned OffloadingEntriesNum;
+ /// Number of entries registered so far.
+ unsigned OffloadingEntriesNum = 0;
public:
/// Base class of the entries info.
class OffloadEntryInfo {
public:
- /// Kind of a given entry. Currently, only target regions are
- /// supported.
+ /// Kind of a given entry.
enum OffloadingEntryInfoKinds : unsigned {
- // Entry is a target region.
- OFFLOAD_ENTRY_INFO_TARGET_REGION = 0,
- // Invalid entry info.
- OFFLOAD_ENTRY_INFO_INVALID = ~0u
+ /// Entry is a target region.
+ OffloadingEntryInfoTargetRegion = 0,
+ /// Entry is a declare target variable.
+ OffloadingEntryInfoDeviceGlobalVar = 1,
+ /// Invalid entry info.
+ OffloadingEntryInfoInvalid = ~0u
};
- OffloadEntryInfo()
- : Flags(0), Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {}
+ protected:
+ OffloadEntryInfo() = delete;
+ explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
- int32_t Flags)
+ uint32_t Flags)
: Flags(Flags), Order(Order), Kind(Kind) {}
+ ~OffloadEntryInfo() = default;
+ public:
bool isValid() const { return Order != ~0u; }
unsigned getOrder() const { return Order; }
OffloadingEntryInfoKinds getKind() const { return Kind; }
- int32_t getFlags() const { return Flags; }
- void setFlags(int32_t NewFlags) { Flags = NewFlags; }
+ uint32_t getFlags() const { return Flags; }
+ void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
+ llvm::Constant *getAddress() const {
+ return cast_or_null<llvm::Constant>(Addr);
+ }
+ void setAddress(llvm::Constant *V) {
+ assert(!Addr.pointsToAliveValue() && "Address has been set before!");
+ Addr = V;
+ }
static bool classof(const OffloadEntryInfo *Info) { return true; }
private:
+ /// Address of the entity that has to be mapped for offloading.
+ llvm::WeakTrackingVH Addr;
+
/// Flags associated with the device global.
- int32_t Flags;
+ uint32_t Flags = 0u;
/// Order this entry was emitted.
- unsigned Order;
+ unsigned Order = ~0u;
- OffloadingEntryInfoKinds Kind;
+ OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
};
- /// \brief Return true if a there are no entries defined.
+ /// Return true if there are no entries defined.
bool empty() const;
- /// \brief Return number of entries defined so far.
+ /// Return number of entries defined so far.
unsigned size() const { return OffloadingEntriesNum; }
- OffloadEntriesInfoManagerTy(CodeGenModule &CGM)
- : CGM(CGM), OffloadingEntriesNum(0) {}
-
- ///
- /// Target region entries related.
- ///
- /// \brief Target region entries info.
- class OffloadEntryInfoTargetRegion : public OffloadEntryInfo {
- // \brief Address of the entity that has to be mapped for offloading.
- llvm::Constant *Addr;
- // \brief Address that can be used as the ID of the entry.
- llvm::Constant *ID;
+ OffloadEntriesInfoManagerTy(CodeGenModule &CGM) : CGM(CGM) {}
+
+ //
+ // Target region entries related.
+ //
+
+ /// Kind of the target registry entry.
+ enum OMPTargetRegionEntryKind : uint32_t {
+ /// Mark the entry as target region.
+ OMPTargetRegionEntryTargetRegion = 0x0,
+ /// Mark the entry as a global constructor.
+ OMPTargetRegionEntryCtor = 0x02,
+ /// Mark the entry as a global destructor.
+ OMPTargetRegionEntryDtor = 0x04,
+ };
+
+ /// Target region entries info.
+ class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
+ /// Address that can be used as the ID of the entry.
+ llvm::Constant *ID = nullptr;
public:
OffloadEntryInfoTargetRegion()
- : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u,
- /*Flags=*/0),
- Addr(nullptr), ID(nullptr) {}
+ : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
explicit OffloadEntryInfoTargetRegion(unsigned Order,
llvm::Constant *Addr,
- llvm::Constant *ID, int32_t Flags)
- : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order, Flags),
- Addr(Addr), ID(ID) {}
+ llvm::Constant *ID,
+ OMPTargetRegionEntryKind Flags)
+ : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
+ ID(ID) {
+ setAddress(Addr);
+ }
- llvm::Constant *getAddress() const { return Addr; }
llvm::Constant *getID() const { return ID; }
- void setAddress(llvm::Constant *V) {
- assert(!Addr && "Address as been set before!");
- Addr = V;
- }
void setID(llvm::Constant *V) {
- assert(!ID && "ID as been set before!");
+ assert(!ID && "ID has been set before!");
ID = V;
}
static bool classof(const OffloadEntryInfo *Info) {
- return Info->getKind() == OFFLOAD_ENTRY_INFO_TARGET_REGION;
+ return Info->getKind() == OffloadingEntryInfoTargetRegion;
}
};
- /// \brief Initialize target region entry.
+
+ /// Initialize target region entry.
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
unsigned Order);
- /// \brief Register target region entry.
+ /// Register target region entry.
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
llvm::Constant *Addr, llvm::Constant *ID,
- int32_t Flags);
- /// \brief Return true if a target region entry with the provided
- /// information exists.
+ OMPTargetRegionEntryKind Flags);
+ /// Return true if a target region entry with the provided information
+ /// exists.
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum) const;
/// Applies action \a Action on all registered entries.
typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned,
- OffloadEntryInfoTargetRegion &)>
+ const OffloadEntryInfoTargetRegion &)>
OffloadTargetRegionEntryInfoActTy;
void actOnTargetRegionEntriesInfo(
const OffloadTargetRegionEntryInfoActTy &Action);
+ //
+ // Device global variable entries related.
+ //
+
+ /// Kind of the global variable entry.
+ enum OMPTargetGlobalVarEntryKind : uint32_t {
+ /// Mark the entry as a declare target 'to' entry.
+ OMPTargetGlobalVarEntryTo = 0x0,
+ /// Mark the entry as a declare target 'link' entry.
+ OMPTargetGlobalVarEntryLink = 0x1,
+ };
+
+ /// Device global variable entries info.
+ class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
+ /// Type of the global variable.
+ CharUnits VarSize;
+ llvm::GlobalValue::LinkageTypes Linkage;
+
+ public:
+ OffloadEntryInfoDeviceGlobalVar()
+ : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
+ explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
+ OMPTargetGlobalVarEntryKind Flags)
+ : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
+ explicit OffloadEntryInfoDeviceGlobalVar(
+ unsigned Order, llvm::Constant *Addr, CharUnits VarSize,
+ OMPTargetGlobalVarEntryKind Flags,
+ llvm::GlobalValue::LinkageTypes Linkage)
+ : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
+ VarSize(VarSize), Linkage(Linkage) {
+ setAddress(Addr);
+ }
+
+ CharUnits getVarSize() const { return VarSize; }
+ void setVarSize(CharUnits Size) { VarSize = Size; }
+ llvm::GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
+ void setLinkage(llvm::GlobalValue::LinkageTypes LT) { Linkage = LT; }
+ static bool classof(const OffloadEntryInfo *Info) {
+ return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
+ }
+ };
+
+ /// Initialize device global variable entry.
+ void initializeDeviceGlobalVarEntryInfo(StringRef Name,
+ OMPTargetGlobalVarEntryKind Flags,
+ unsigned Order);
+
+ /// Register device global variable entry.
+ void
+ registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
+ CharUnits VarSize,
+ OMPTargetGlobalVarEntryKind Flags,
+ llvm::GlobalValue::LinkageTypes Linkage);
+ /// Checks if the variable with the given name has been registered already.
+ bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
+ return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
+ }
+ /// Applies action \a Action on all registered entries.
+ typedef llvm::function_ref<void(StringRef,
+ const OffloadEntryInfoDeviceGlobalVar &)>
+ OffloadDeviceGlobalVarEntryInfoActTy;
+ void actOnDeviceGlobalVarEntriesInfo(
+ const OffloadDeviceGlobalVarEntryInfoActTy &Action);
+
private:
// Storage for target region entries kind. The storage is to be indexed by
// file ID, device ID, parent function name and line number.
@@ -484,75 +591,79 @@ private:
OffloadEntriesTargetRegionPerDevice;
typedef OffloadEntriesTargetRegionPerDevice OffloadEntriesTargetRegionTy;
OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
+ /// Storage for device global variable entries kind. The storage is to be
+ /// indexed by mangled name.
+ typedef llvm::StringMap<OffloadEntryInfoDeviceGlobalVar>
+ OffloadEntriesDeviceGlobalVarTy;
+ OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
};
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
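Note: a hypothetical consumer of the new device-global-variable bookkeeping, e.g. while materializing the offload entry table (sketch only; the call site is illustrative):

    OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
        [&](StringRef MangledName, const OffloadEntryInfoDeviceGlobalVar &E) {
          if (!E.isValid())
            return;
          // For globals, the entry address doubles as its ID.
          createOffloadEntry(E.getAddress(), E.getAddress(),
                             E.getVarSize().getQuantity(), E.getFlags(),
                             E.getLinkage());
        });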
- /// \brief Creates and registers offloading binary descriptor for the current
+ bool ShouldMarkAsGlobal = true;
+ llvm::SmallDenseSet<const FunctionDecl *> AlreadyEmittedTargetFunctions;
+
+ /// Creates and registers offloading binary descriptor for the current
/// compilation unit. The function that does the registration is returned.
llvm::Function *createOffloadingBinaryDescriptorRegistration();
- /// \brief Creates all the offload entries in the current compilation unit
+ /// Creates all the offload entries in the current compilation unit
/// along with the associated metadata.
void createOffloadEntriesAndInfoMetadata();
- /// \brief Loads all the offload entries information from the host IR
+ /// Loads all the offload entries information from the host IR
/// metadata.
void loadOffloadInfoMetadata();
- /// \brief Returns __tgt_offload_entry type.
+ /// Returns __tgt_offload_entry type.
QualType getTgtOffloadEntryQTy();
- /// \brief Returns __tgt_device_image type.
+ /// Returns __tgt_device_image type.
QualType getTgtDeviceImageQTy();
- /// \brief Returns __tgt_bin_desc type.
+ /// Returns __tgt_bin_desc type.
QualType getTgtBinaryDescriptorQTy();
- /// \brief Start scanning from statement \a S and and emit all target regions
+ /// Start scanning from statement \a S and emit all target regions
/// found along the way.
/// \param S Starting statement.
/// \param ParentName Name of the function declaration that is being scanned.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName);
- /// \brief Build type kmp_routine_entry_t (if not built yet).
+ /// Build type kmp_routine_entry_t (if not built yet).
void emitKmpRoutineEntryT(QualType KmpInt32Ty);
- /// \brief Returns pointer to kmpc_micro type.
+ /// Returns pointer to kmpc_micro type.
llvm::Type *getKmpc_MicroPointerTy();
- /// \brief Returns specified OpenMP runtime function.
+ /// Returns specified OpenMP runtime function.
/// \param Function OpenMP runtime function.
/// \return Specified function.
llvm::Constant *createRuntimeFunction(unsigned Function);
- /// \brief Returns __kmpc_for_static_init_* runtime function for the specified
+ /// Returns __kmpc_for_static_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::Constant *createForStaticInitFunction(unsigned IVSize, bool IVSigned);
- /// \brief Returns __kmpc_dispatch_init_* runtime function for the specified
+ /// Returns __kmpc_dispatch_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned);
- /// \brief Returns __kmpc_dispatch_next_* runtime function for the specified
+ /// Returns __kmpc_dispatch_next_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned);
- /// \brief Returns __kmpc_dispatch_fini_* runtime function for the specified
+ /// Returns __kmpc_dispatch_fini_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::Constant *createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
- /// \brief If the specified mangled name is not in the module, create and
+ /// If the specified mangled name is not in the module, create and
/// return threadprivate cache object. This object is a pointer's worth of
/// storage that's reserved for use by the OpenMP runtime.
/// \param VD Threadprivate variable.
/// \return Cache variable for the specified threadprivate.
llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
- /// \brief Emits address of the word in a memory where current thread id is
- /// stored.
- virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
-
- /// \brief Gets (if variable with the given name already exist) or creates
+ /// Gets (if a variable with the given name already exists) or creates an
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
/// \param Ty Type of the global variable. If it already exists, the type
@@ -561,10 +672,13 @@ private:
llvm::Constant *getOrCreateInternalVariable(llvm::Type *Ty,
const llvm::Twine &Name);
- /// \brief Set of threadprivate variables with the generated initializer.
+ /// Set of threadprivate variables with the generated initializer.
llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition;
- /// \brief Emits initialization code for the threadprivate variables.
+ /// Set of declare target variables with the generated initializer.
+ llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition;
+
+ /// Emits initialization code for the threadprivate variables.
/// \param VDAddr Address of the global variable \a VD.
/// \param Ctor Pointer to a global init function for \a VD.
/// \param CopyCtor Pointer to a global copy function for \a VD.
@@ -574,7 +688,7 @@ private:
llvm::Value *Ctor, llvm::Value *CopyCtor,
llvm::Value *Dtor, SourceLocation Loc);
- /// \brief Returns corresponding lock object for the specified critical region
+ /// Returns corresponding lock object for the specified critical region
/// name. If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
/// \param CriticalName Name of the critical region.
@@ -586,7 +700,7 @@ private:
llvm::Value *TaskEntry = nullptr;
llvm::Value *NewTaskNewTaskTTy = nullptr;
LValue TDBase;
- RecordDecl *KmpTaskTQTyRD = nullptr;
+ const RecordDecl *KmpTaskTQTyRD = nullptr;
llvm::Value *TaskDupFn = nullptr;
};
/// Emit task region for the task directive. The task region is emitted in
@@ -617,10 +731,14 @@ private:
Address Shareds, const OMPTaskDataTy &Data);
public:
- explicit CGOpenMPRuntime(CodeGenModule &CGM);
+ explicit CGOpenMPRuntime(CodeGenModule &CGM)
+ : CGOpenMPRuntime(CGM, ".", ".") {}
virtual ~CGOpenMPRuntime() {}
virtual void clear();
+ /// Gets the name of the entity, assembled from \p Parts with the
+ /// platform-specific name separators.
+ std::string getName(ArrayRef<StringRef> Parts) const;
+
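Note: a plausible implementation of this helper, joining the parts with FirstSeparator before the first part and Separator between the rest (a sketch consistent with the declarations above, not necessarily the exact .cpp body):

    std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
      SmallString<128> Buffer;
      llvm::raw_svector_ostream OS(Buffer);
      StringRef Sep = FirstSeparator; // "." for the host runtime
      for (StringRef Part : Parts) {
        OS << Sep << Part;            // e.g. ".omp.reduction.red_list"
        Sep = Separator;
      }
      return OS.str();
    }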
/// Emit code for the specified user defined reduction construct.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF,
const OMPDeclareReductionDecl *D);
@@ -628,7 +746,7 @@ public:
virtual std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl *D);
- /// \brief Emits outlined function for the specified OpenMP parallel directive
+ /// Emits outlined function for the specified OpenMP parallel directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
@@ -640,7 +758,7 @@ public:
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
- /// \brief Emits outlined function for the specified OpenMP teams directive
+ /// Emits outlined function for the specified OpenMP teams directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
@@ -652,7 +770,7 @@ public:
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
- /// \brief Emits outlined function for the OpenMP task directive \a D. This
+ /// Emits outlined function for the OpenMP task directive \a D. This
/// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
/// TaskT).
/// \param D OpenMP directive.
@@ -673,11 +791,11 @@ public:
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
bool Tied, unsigned &NumberOfParts);
- /// \brief Cleans up references to the objects in finished function.
+ /// Cleans up references to the objects in finished function.
///
- void functionFinished(CodeGenFunction &CGF);
+ virtual void functionFinished(CodeGenFunction &CGF);
- /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// Emits code for parallel or serial call of the \a OutlinedFn with
/// variables captured in a record whose address is stored in \a
/// CapturedStruct.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
@@ -692,7 +810,7 @@ public:
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond);
- /// \brief Emits a critical region.
+ /// Emits a critical region.
/// \param CriticalName Name of the critical region.
/// \param CriticalOpGen Generator for the statement associated with the given
/// critical region.
@@ -702,24 +820,24 @@ public:
SourceLocation Loc,
const Expr *Hint = nullptr);
- /// \brief Emits a master region.
+ /// Emits a master region.
/// \param MasterOpGen Generator for the statement associated with the given
/// master region.
virtual void emitMasterRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &MasterOpGen,
SourceLocation Loc);
- /// \brief Emits code for a taskyield directive.
+ /// Emits code for a taskyield directive.
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc);
- /// \brief Emit a taskgroup region.
+ /// Emit a taskgroup region.
/// \param TaskgroupOpGen Generator for the statement associated with the
/// given taskgroup region.
virtual void emitTaskgroupRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &TaskgroupOpGen,
SourceLocation Loc);
- /// \brief Emits a single region.
+ /// Emits a single region.
/// \param SingleOpGen Generator for the statement associated with the given
/// single region.
virtual void emitSingleRegion(CodeGenFunction &CGF,
@@ -730,14 +848,14 @@ public:
ArrayRef<const Expr *> SrcExprs,
ArrayRef<const Expr *> AssignmentOps);
- /// \brief Emit an ordered region.
+ /// Emit an ordered region.
/// \param OrderedOpGen Generator for the statement associated with the given
/// ordered region.
virtual void emitOrderedRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &OrderedOpGen,
SourceLocation Loc, bool IsThreads);
- /// \brief Emit an implicit/explicit barrier for OpenMP threads.
+ /// Emit an implicit/explicit barrier for OpenMP threads.
/// \param Kind Directive for which this implicit barrier call must be
/// generated. Must be OMPD_barrier for explicit barrier generation.
/// \param EmitChecks true if checks for cancellation barriers need to be emitted.
@@ -750,7 +868,7 @@ public:
bool EmitChecks = true,
bool ForceSimpleCall = false);
- /// \brief Check if the specified \a ScheduleKind is static non-chunked.
+ /// Check if the specified \a ScheduleKind is static non-chunked.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
/// \param Chunked True if chunk is specified in the clause.
@@ -758,7 +876,7 @@ public:
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked) const;
- /// \brief Check if the specified \a ScheduleKind is static non-chunked.
+ /// Check if the specified \a ScheduleKind is static non-chunked.
/// This kind of distribute directive is emitted without outer loop.
/// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
/// \param Chunked True if chunk is specified in the clause.
@@ -766,7 +884,7 @@ public:
virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
bool Chunked) const;
- /// \brief Check if the specified \a ScheduleKind is dynamic.
+ /// Check if the specified \a ScheduleKind is dynamic.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
///
@@ -839,7 +957,7 @@ public:
: IVSize(IVSize), IVSigned(IVSigned), Ordered(Ordered), IL(IL), LB(LB),
UB(UB), ST(ST), Chunk(Chunk) {}
};
- /// \brief Call the appropriate runtime routine to initialize it before start
+ /// Call the appropriate runtime routine to initialize it before start
/// of loop.
///
/// This is used only in case of static schedule, when the user did not
@@ -870,7 +988,7 @@ public:
OpenMPDistScheduleClauseKind SchedKind,
const StaticRTInput &Values);
- /// \brief Call the appropriate runtime routine to notify that we finished
+ /// Call the appropriate runtime routine to notify that we finished
/// iteration of the ordered loop with the dynamic scheduling.
///
/// \param CGF Reference to current CodeGenFunction.
@@ -882,7 +1000,7 @@ public:
SourceLocation Loc, unsigned IVSize,
bool IVSigned);
- /// \brief Call the appropriate runtime routine to notify that we finished
+ /// Call the appropriate runtime routine to notify that we finished
/// all the work with current loop.
///
/// \param CGF Reference to current CodeGenFunction.
@@ -911,7 +1029,7 @@ public:
Address IL, Address LB,
Address UB, Address ST);
- /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+ /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
/// \param NumThreads An integer value of threads.
@@ -919,13 +1037,13 @@ public:
llvm::Value *NumThreads,
SourceLocation Loc);
- /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
virtual void emitProcBindClause(CodeGenFunction &CGF,
OpenMPProcBindClauseKind ProcBind,
SourceLocation Loc);
- /// \brief Returns address of the threadprivate variable for the current
+ /// Returns address of the threadprivate variable for the current
/// thread.
/// \param VD Threadprivate variable.
/// \param VDAddr Address of the global variable \a VD.
@@ -936,7 +1054,11 @@ public:
Address VDAddr,
SourceLocation Loc);
- /// \brief Emit a code for initialization of threadprivate variable. It emits
+ /// Returns the address of the variable marked as declare target with link
+ /// clause.
+ virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD);
+
+ /// Emit a code for initialization of threadprivate variable. It emits
/// a call to runtime library which adds initial value to the newly created
/// threadprivate variable (if it is not constant) and registers destructor
/// for the variable (if any).
@@ -949,6 +1071,14 @@ public:
SourceLocation Loc, bool PerformInit,
CodeGenFunction *CGF = nullptr);
+ /// Emit a code for initialization of declare target variable.
+ /// \param VD Declare target variable.
+ /// \param Addr Address of the global variable \a VD.
+ /// \param PerformInit true if initialization expression is not constant.
+ virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD,
+ llvm::GlobalVariable *Addr,
+ bool PerformInit);
+
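Note: a source construct that takes this path (illustrative):

    // 'link' defers mapping of gbl until a target region actually uses
    // it; host-side accesses go through the pointer slot returned by
    // getAddrOfDeclareTargetLink.
    int gbl;
    #pragma omp declare target link(gbl)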
/// Creates artificial threadprivate variable with name \p Name and type \p
/// VarType.
/// \param VarType Type of the artificial threadprivate variable.
@@ -957,12 +1087,12 @@ public:
QualType VarType,
StringRef Name);
- /// \brief Emit flush of the variables specified in 'omp flush' directive.
+ /// Emit flush of the variables specified in 'omp flush' directive.
/// \param Vars List of variables to flush.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
SourceLocation Loc);
- /// \brief Emit task region for the task directive. The task region is
+ /// Emit task region for the task directive. The task region is
/// emitted in several steps:
/// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
/// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
@@ -1029,7 +1159,7 @@ public:
llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
const Expr *IfCond, const OMPTaskDataTy &Data);
- /// \brief Emit code for the directive that does not require outlining.
+ /// Emit code for the directive that does not require outlining.
///
/// \param InnermostKind Kind of innermost directive (for simple directives it
/// is a directive itself, for combined - its innermost directive).
@@ -1048,7 +1178,8 @@ public:
/// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
/// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
/// or 'operator binop(LHS, RHS)'.
- llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType,
+ llvm::Value *emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc,
+ llvm::Type *ArgsType,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
@@ -1066,7 +1197,7 @@ public:
bool SimpleReduction;
OpenMPDirectiveKind ReductionKind;
};
- /// \brief Emit a code for reduction clause. Next code should be emitted for
+ /// Emit a code for reduction clause. Next code should be emitted for
/// reduction:
/// \code
///
@@ -1160,10 +1291,10 @@ public:
llvm::Value *ReductionsPtr,
LValue SharedLVal);
- /// \brief Emit code for 'taskwait' directive.
+ /// Emit code for 'taskwait' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
- /// \brief Emit code for 'cancellation point' construct.
+ /// Emit code for 'cancellation point' construct.
/// \param CancelRegion Region kind for which the cancellation point must be
/// emitted.
///
@@ -1171,7 +1302,7 @@ public:
SourceLocation Loc,
OpenMPDirectiveKind CancelRegion);
- /// \brief Emit code for 'cancel' construct.
+ /// Emit code for 'cancel' construct.
/// \param IfCond Condition in the associated 'if' clause, if it was
/// specified, nullptr otherwise.
/// \param CancelRegion Region kind for which the cancel must be emitted.
@@ -1180,7 +1311,7 @@ public:
const Expr *IfCond,
OpenMPDirectiveKind CancelRegion);
- /// \brief Emit outilined function for 'target' directive.
+ /// Emit outlined function for 'target' directive.
/// \param D Directive to emit.
/// \param ParentName Name of the function that encloses the target region.
/// \param OutlinedFn Outlined function value to be defined by this call.
@@ -1196,7 +1327,7 @@ public:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
- /// \brief Emit the target offloading code associated with \a D. The emitted
+ /// Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device; in the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
/// \param D Directive to emit.
@@ -1206,36 +1337,39 @@ public:
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
- /// \param CapturedVars Values captured in the current region.
virtual void emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
llvm::Value *OutlinedFnID, const Expr *IfCond,
- const Expr *Device,
- ArrayRef<llvm::Value *> CapturedVars);
+ const Expr *Device);
- /// \brief Emit the target regions enclosed in \a GD function definition or
+ /// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
/// \a GD was dealt with successfully.
/// \param GD Function to scan.
virtual bool emitTargetFunctions(GlobalDecl GD);
- /// \brief Emit the global variable if it is a valid device global variable.
+ /// Emit the global variable if it is a valid device global variable.
/// Returns true if \a GD was dealt with successfully.
/// \param GD Variable declaration to emit.
virtual bool emitTargetGlobalVariable(GlobalDecl GD);
- /// \brief Emit the global \a GD if it is meaningful for the target. Returns
+ /// Checks if the provided global decl \a VD is a declare target variable and
+ /// registers it when emitting code for the host.
+ virtual void registerTargetGlobalVariable(const VarDecl *VD,
+ llvm::Constant *Addr);
+
+ /// Emit the global \a GD if it is meaningful for the target. Returns
/// true if it was emitted successfully.
/// \param GD Global to scan.
virtual bool emitTargetGlobal(GlobalDecl GD);
- /// \brief Creates the offloading descriptor in the event any target region
+ /// Creates the offloading descriptor in the event any target region
/// was emitted in the current module and return the function that registers
/// it.
virtual llvm::Function *emitRegistrationFunction();
- /// \brief Emits code for teams call of the \a OutlinedFn with
+ /// Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record whose address is stored in \a
/// CapturedStruct.
/// \param OutlinedFn Outlined function to be run by team masters. Type of
@@ -1248,7 +1382,7 @@ public:
SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars);
- /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
+ /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code
/// for num_teams clause.
/// \param NumTeams An integer expression of teams.
@@ -1296,7 +1430,7 @@ public:
bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
};
- /// \brief Emit the target data mapping code associated with \a D.
+ /// Emit the target data mapping code associated with \a D.
/// \param D Directive to emit.
/// \param IfCond Expression evaluated in if clause associated with the
/// target directive, or null if no device clause is used.
@@ -1310,7 +1444,7 @@ public:
const RegionCodeGenTy &CodeGen,
TargetDataInfo &Info);
- /// \brief Emit the data mapping/movement code associated with the directive
+ /// Emit the data mapping/movement code associated with the directive
/// \a D that should be of the form 'target [{enter|exit} data | update]'.
/// \param D Directive to emit.
/// \param IfCond Expression evaluated in if clause associated with the target
@@ -1341,7 +1475,7 @@ public:
/// Translates the native parameter of outlined function if this is required
/// for target.
- /// \param FD Field decl from captured record for the paramater.
+ /// \param FD Field decl from captured record for the parameter.
/// \param NativeParam Parameter itself.
virtual const VarDecl *translateParameter(const FieldDecl *FD,
const VarDecl *NativeParam) const {
@@ -1362,6 +1496,582 @@ public:
emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args = llvm::None) const;
+
+ /// Emits OpenMP-specific function prolog.
+ /// Required for device constructs.
+ virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {}
+
+ /// Gets the OpenMP-specific address of the local variable.
+ virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD);
+
+ /// Marks the declaration as already emitted for the device code and returns
+ /// true if it was marked already, and false otherwise.
+ bool markAsGlobalTarget(GlobalDecl GD);
+
+};
+
+/// Class that supports the emission of SIMD-only code.
+class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
+public:
+ explicit CGOpenMPSIMDRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) {}
+ ~CGOpenMPSIMDRuntime() override {}
+
+ /// Emits outlined function for the specified OpenMP parallel directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Emits outlined function for the specified OpenMP teams directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Emits outlined function for the OpenMP task directive \a D. This
+ /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
+ /// TaskT).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param PartIDVar Variable for partition id in the current OpenMP untied
+ /// task region.
+ /// \param TaskTVar Variable for task_t argument.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ /// \param Tied true if task is generated for tied task, false otherwise.
+ /// \param NumberOfParts Number of parts in untied task. Ignored for tied
+ /// tasks.
+ ///
+ llvm::Value *emitTaskOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ bool Tied, unsigned &NumberOfParts) override;
+
+ /// Emits code for parallel or serial call of the \a OutlinedFn with
+ /// variables captured in a record whose address is stored in \a
+ /// CapturedStruct.
+ /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
+ ///
+ void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars,
+ const Expr *IfCond) override;
+
+ /// Emits a critical region.
+ /// \param CriticalName Name of the critical region.
+ /// \param CriticalOpGen Generator for the statement associated with the given
+ /// critical region.
+ /// \param Hint Value of the 'hint' clause (optional).
+ void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
+ const RegionCodeGenTy &CriticalOpGen,
+ SourceLocation Loc,
+ const Expr *Hint = nullptr) override;
+
+ /// Emits a master region.
+ /// \param MasterOpGen Generator for the statement associated with the given
+ /// master region.
+ void emitMasterRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &MasterOpGen,
+ SourceLocation Loc) override;
+
+ /// Emits code for a taskyield directive.
+ void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override;
+
+ /// Emit a taskgroup region.
+ /// \param TaskgroupOpGen Generator for the statement associated with the
+ /// given taskgroup region.
+ void emitTaskgroupRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &TaskgroupOpGen,
+ SourceLocation Loc) override;
+
+ /// Emits a single region.
+ /// \param SingleOpGen Generator for the statement associated with the given
+ /// single region.
+ void emitSingleRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &SingleOpGen, SourceLocation Loc,
+ ArrayRef<const Expr *> CopyprivateVars,
+ ArrayRef<const Expr *> DestExprs,
+ ArrayRef<const Expr *> SrcExprs,
+ ArrayRef<const Expr *> AssignmentOps) override;
+
+ /// Emit an ordered region.
+ /// \param OrderedOpGen Generator for the statement associated with the given
+ /// ordered region.
+ void emitOrderedRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &OrderedOpGen,
+ SourceLocation Loc, bool IsThreads) override;
+
+ /// Emit an implicit/explicit barrier for OpenMP threads.
+ /// \param Kind Directive for which this implicit barrier call must be
+ /// generated. Must be OMPD_barrier for explicit barrier generation.
+ /// \param EmitChecks true if checks for cancellation barriers need to be emitted.
+ /// \param ForceSimpleCall true if a simple barrier call must be emitted, false if
+ /// runtime class decides which one to emit (simple or with cancellation
+ /// checks).
+ ///
+ void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool EmitChecks = true,
+ bool ForceSimpleCall = false) override;
+
+ /// This is used for non-statically scheduled loop types and when the
+ /// 'ordered' clause is present on the loop construct.
+ /// Depending on the loop schedule, it is necessary to call some runtime
+ /// routine before start of the OpenMP loop to get the loop upper / lower
+ /// bounds \a LB and \a UB and stride \a ST.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ /// \param IVSize Size of the iteration variable in bits.
+ /// \param IVSigned Sign of the iteration variable.
+ /// \param Ordered true if loop is ordered, false otherwise.
+ /// \param DispatchValues struct containing llvm values for lower bound, upper
+ /// bound, and chunk expression.
+ /// If the chunk expression is the default (nullptr), a chunk of 1 is used.
+ ///
+ void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc,
+ const OpenMPScheduleTy &ScheduleKind,
+ unsigned IVSize, bool IVSigned, bool Ordered,
+ const DispatchRTInput &DispatchValues) override;
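+
+ // Illustrative sketch only: for a 32-bit signed induction variable this is
+ // a call such as
+ //   __kmpc_dispatch_init_4(&loc, gtid, schedule, lb, ub, st, chunk);
+ // with the _4u/_8/_8u variants selected from IVSize and IVSigned.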
+
+ /// Call the appropriate runtime routine to initialize it before the start
+ /// of the loop.
+ ///
+ /// This is used only in the case of a static schedule, when the user did not
+ /// specify an ordered clause on the loop construct.
+ /// Depending on the loop schedule, it is necessary to call some runtime
+ /// routine before start of the OpenMP loop to get the loop upper / lower
+ /// bounds LB and UB and stride ST.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param DKind Kind of the directive.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ /// \param Values Input arguments for the construct.
+ ///
+ void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind DKind,
+ const OpenMPScheduleTy &ScheduleKind,
+ const StaticRTInput &Values) override;
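+
+ // Illustrative sketch only: for a 32-bit induction variable this becomes
+ //   __kmpc_for_static_init_4(&loc, gtid, schedtype, &lastiter,
+ //                            &lb, &ub, &stride, incr, chunk);
+ // where the pointer arguments are in/out loop-bound variables.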
+
+ /// Call the appropriate runtime routine to initialize it before the start
+ /// of the distribute loop.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
+ /// \param Values Input arguments for the construct.
+ ///
+ void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDistScheduleClauseKind SchedKind,
+ const StaticRTInput &Values) override;
+
+ /// Call the appropriate runtime routine to notify that we finished
+ /// iteration of the ordered loop with the dynamic scheduling.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param IVSize Size of the iteration variable in bits.
+ /// \param IVSigned Sign of the iteration variable.
+ ///
+ void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc,
+ unsigned IVSize, bool IVSigned) override;
+
+ /// Call the appropriate runtime routine to notify that we finished
+ /// all the work with current loop.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param DKind Kind of the directive for which the static finish is emitted.
+ ///
+ void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind DKind) override;
+
+ /// Call __kmpc_dispatch_next(
+ /// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
+ /// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
+ /// kmp_int[32|64] *p_stride);
+ /// \param IVSize Size of the iteration variable in bits.
+ /// \param IVSigned Sign of the iteration variable.
+ /// \param IL Address of the output variable in which the flag of the
+ /// last iteration is returned.
+ /// \param LB Address of the output variable in which the lower iteration
+ /// number is returned.
+ /// \param UB Address of the output variable in which the upper iteration
+ /// number is returned.
+ /// \param ST Address of the output variable in which the stride value is
+ /// returned.
+ llvm::Value *emitForNext(CodeGenFunction &CGF, SourceLocation Loc,
+ unsigned IVSize, bool IVSigned, Address IL,
+ Address LB, Address UB, Address ST) override;
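+
+ // Illustrative sketch only: the result feeds the dispatch loop, e.g.
+ //   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+ //     for (i = lb; i <= ub; i += st)
+ //       <loop body>;
+ //   }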
+
+ /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+ /// clause.
+ /// \param NumThreads An integer value for the number of threads.
+ void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
+ SourceLocation Loc) override;
+
+ /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+ void emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) override;
+
+ /// Returns the address of the threadprivate variable for the current
+ /// thread.
+ /// \param VD Threadprivate variable.
+ /// \param VDAddr Address of the global variable \a VD.
+ /// \param Loc Location of the reference to threadprivate var.
+ /// \return Address of the threadprivate variable for the current thread.
+ Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD,
+ Address VDAddr, SourceLocation Loc) override;
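+
+ // Illustrative sketch only: the returned address typically comes from
+ //   __kmpc_threadprivate_cached(&loc, gtid, &<orig var>, sizeof(<orig var>),
+ //                               &<per-variable cache>);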
+
+ /// Emit code for the initialization of a threadprivate variable. It emits
+ /// a call to the runtime library that adds the initial value to the newly
+ /// created threadprivate variable (if it is not constant) and registers a
+ /// destructor for the variable (if any).
+ /// \param VD Threadprivate variable.
+ /// \param VDAddr Address of the global variable \a VD.
+ /// \param Loc Location of threadprivate declaration.
+ /// \param PerformInit true if initialization expression is not constant.
+ llvm::Function *
+ emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr,
+ SourceLocation Loc, bool PerformInit,
+ CodeGenFunction *CGF = nullptr) override;
+
+ /// Creates an artificial threadprivate variable with name \p Name and type
+ /// \p VarType.
+ /// \param VarType Type of the artificial threadprivate variable.
+ /// \param Name Name of the artificial threadprivate variable.
+ Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+ QualType VarType,
+ StringRef Name) override;
+
+ /// Emit a flush of the variables specified in the 'omp flush' directive.
+ /// \param Vars List of variables to flush.
+ void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
+ SourceLocation Loc) override;
+
+ /// Emit task region for the task directive. The task region is
+ /// emitted in several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructions function to field destructions of the
+ /// resulting structure kmp_task_t.
+ /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
+ /// kmp_task_t *new_task), where new_task is a resulting structure from
+ /// previous items.
+ /// \param D Current task directive.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+ /// \param SharedsTy A type which contains references to the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \p
+ /// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
+ /// \param Data Additional data for task generation like tiedness, final
+ /// state, list of privates, etc.
+ void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D, llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds, const Expr *IfCond,
+ const OMPTaskDataTy &Data) override;
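+
+ // Illustrative sketch only, covering the 'if' clause handling:
+ //   kmp_task_t *new_task = __kmpc_omp_task_alloc(...);
+ //   <copy shareds, set destructions>;
+ //   if (IfCond) {
+ //     __kmpc_omp_task(&loc, gtid, new_task);
+ //   } else { // undeferred execution
+ //     __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
+ //     .omp_task_entry.(gtid, new_task);
+ //     __kmpc_omp_task_complete_if0(&loc, gtid, new_task);
+ //   }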
+
+ /// Emit task region for the taskloop directive. The taskloop region is
+ /// emitted in several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructions function to field destructions of the
+ /// resulting structure kmp_task_t.
+ /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
+ /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
+ /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup), where new_task
+ /// is the structure resulting from the previous items.
+ /// \param D Current task directive.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+ /// \param SharedsTy A type which contains references to the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \p
+ /// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
+ /// \param Data Additional data for task generation like tiedness, final
+ /// state, list of privates, etc.
+ void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPLoopDirective &D, llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds, const Expr *IfCond,
+ const OMPTaskDataTy &Data) override;
+
+ /// Emit code for the reduction clause. The following code should be emitted
+ /// for the reduction:
+ /// \code
+ ///
+ /// static kmp_critical_name lock = { 0 };
+ ///
+ /// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
+ /// ...
+ /// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
+ /// ...
+ /// }
+ ///
+ /// ...
+ /// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+ /// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+ /// RedList, reduce_func, &<lock>)) {
+ /// case 1:
+ /// ...
+ /// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+ /// ...
+ /// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+ /// break;
+ /// case 2:
+ /// ...
+ /// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+ /// ...
+ /// break;
+ /// default:;
+ /// }
+ /// \endcode
+ ///
+ /// \param Privates List of private copies for original reduction arguments.
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ /// \param Options List of options for reduction codegen:
+ /// WithNowait true if parent directive has also nowait clause, false
+ /// otherwise.
+ /// SimpleReduction Emit reduction operation only. Used for omp simd
+ /// directive on the host.
+ /// ReductionKind The kind of reduction to perform.
+ void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps,
+ ReductionOptionsTy Options) override;
+
+ /// Emit code for the initialization of the task reduction clause. The
+ /// following code should be emitted for the reduction:
+ /// \code
+ ///
+ /// _task_red_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// \endcode
+ ///
+ /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
+ /// \param Data Additional data for task generation like tiedness, final
+ /// state, list of privates, reductions etc.
+ llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ const OMPTaskDataTy &Data) override;
+
+ /// Required to resolve existing problems in the runtime. Emits threadprivate
+ /// variables to store the size of the VLAs/array sections for
+ /// initializer/combiner/finalizer functions, and emits a threadprivate
+ /// variable to store the pointer to the original reduction item for the
+ /// custom initializer defined by the declare reduction construct.
+ /// \param RCG Allows reusing existing data for the reductions.
+ /// \param N Reduction item for which fixups must be emitted.
+ void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) override;
+
+ /// Get the address, of `void *` type, of the private copy of the reduction
+ /// item specified by the \p SharedLVal.
+ /// \param ReductionsPtr Pointer to the reduction data returned by the
+ /// emitTaskReductionInit function.
+ /// \param SharedLVal Address of the original reduction item.
+ Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *ReductionsPtr,
+ LValue SharedLVal) override;
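+
+ // Illustrative sketch only: the address is obtained with
+ //   void *priv = __kmpc_task_reduction_get_th_data(gtid, ReductionsPtr,
+ //                                                  <orig item address>);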
+
+ /// Emit code for 'taskwait' directive.
+ void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override;
+
+ /// Emit code for 'cancellation point' construct.
+ /// \param CancelRegion Region kind for which the cancellation point must be
+ /// emitted.
+ ///
+ void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind CancelRegion) override;
+
+ /// Emit code for 'cancel' construct.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
+ /// \param CancelRegion Region kind for which the cancel must be emitted.
+ ///
+ void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const Expr *IfCond,
+ OpenMPDirectiveKind CancelRegion) override;
+
+ /// Emit the outlined function for the 'target' directive.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// An outlined function may not be an entry if, e.g., the 'if' clause always
+ /// evaluates to false.
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Emit the target offloading code associated with \a D. The emitted
+ /// code attempts to offload execution to the device; in the event of
+ /// a failure, it executes the host version outlined in \a OutlinedFn.
+ /// \param D Directive to emit.
+ /// \param OutlinedFn Host version of the code to be offloaded.
+ /// \param OutlinedFnID ID of host version of the code to be offloaded.
+ /// \param IfCond Expression evaluated in if clause associated with the target
+ /// directive, or null if no if clause is used.
+ /// \param Device Expression evaluated in device clause associated with the
+ /// target directive, or null if no device clause is used.
+ void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device) override;
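+
+ // Illustrative sketch only (argument arrays elided): the offload attempt
+ // and host fallback follow the pattern
+ //   if (__tgt_target(device_id, OutlinedFnID, arg_num, args_base, args,
+ //                    arg_sizes, arg_types) != 0)
+ //     <host version outlined in OutlinedFn>(...);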
+
+ /// Emit the target regions enclosed in the \a GD function definition, or
+ /// the function itself in case it is a valid device function. Returns true
+ /// if \a GD was dealt with successfully.
+ /// \param GD Function to scan.
+ bool emitTargetFunctions(GlobalDecl GD) override;
+
+ /// Emit the global variable if it is a valid device global variable.
+ /// Returns true if \a GD was dealt with successfully.
+ /// \param GD Variable declaration to emit.
+ bool emitTargetGlobalVariable(GlobalDecl GD) override;
+
+ /// Emit the global \a GD if it is meaningful for the target. Returns true
+ /// if it was emitted successfully.
+ /// \param GD Global to scan.
+ bool emitTargetGlobal(GlobalDecl GD) override;
+
+ /// Creates the offloading descriptor in the event any target region
+ /// was emitted in the current module and returns the function that
+ /// registers it.
+ llvm::Function *emitRegistrationFunction() override;
+
+ /// Emits code for a teams call of the \a OutlinedFn with
+ /// variables captured in a record whose address is stored in \a
+ /// CapturedStruct.
+ /// \param OutlinedFn Outlined function to be run by team masters. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ ///
+ void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) override;
+
+ /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code
+ /// for num_teams clause.
+ /// \param NumTeams An integer expression for the number of teams.
+ /// \param ThreadLimit An integer expression for the number of threads.
+ void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+ const Expr *ThreadLimit, SourceLocation Loc) override;
+
+ /// Emit the target data mapping code associated with \a D.
+ /// \param D Directive to emit.
+ /// \param IfCond Expression evaluated in the if clause associated with the
+ /// target directive, or null if no if clause is used.
+ /// \param Device Expression evaluated in device clause associated with the
+ /// target directive, or null if no device clause is used.
+ /// \param Info A record used to store information that needs to be preserved
+ /// until the region is closed.
+ void emitTargetDataCalls(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D, const Expr *IfCond,
+ const Expr *Device, const RegionCodeGenTy &CodeGen,
+ TargetDataInfo &Info) override;
+
+ /// Emit the data mapping/movement code associated with the directive
+ /// \a D that should be of the form 'target [{enter|exit} data | update]'.
+ /// \param D Directive to emit.
+ /// \param IfCond Expression evaluated in if clause associated with the target
+ /// directive, or null if no if clause is used.
+ /// \param Device Expression evaluated in device clause associated with the
+ /// target directive, or null if no device clause is used.
+ void emitTargetDataStandAloneCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ const Expr *IfCond,
+ const Expr *Device) override;
+
+ /// Emit initialization for doacross loop nesting support.
+ /// \param D Loop-based construct used in doacross nesting construct.
+ void emitDoacrossInit(CodeGenFunction &CGF,
+ const OMPLoopDirective &D) override;
+
+ /// Emit code for doacross ordered directive with 'depend' clause.
+ /// \param C 'depend' clause with 'sink|source' dependency kind.
+ void emitDoacrossOrdered(CodeGenFunction &CGF,
+ const OMPDependClause *C) override;
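+
+ // Illustrative sketch only: doacross support lowers to the runtime calls
+ //   __kmpc_doacross_init(&loc, gtid, <ndims>, &dims); // at loop entry
+ //   __kmpc_doacross_wait(&loc, gtid, vec);            // depend(sink : ...)
+ //   __kmpc_doacross_post(&loc, gtid, vec);            // depend(source)
+ //   __kmpc_doacross_fini(&loc, gtid);                 // at loop exit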
+
+ /// Translates the native parameter of the outlined function if this is
+ /// required for the target.
+ /// \param FD Field decl from captured record for the parameter.
+ /// \param NativeParam Parameter itself.
+ const VarDecl *translateParameter(const FieldDecl *FD,
+ const VarDecl *NativeParam) const override;
+
+ /// Gets the address of the native argument based on the address of the
+ /// target-specific parameter.
+ /// \param NativeParam Parameter itself.
+ /// \param TargetParam Corresponding target-specific parameter.
+ Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
+ const VarDecl *TargetParam) const override;
};
} // namespace CodeGen
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 7b2993cfd38d..036b5371fe0b 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -13,33 +13,35 @@
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntimeNVPTX.h"
-#include "clang/AST/DeclOpenMP.h"
#include "CodeGenFunction.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/StmtVisitor.h"
+#include "llvm/ADT/SmallPtrSet.h"
using namespace clang;
using namespace CodeGen;
namespace {
enum OpenMPRTLFunctionNVPTX {
- /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
+ /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime);
OMPRTL_NVPTX__kmpc_kernel_init,
- /// \brief Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+ /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
OMPRTL_NVPTX__kmpc_kernel_deinit,
- /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
OMPRTL_NVPTX__kmpc_spmd_kernel_init,
- /// \brief Call to void __kmpc_spmd_kernel_deinit();
+ /// Call to void __kmpc_spmd_kernel_deinit();
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
- /// \brief Call to void __kmpc_kernel_prepare_parallel(void
- /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t
+ /// Call to void __kmpc_kernel_prepare_parallel(void
+ /// *outlined_function, int16_t
/// IsOMPRuntimeInitialized);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
- /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void
- /// ***args, int16_t IsOMPRuntimeInitialized);
+ /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
+ /// int16_t IsOMPRuntimeInitialized);
OMPRTL_NVPTX__kmpc_kernel_parallel,
- /// \brief Call to void __kmpc_kernel_end_parallel();
+ /// Call to void __kmpc_kernel_end_parallel();
OMPRTL_NVPTX__kmpc_kernel_end_parallel,
/// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
/// global_tid);
@@ -47,19 +49,25 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL_NVPTX__kmpc_end_serialized_parallel,
- /// \brief Call to int32_t __kmpc_shuffle_int32(int32_t element,
+ /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
/// int16_t lane_offset, int16_t warp_size);
OMPRTL_NVPTX__kmpc_shuffle_int32,
- /// \brief Call to int64_t __kmpc_shuffle_int64(int64_t element,
+ /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
/// int16_t lane_offset, int16_t warp_size);
OMPRTL_NVPTX__kmpc_shuffle_int64,
- /// \brief Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32
+ /// Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32
/// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
- /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
+ /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32
+ /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ OMPRTL_NVPTX__kmpc_simd_reduce_nowait,
+ /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
/// int32_t num_vars, size_t reduce_size, void *reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
/// lane_offset, int16_t shortCircuit),
@@ -69,17 +77,38 @@ enum OpenMPRTLFunctionNVPTX {
/// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t
/// index, int32_t width, int32_t reduce))
OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
- /// \brief Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
- OMPRTL_NVPTX__kmpc_end_reduce_nowait
+ /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+ OMPRTL_NVPTX__kmpc_end_reduce_nowait,
+ /// Call to void __kmpc_data_sharing_init_stack();
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
+ /// Call to void __kmpc_data_sharing_init_stack_spmd();
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
+ /// Call to void* __kmpc_data_sharing_push_stack(size_t size,
+ /// int16_t UseSharedMemory);
+ OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
+ /// Call to void __kmpc_data_sharing_pop_stack(void *a);
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
+ /// Call to void __kmpc_begin_sharing_variables(void ***args,
+ /// size_t n_args);
+ OMPRTL_NVPTX__kmpc_begin_sharing_variables,
+ /// Call to void __kmpc_end_sharing_variables();
+ OMPRTL_NVPTX__kmpc_end_sharing_variables,
+ /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
+ OMPRTL_NVPTX__kmpc_get_shared_variables,
+ /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL_NVPTX__kmpc_parallel_level,
+ /// Call to int8_t __kmpc_is_spmd_exec_mode();
+ OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
class NVPTXActionTy final : public PrePostActionTy {
- llvm::Value *EnterCallee;
+ llvm::Value *EnterCallee = nullptr;
ArrayRef<llvm::Value *> EnterArgs;
- llvm::Value *ExitCallee;
+ llvm::Value *ExitCallee = nullptr;
ArrayRef<llvm::Value *> ExitArgs;
- bool Conditional;
+ bool Conditional = false;
llvm::BasicBlock *ContBlock = nullptr;
public:
@@ -109,21 +138,21 @@ public:
}
};
-// A class to track the execution mode when codegening directives within
-// a target region. The appropriate mode (generic/spmd) is set on entry
-// to the target region and used by containing directives such as 'parallel'
-// to emit optimized code.
+/// A class to track the execution mode when codegening directives within
+/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry
+/// to the target region and used by containing directives such as 'parallel'
+/// to emit optimized code.
class ExecutionModeRAII {
private:
CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
public:
- ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode,
- CGOpenMPRuntimeNVPTX::ExecutionMode NewMode)
+ ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
: Mode(Mode) {
SavedMode = Mode;
- Mode = NewMode;
+ Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
+ : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
}
~ExecutionModeRAII() { Mode = SavedMode; }
};
@@ -149,6 +178,353 @@ enum NamedBarrier : unsigned {
/// barrier.
NB_Parallel = 1,
};
+
+/// Get the list of variables that can escape their declaration context.
+class CheckVarsEscapingDeclContext final
+ : public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
+ CodeGenFunction &CGF;
+ llvm::SetVector<const ValueDecl *> EscapedDecls;
+ llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
+ llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
+ RecordDecl *GlobalizedRD = nullptr;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+ bool AllEscaped = false;
+ bool IsForCombinedParallelRegion = false;
+
+ static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
+ isDeclareTargetDeclaration(const ValueDecl *VD) {
+ for (const Decl *D : VD->redecls()) {
+ if (!D->hasAttrs())
+ continue;
+ if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
+ return Attr->getMapType();
+ }
+ return llvm::None;
+ }
+
+ void markAsEscaped(const ValueDecl *VD) {
+ // Do not globalize declare target variables.
+ if (!isa<VarDecl>(VD) || isDeclareTargetDeclaration(VD))
+ return;
+ VD = cast<ValueDecl>(VD->getCanonicalDecl());
+ // Variables captured by value must be globalized.
+ if (auto *CSI = CGF.CapturedStmtInfo) {
+ if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
+ // Check if we need to capture the variable that was already captured
+ // by value in the outer region.
+ if (!IsForCombinedParallelRegion) {
+ if (!FD->hasAttrs())
+ return;
+ const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
+ if (!Attr)
+ return;
+ if (!isOpenMPPrivate(
+ static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) ||
+ Attr->getCaptureKind() == OMPC_map)
+ return;
+ }
+ if (!FD->getType()->isReferenceType()) {
+ assert(!VD->getType()->isVariablyModifiedType() &&
+ "Parameter captured by value with variably modified type");
+ EscapedParameters.insert(VD);
+ } else if (!IsForCombinedParallelRegion) {
+ return;
+ }
+ }
+ }
+ if ((!CGF.CapturedStmtInfo ||
+ (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
+ VD->getType()->isReferenceType())
+ // Do not globalize variables with reference type.
+ return;
+ if (VD->getType()->isVariablyModifiedType())
+ EscapedVariableLengthDecls.insert(VD);
+ else
+ EscapedDecls.insert(VD);
+ }
+
+ void VisitValueDecl(const ValueDecl *VD) {
+ if (VD->getType()->isLValueReferenceType())
+ markAsEscaped(VD);
+ if (const auto *VarD = dyn_cast<VarDecl>(VD)) {
+ if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
+ const bool SavedAllEscaped = AllEscaped;
+ AllEscaped = VD->getType()->isLValueReferenceType();
+ Visit(VarD->getInit());
+ AllEscaped = SavedAllEscaped;
+ }
+ }
+ }
+ void VisitOpenMPCapturedStmt(const CapturedStmt *S,
+ ArrayRef<OMPClause *> Clauses,
+ bool IsCombinedParallelRegion) {
+ if (!S)
+ return;
+ for (const CapturedStmt::Capture &C : S->captures()) {
+ if (C.capturesVariable() && !C.capturesVariableByCopy()) {
+ const ValueDecl *VD = C.getCapturedVar();
+ bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
+ if (IsCombinedParallelRegion) {
+ // Check if the variable is privatized in the combined construct and
+ // whether those private copies must be shared in the inner parallel
+ // directive.
+ IsForCombinedParallelRegion = false;
+ for (const OMPClause *C : Clauses) {
+ if (!isOpenMPPrivate(C->getClauseKind()) ||
+ C->getClauseKind() == OMPC_reduction ||
+ C->getClauseKind() == OMPC_linear ||
+ C->getClauseKind() == OMPC_private)
+ continue;
+ ArrayRef<const Expr *> Vars;
+ if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
+ Vars = PC->getVarRefs();
+ else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C))
+ Vars = PC->getVarRefs();
+ else
+ llvm_unreachable("Unexpected clause.");
+ for (const auto *E : Vars) {
+ const Decl *D =
+ cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
+ if (D == VD->getCanonicalDecl()) {
+ IsForCombinedParallelRegion = true;
+ break;
+ }
+ }
+ if (IsForCombinedParallelRegion)
+ break;
+ }
+ }
+ markAsEscaped(VD);
+ if (isa<OMPCapturedExprDecl>(VD))
+ VisitValueDecl(VD);
+ IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
+ }
+ }
+ }
+
+ typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
+ static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
+ return P1.first > P2.first;
+ }
+
+ void buildRecordForGlobalizedVars() {
+ assert(!GlobalizedRD &&
+ "Record for globalized variables is built already.");
+ if (EscapedDecls.empty())
+ return;
+ ASTContext &C = CGF.getContext();
+ SmallVector<VarsDataTy, 4> GlobalizedVars;
+ for (const ValueDecl *D : EscapedDecls)
+ GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
+ std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
+ stable_sort_comparator);
+ // Build struct _globalized_locals_ty {
+ // /* globalized vars */
+ // };
+ GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
+ GlobalizedRD->startDefinition();
+ for (const auto &Pair : GlobalizedVars) {
+ const ValueDecl *VD = Pair.second;
+ QualType Type = VD->getType();
+ if (Type->isLValueReferenceType())
+ Type = C.getPointerType(Type.getNonReferenceType());
+ else
+ Type = Type.getNonReferenceType();
+ SourceLocation Loc = VD->getLocation();
+ auto *Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ GlobalizedRD->addDecl(Field);
+ if (VD->hasAttrs()) {
+ for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+ E(VD->getAttrs().end());
+ I != E; ++I)
+ Field->addAttr(*I);
+ }
+ MappedDeclsFields.try_emplace(VD, Field);
+ }
+ GlobalizedRD->completeDefinition();
+ }
+
+public:
+ CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {}
+ virtual ~CheckVarsEscapingDeclContext() = default;
+ void VisitDeclStmt(const DeclStmt *S) {
+ if (!S)
+ return;
+ for (const Decl *D : S->decls())
+ if (const auto *VD = dyn_cast_or_null<ValueDecl>(D))
+ VisitValueDecl(VD);
+ }
+ void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
+ if (!D)
+ return;
+ if (!D->hasAssociatedStmt())
+ return;
+ if (const auto *S =
+ dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) {
+ // Do not analyze directives that do not actually require capturing,
+ // like `omp for` or `omp simd` directives.
+ llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+ getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());
+ if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
+ VisitStmt(S->getCapturedStmt());
+ return;
+ }
+ VisitOpenMPCapturedStmt(
+ S, D->clauses(),
+ CaptureRegions.back() == OMPD_parallel &&
+ isOpenMPDistributeDirective(D->getDirectiveKind()));
+ }
+ }
+ void VisitCapturedStmt(const CapturedStmt *S) {
+ if (!S)
+ return;
+ for (const CapturedStmt::Capture &C : S->captures()) {
+ if (C.capturesVariable() && !C.capturesVariableByCopy()) {
+ const ValueDecl *VD = C.getCapturedVar();
+ markAsEscaped(VD);
+ if (isa<OMPCapturedExprDecl>(VD))
+ VisitValueDecl(VD);
+ }
+ }
+ }
+ void VisitLambdaExpr(const LambdaExpr *E) {
+ if (!E)
+ return;
+ for (const LambdaCapture &C : E->captures()) {
+ if (C.capturesVariable()) {
+ if (C.getCaptureKind() == LCK_ByRef) {
+ const ValueDecl *VD = C.getCapturedVar();
+ markAsEscaped(VD);
+ if (E->isInitCapture(&C) || isa<OMPCapturedExprDecl>(VD))
+ VisitValueDecl(VD);
+ }
+ }
+ }
+ }
+ void VisitBlockExpr(const BlockExpr *E) {
+ if (!E)
+ return;
+ for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) {
+ if (C.isByRef()) {
+ const VarDecl *VD = C.getVariable();
+ markAsEscaped(VD);
+ if (isa<OMPCapturedExprDecl>(VD) || VD->isInitCapture())
+ VisitValueDecl(VD);
+ }
+ }
+ }
+ void VisitCallExpr(const CallExpr *E) {
+ if (!E)
+ return;
+ for (const Expr *Arg : E->arguments()) {
+ if (!Arg)
+ continue;
+ if (Arg->isLValue()) {
+ const bool SavedAllEscaped = AllEscaped;
+ AllEscaped = true;
+ Visit(Arg);
+ AllEscaped = SavedAllEscaped;
+ } else {
+ Visit(Arg);
+ }
+ }
+ Visit(E->getCallee());
+ }
+ void VisitDeclRefExpr(const DeclRefExpr *E) {
+ if (!E)
+ return;
+ const ValueDecl *VD = E->getDecl();
+ if (AllEscaped)
+ markAsEscaped(VD);
+ if (isa<OMPCapturedExprDecl>(VD))
+ VisitValueDecl(VD);
+ else if (const auto *VarD = dyn_cast<VarDecl>(VD))
+ if (VarD->isInitCapture())
+ VisitValueDecl(VD);
+ }
+ void VisitUnaryOperator(const UnaryOperator *E) {
+ if (!E)
+ return;
+ if (E->getOpcode() == UO_AddrOf) {
+ const bool SavedAllEscaped = AllEscaped;
+ AllEscaped = true;
+ Visit(E->getSubExpr());
+ AllEscaped = SavedAllEscaped;
+ } else {
+ Visit(E->getSubExpr());
+ }
+ }
+ void VisitImplicitCastExpr(const ImplicitCastExpr *E) {
+ if (!E)
+ return;
+ if (E->getCastKind() == CK_ArrayToPointerDecay) {
+ const bool SavedAllEscaped = AllEscaped;
+ AllEscaped = true;
+ Visit(E->getSubExpr());
+ AllEscaped = SavedAllEscaped;
+ } else {
+ Visit(E->getSubExpr());
+ }
+ }
+ void VisitExpr(const Expr *E) {
+ if (!E)
+ return;
+ bool SavedAllEscaped = AllEscaped;
+ if (!E->isLValue())
+ AllEscaped = false;
+ for (const Stmt *Child : E->children())
+ if (Child)
+ Visit(Child);
+ AllEscaped = SavedAllEscaped;
+ }
+ void VisitStmt(const Stmt *S) {
+ if (!S)
+ return;
+ for (const Stmt *Child : S->children())
+ if (Child)
+ Visit(Child);
+ }
+
+ /// Returns the record that handles all the escaped local variables and is
+ /// used instead of their original storage.
+ const RecordDecl *getGlobalizedRecord() {
+ if (!GlobalizedRD)
+ buildRecordForGlobalizedVars();
+ return GlobalizedRD;
+ }
+
+ /// Returns the field in the globalized record for the escaped variable.
+ const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
+ assert(GlobalizedRD &&
+ "Record for globalized variables must be generated already.");
+ auto I = MappedDeclsFields.find(VD);
+ if (I == MappedDeclsFields.end())
+ return nullptr;
+ return I->getSecond();
+ }
+
+ /// Returns the list of the escaped local variables/parameters.
+ ArrayRef<const ValueDecl *> getEscapedDecls() const {
+ return EscapedDecls.getArrayRef();
+ }
+
+ /// Returns the set of escaped local variables that are actually parameters
+ /// passed by value.
+ const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
+ return EscapedParameters;
+ }
+
+ /// Returns the list of the escaped variables with the variably modified
+ /// types.
+ ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const {
+ return EscapedVariableLengthDecls.getArrayRef();
+ }
+};
} // anonymous namespace
/// Get the GPU warp size.
@@ -223,12 +599,12 @@ static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
/// CTA. The threads in the last warp are reserved for master execution.
/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
- bool IsInSpmdExecutionMode = false) {
+ bool IsInSPMDExecutionMode = false) {
CGBuilderTy &Bld = CGF.Builder;
- return IsInSpmdExecutionMode
+ return IsInSPMDExecutionMode
? getNVPTXNumThreads(CGF)
- : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
- "thread_limit");
+ : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
}
/// Get the thread id of the OMP master thread.
@@ -243,96 +619,295 @@ static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
// We assume that the warp size is a power of 2.
- llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
+ llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
- return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+ return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
Bld.CreateNot(Mask), "master_tid");
}
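+
+// Worked example for getMasterThreadID above (illustrative): with 128 threads
+// per CTA and a warp size of 32, Mask == 31 and the master thread id is
+// (127 & ~31) == 96, i.e. the first lane of the last warp.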
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
- CodeGenModule &CGM)
- : WorkerFn(nullptr), CGFI(nullptr) {
+ CodeGenModule &CGM, SourceLocation Loc)
+ : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()),
+ Loc(Loc) {
createWorkerFunction(CGM);
}
void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
CodeGenModule &CGM) {
  // Create a worker function with no arguments.
- CGFI = &CGM.getTypes().arrangeNullaryFunction();
WorkerFn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
- /* placeholder */ "_worker", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ /*placeholder=*/"_worker", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI);
+ WorkerFn->setDoesNotRecurse();
}
-bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
- return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
+CGOpenMPRuntimeNVPTX::ExecutionMode
+CGOpenMPRuntimeNVPTX::getExecutionMode() const {
+ return CurrentExecutionMode;
+}
+
+static CGOpenMPRuntimeNVPTX::DataSharingMode
+getDataSharingMode(CodeGenModule &CGM) {
+ return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeNVPTX::CUDA
+ : CGOpenMPRuntimeNVPTX::Generic;
+}
+
+/// Checks if \p Body is a \a CompoundStmt and returns its single child
+/// statement iff there is exactly one.
+static const Stmt *getSingleCompoundChild(const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body))
+ if (C->size() == 1)
+ return C->body_front();
+ return Body;
+}
+
+/// Check if the parallel directive has an 'if' clause with a non-constant or
+/// false condition. Also check if the number of threads is strictly specified;
+/// such directives must be run in non-SPMD mode.
+static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ if (D.hasClausesOfKind<OMPNumThreadsClause>())
+ return true;
+ for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
+ OpenMPDirectiveKind NameModifier = C->getNameModifier();
+ if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
+ continue;
+ const Expr *Cond = C->getCondition();
+ bool Result;
+ if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result)
+ return true;
+ }
+ return false;
}
-static CGOpenMPRuntimeNVPTX::ExecutionMode
-getExecutionModeForDirective(CodeGenModule &CGM,
- const OMPExecutableDirective &D) {
+/// Check for an inner (nested) SPMD construct, if any.
+static bool hasNestedSPMDDirective(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
+ const Stmt *ChildStmt = getSingleCompoundChild(Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
+ return true;
+ if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NND))
+ return true;
+ if (DKind == OMPD_distribute) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (!ChildStmt)
+ return false;
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ return isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NND);
+ }
+ }
+ }
+ }
+ return false;
+ case OMPD_target_teams:
+ if (isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
+ return true;
+ if (DKind == OMPD_distribute) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ return isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NND);
+ }
+ }
+ return false;
+ case OMPD_target_teams_distribute:
+ return isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
+ case OMPD_target_simd:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return false;
+}
+
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
- return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
+ case OMPD_target_teams_distribute:
+ return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
- return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
- default:
- llvm_unreachable("Unsupported directive on NVPTX device.");
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ return !hasParallelIfNumThreadsClause(Ctx, D);
+ case OMPD_target_simd:
+ case OMPD_target_teams_distribute_simd:
+ return false;
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_unknown:
+ break;
}
- llvm_unreachable("Unsupported directive on NVPTX device.");
+ llvm_unreachable(
+ "Unknown programming model for OpenMP directive on NVPTX target.");
}
-void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
+void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode,
- CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
EntryFunctionState EST;
- WorkerFunctionState WST(CGM);
+ WorkerFunctionState WST(CGM, D.getLocStart());
Work.clear();
WrapperFunctionsMap.clear();
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
- CGOpenMPRuntimeNVPTX &RT;
CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
public:
- NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
- CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+ NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
- : RT(RT), EST(EST), WST(WST) {}
+ : EST(EST), WST(WST) {}
void Enter(CodeGenFunction &CGF) override {
- RT.emitGenericEntryHeader(CGF, EST, WST);
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitNonSPMDEntryHeader(CGF, EST, WST);
}
void Exit(CodeGenFunction &CGF) override {
- RT.emitGenericEntryFooter(CGF, EST);
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitNonSPMDEntryFooter(CGF, EST);
}
- } Action(*this, EST, WST);
+ } Action(EST, WST);
CodeGen.setAction(Action);
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
- // Create the worker function
- emitWorkerFunction(WST);
-
// Now change the name of the worker function to correspond to this target
// region's entry function.
- WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
+ WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker"));
+
+ // Create the worker function
+ emitWorkerFunction(WST);
}
// Setup NVPTX threads for master-worker OpenMP scheme.
-void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
+void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(CodeGenFunction &CGF,
EntryFunctionState &EST,
WorkerFunctionState &WST) {
CGBuilderTy &Bld = CGF.Builder;
@@ -342,20 +917,22 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
EST.ExitBB = CGF.createBasicBlock(".exit");
- auto *IsWorker =
+ llvm::Value *IsWorker =
Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
CGF.EmitBlock(WorkerBB);
- emitCall(CGF, WST.WorkerFn);
+ emitCall(CGF, WST.Loc, WST.WorkerFn);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(MasterCheckBB);
- auto *IsMaster =
+ llvm::Value *IsMaster =
Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
CGF.EmitBlock(MasterBB);
+ IsInTargetMasterThreadRegion = true;
+ // SEQUENTIAL (MASTER) REGION START
// First action in sequential region:
// Initialize the state of the OpenMP runtime library on the GPU.
// TODO: Optimize runtime initialization and pass in correct value.
@@ -363,10 +940,23 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
Bld.getInt16(/*RequiresOMPRuntime=*/1)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
+
+ // For data sharing, we need to initialize the stack.
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
+
+ emitGenericVarsProlog(CGF, WST.Loc);
}
-void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
+void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
EntryFunctionState &EST) {
+ IsInTargetMasterThreadRegion = false;
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ emitGenericVarsEpilog(CGF);
+
if (!EST.ExitBB)
EST.ExitBB = CGF.createBasicBlock(".exit");
@@ -388,14 +978,13 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
EST.ExitBB = nullptr;
}
-void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
+void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode,
- CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd);
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -410,10 +999,10 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
const OMPExecutableDirective &D)
: RT(RT), EST(EST), D(D) {}
void Enter(CodeGenFunction &CGF) override {
- RT.emitSpmdEntryHeader(CGF, EST, D);
+ RT.emitSPMDEntryHeader(CGF, EST, D);
}
void Exit(CodeGenFunction &CGF) override {
- RT.emitSpmdEntryFooter(CGF, EST);
+ RT.emitSPMDEntryFooter(CGF, EST);
}
} Action(*this, EST, D);
CodeGen.setAction(Action);
@@ -421,10 +1010,10 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
IsOffloadEntry, CodeGen);
}
-void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
+void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D) {
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// Setup BBs in entry function.
llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
@@ -433,18 +1022,30 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
// Initialize the OMP state in the runtime; called by all active threads.
// TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
// based on code analysis of the target region.
- llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true),
+ llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
/*RequiresOMPRuntime=*/Bld.getInt16(1),
/*RequiresDataSharing=*/Bld.getInt16(1)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
+
+ // For data sharing, we need to initialize the stack.
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+
CGF.EmitBranch(ExecuteBB);
CGF.EmitBlock(ExecuteBB);
+
+ IsInTargetMasterThreadRegion = true;
}
-void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
+void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF,
EntryFunctionState &EST) {
+ IsInTargetMasterThreadRegion = false;
+ if (!CGF.HaveInsertPoint())
+ return;
+
if (!EST.ExitBB)
EST.ExitBB = CGF.createBasicBlock(".exit");
@@ -468,19 +1069,21 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
// 'generic', the runtime reserves one warp for the master, otherwise, all
// warps participate in parallel work.
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
- CGOpenMPRuntimeNVPTX::ExecutionMode Mode) {
- (void)new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::WeakAnyLinkage,
- llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode"));
+ bool Mode) {
+ auto *GVMode =
+ new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1),
+ Twine(Name, "_exec_mode"));
+ CGM.addCompilerUsedGlobal(GVMode);
}
void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
ASTContext &Ctx = CGM.getContext();
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {},
+ WST.Loc, WST.Loc);
emitWorkerLoop(CGF, WST);
CGF.FinishFunction();
}
@@ -519,19 +1122,16 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
- // Set up shared arguments
- Address SharedArgs =
- CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args");
// TODO: Optimize runtime initialization and pass in correct value.
- llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(),
+ llvm::Value *Args[] = {WorkFn.getPointer(),
/*RequiresOMPRuntime=*/Bld.getInt16(1)};
llvm::Value *Ret = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
// On termination condition (workid == 0), exit loop.
- llvm::Value *ShouldTerminate =
- Bld.CreateIsNull(Bld.CreateLoad(WorkFn), "should_terminate");
+ llvm::Value *WorkID = Bld.CreateLoad(WorkFn);
+ llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID, "should_terminate");
Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
// Activate requested workers.
@@ -543,13 +1143,10 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Signal start of parallel region.
CGF.EmitBlock(ExecuteBB);
- // Current context
- ASTContext &Ctx = CGF.getContext();
-
// Process work items: outlined parallel functions.
- for (auto *W : Work) {
+ for (llvm::Function *W : Work) {
// Try to match this outlined function.
- auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
+ llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
llvm::Value *WorkFnMatch =
Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");
@@ -562,23 +1159,33 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.EmitBlock(ExecuteFNBB);
// Insert call to work function via shared wrapper. The shared
- // wrapper takes exactly three arguments:
+ // wrapper takes two arguments:
// - the parallelism level;
- // - the master thread ID;
- // - the list of references to shared arguments.
- //
- // TODO: Assert that the function is a wrapper function.s
- Address Capture = CGF.EmitLoadOfPointer(SharedArgs,
- Ctx.getPointerType(
- Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>());
- emitCall(CGF, W, {Bld.getInt16(/*ParallelLevel=*/0),
- getMasterThreadID(CGF), Capture.getPointer()});
+ // - the thread ID;
+ emitCall(CGF, WST.Loc, W,
+ {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
// Go to end of parallel region.
CGF.EmitBranch(TerminateBB);
CGF.EmitBlock(CheckNextBB);
}
+ // Default case: call to outlined function through pointer if the target
+ // region makes a declare target call that may contain an orphaned parallel
+ // directive.
+ auto *ParallelFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
+ /*isVarArg=*/false)
+ ->getPointerTo();
+ llvm::Value *WorkFnCast = Bld.CreateBitCast(WorkID, ParallelFnTy);
+ // Insert call to work function via shared wrapper. The shared
+ // wrapper takes two arguments:
+ // - the parallelism level;
+ // - the thread ID;
+ emitCall(CGF, WST.Loc, WorkFnCast,
+ {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
+ // Go to end of parallel region.
+ CGF.EmitBranch(TerminateBB);
// Signal end of parallel region.
CGF.EmitBlock(TerminateBB);
@@ -597,7 +1204,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.EmitBlock(ExitBB);
}
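
The worker loop above now dispatches to wrappers with a two-argument signature. As a C++ sketch of that contract (the alias name is ours; the runtime entry point is the one declared later in this patch):

    #include <cstdint>

    // Shared arguments are no longer a third parameter; a wrapper recovers
    // them itself through the variable-sharing runtime.
    extern "C" void __kmpc_get_shared_variables(void ***GlobalArgs);

    using ParallelWrapperFn = void (*)(std::int16_t ParallelLevel,
                                       std::int32_t ThreadID);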
-/// \brief Returns specified OpenMP runtime function for the current OpenMP
+/// Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
/// \return Specified function.
@@ -609,7 +1216,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
// Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
// RequiresOMPRuntime);
llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
break;
@@ -617,7 +1224,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
case OMPRTL_NVPTX__kmpc_kernel_deinit: {
// Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
llvm::Type *TypeParams[] = {CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
break;
@@ -626,44 +1233,40 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
// Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
break;
}
case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
// Build void __kmpc_spmd_kernel_deinit();
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
/// Build void __kmpc_kernel_prepare_parallel(
- /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t
- /// IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrTy,
- CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty,
- CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_parallel: {
- /// Build bool __kmpc_kernel_parallel(void **outlined_function, void
- /// ***args, int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy,
- CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty};
+ /// Build bool __kmpc_kernel_parallel(void **outlined_function,
+ /// int16_t IsOMPRuntimeInitialized);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
/// Build void __kmpc_kernel_end_parallel();
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
break;
@@ -672,7 +1275,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
break;
@@ -681,7 +1284,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
@@ -690,7 +1293,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
// Build int32_t __kmpc_shuffle_int32(int32_t element,
// int16_t lane_offset, int16_t warp_size);
llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
break;
@@ -699,7 +1302,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
// Build int64_t __kmpc_shuffle_int64(int64_t element,
// int16_t lane_offset, int16_t warp_size);
llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
break;
@@ -725,12 +1328,39 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
CGM.VoidPtrTy,
ShuffleReduceFnTy->getPointerTo(),
InterWarpCopyFnTy->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
break;
}
+ case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: {
+ // Build int32_t __kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ // lane_offset, int16_t AlgoVersion),
+ // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait");
+ break;
+ }
case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
// Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
// int32_t num_vars, size_t reduce_size, void *reduce_data,
@@ -768,7 +1398,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
InterWarpCopyFnTy->getPointerTo(),
CopyToScratchpadFnTy->getPointerTo(),
LoadReduceFnTy->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
@@ -777,32 +1407,103 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
// Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
llvm::Type *TypeParams[] = {CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
break;
}
+ case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
+ /// Build void __kmpc_data_sharing_init_stack();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
+ /// Build void __kmpc_data_sharing_init_stack_spmd();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
+ // Build void *__kmpc_data_sharing_push_stack(size_t size,
+ // int16_t UseSharedMemory);
+ llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
+ // Build void __kmpc_data_sharing_pop_stack(void *a);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy,
+ /*Name=*/"__kmpc_data_sharing_pop_stack");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
+ /// Build void __kmpc_begin_sharing_variables(void ***args,
+ /// size_t n_args);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
+ /// Build void __kmpc_end_sharing_variables();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_get_shared_variables: {
+ /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_parallel_level: {
+ // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
+ // Build int8_t __kmpc_is_spmd_exec_mode();
+ auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
+ break;
+ }
}
return RTLFn;
}
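
For reference, the entry points added to this switch correspond to the following C-level prototypes, read off the FunctionTypes built above (a sketch; `void *` stands in for `ident_t *`, and the authoritative declarations live in the NVPTX device runtime):

    #include <cstddef>
    #include <cstdint>

    extern "C" {
    void  __kmpc_data_sharing_init_stack();
    void  __kmpc_data_sharing_init_stack_spmd();
    void *__kmpc_data_sharing_push_stack(std::size_t Size,
                                         std::int16_t UseSharedMemory);
    void  __kmpc_data_sharing_pop_stack(void *Ptr);
    void  __kmpc_begin_sharing_variables(void ***Args, std::size_t NArgs);
    void  __kmpc_end_sharing_variables();
    void  __kmpc_get_shared_variables(void ***GlobalArgs);
    std::uint16_t __kmpc_parallel_level(void *Loc, std::int32_t GlobalTid);
    std::int8_t   __kmpc_is_spmd_exec_mode();
    }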
void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
llvm::Constant *Addr,
- uint64_t Size, int32_t) {
- auto *F = dyn_cast<llvm::Function>(Addr);
+ uint64_t Size, int32_t,
+ llvm::GlobalValue::LinkageTypes) {
// TODO: Add support for global variables on the device after declare target
// support.
- if (!F)
+ if (!isa<llvm::Function>(Addr))
return;
- llvm::Module *M = F->getParent();
- llvm::LLVMContext &Ctx = M->getContext();
+ llvm::Module &M = CGM.getModule();
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
// Get "nvvm.annotations" metadata node
- llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+ llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
llvm::Metadata *MDVals[] = {
- llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"),
+ llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
// Append metadata to nvvm.annotations
@@ -818,27 +1519,19 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
- CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
- getExecutionModeForDirective(CGM, D);
- switch (Mode) {
- case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
- emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
- CodeGen);
- break;
- case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd:
- emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
+ if (Mode)
+ emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
- break;
- case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown:
- llvm_unreachable(
- "Unknown programming model for OpenMP directive on NVPTX target.");
- }
+ else
+ emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) {
+ : CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
}
@@ -846,9 +1539,8 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
OpenMPProcBindClauseKind ProcBind,
SourceLocation Loc) {
- // Do nothing in case of Spmd mode and L0 parallel.
- // TODO: If in Spmd mode and L1 parallel emit the clause.
- if (isInSpmdExecutionMode())
+ // Do nothing in case of SPMD mode and L0 parallel.
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -857,9 +1549,8 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
- // Do nothing in case of Spmd mode and L0 parallel.
- // TODO: If in Spmd mode and L1 parallel emit the clause.
- if (isInSpmdExecutionMode())
+ // Do nothing in case of SPMD mode and L0 parallel.
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -873,13 +1564,33 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ bool &IsInParallelRegion;
+ bool PrevIsInParallelRegion;
- auto *OutlinedFun = cast<llvm::Function>(
- CGOpenMPRuntime::emitParallelOutlinedFunction(
+ public:
+ NVPTXPrePostActionTy(bool &IsInParallelRegion)
+ : IsInParallelRegion(IsInParallelRegion) {}
+ void Enter(CodeGenFunction &CGF) override {
+ PrevIsInParallelRegion = IsInParallelRegion;
+ IsInParallelRegion = true;
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ IsInParallelRegion = PrevIsInParallelRegion;
+ }
+ } Action(IsInParallelRegion);
+ CodeGen.setAction(Action);
+ bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
+ IsInTargetMasterThreadRegion = false;
+ auto *OutlinedFun =
+ cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
- if (!isInSpmdExecutionMode()) {
+ IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
+ if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
+ !IsInParallelRegion) {
llvm::Function *WrapperFun =
- createDataSharingWrapper(OutlinedFun, D);
+ createParallelDataSharingWrapper(OutlinedFun, D);
WrapperFunctionsMap[OutlinedFun] = WrapperFun;
}
@@ -889,7 +1600,24 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ SourceLocation Loc = D.getLocStart();
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ SourceLocation &Loc;
+
+ public:
+ NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ void Enter(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsProlog(CGF, Loc);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ } Action(Loc);
+ CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
@@ -900,6 +1628,119 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
return OutlinedFun;
}
+void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ return;
+
+ CGBuilderTy &Bld = CGF.Builder;
+
+ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I == FunctionGlobalizedDecls.end())
+ return;
+ if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
+ QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ GlobalRecordSizeArg);
+ llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ LValue Base =
+ CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+
+ // Emit the "global alloca" which is a GEP from the global declaration
+ // record using the pointer returned by the runtime.
+ for (auto &Rec : I->getSecond().LocalVarData) {
+ bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
+ llvm::Value *ParValue;
+ if (EscapedParam) {
+ const auto *VD = cast<VarDecl>(Rec.first);
+ LValue ParLVal =
+ CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
+ ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
+ }
+ const FieldDecl *FD = Rec.second.first;
+ LValue VarAddr = CGF.EmitLValueForField(Base, FD);
+ Rec.second.second = VarAddr.getAddress();
+ if (EscapedParam) {
+ const auto *VD = cast<VarDecl>(Rec.first);
+ CGF.EmitStoreOfScalar(ParValue, VarAddr);
+ I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
+ }
+ }
+ }
+ for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Value *Size = CGF.getTypeSize(VD->getType());
+ CharUnits Align = CGM.getContext().getDeclAlign(VD);
+ Size = Bld.CreateNUWAdd(
+ Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));
+ llvm::Value *AlignVal =
+ llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity());
+ Size = Bld.CreateUDiv(Size, AlignVal);
+ Size = Bld.CreateNUWMul(Size, AlignVal);
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ GlobalRecordSizeArg);
+ llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
+ LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(),
+ CGM.getContext().getDeclAlign(VD),
+ AlignmentSource::Decl);
+ I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD),
+ Base.getAddress());
+ I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
+ }
+ I->getSecond().MappedParams->apply(CGF);
+}
+
+void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ return;
+
+ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I != FunctionGlobalizedDecls.end()) {
+ I->getSecond().MappedParams->restore(CGF);
+ if (!CGF.HaveInsertPoint())
+ return;
+ for (llvm::Value *Addr :
+ llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ Addr);
+ }
+ if (I->getSecond().GlobalRecordAddr) {
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ I->getSecond().GlobalRecordAddr);
+ }
+ }
+}
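+
Together the prolog and epilog implement a stack discipline on the runtime's data-sharing stack. A simplified host-side model of the emitted sequence (the size is a placeholder for the padded record size; variable-length allocations pop first, in reverse order):

    #include <cstddef>
    #include <cstdint>

    extern "C" void *__kmpc_data_sharing_push_stack(std::size_t Size,
                                                    std::int16_t UseSharedMemory);
    extern "C" void __kmpc_data_sharing_pop_stack(void *Ptr);

    void globalizedFrameModel() {
      // Prolog: one record holds all escaped locals; global memory for now.
      void *Rec = __kmpc_data_sharing_push_stack(/*Size=*/64,
                                                 /*UseSharedMemory=*/0);
      // ...the body addresses escaped locals as fields of Rec...
      // Epilog: pop in reverse order of the pushes.
      __kmpc_data_sharing_pop_stack(Rec);
    }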
+
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
@@ -908,12 +1749,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
- Address ZeroAddr =
- CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ /*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -925,66 +1766,102 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
if (!CGF.HaveInsertPoint())
return;
- if (isInSpmdExecutionMode())
- emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
+ emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
else
- emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+ emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
}
-void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
+void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
- llvm::Function *WFn = WrapperFunctionsMap[Fn];
- assert(WFn && "Wrapper function does not exist!");
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
- PrePostActionTy &) {
- CGBuilderTy &Bld = CGF.Builder;
+ Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/32, /*Signed=*/1),
+ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ // ThreadId for serialized parallels is 0.
+ Address ThreadIDAddr = ZeroAddr;
+ auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
+ };
+ auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+
+ RegionCodeGenTy RCG(CodeGen);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *Args[] = {RTLoc, ThreadID};
+ NVPTXActionTy Action(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+ Args,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+ Args);
+ RCG.setAction(Action);
+ RCG(CGF);
+ };
+
+ auto &&L0ParallelGen = [this, CapturedVars, Fn](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Function *WFn = WrapperFunctionsMap[Fn];
+ assert(WFn && "Wrapper function does not exist!");
llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
+ // Prepare for parallel region. Indicate the outlined function.
+ llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+ Args);
+
+ // Create a private scope that will globalize the arguments
+ // passed from the outside of the target region.
+ CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
+
+ // There's something to share.
if (!CapturedVars.empty()) {
- // There's somehting to share, add the attribute
- CGF.CurFn->addFnAttr("has-nvptx-shared-depot");
// Prepare for parallel region. Indicate the outlined function.
Address SharedArgs =
- CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
- "shared_args");
+ CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_arg_refs");
llvm::Value *SharedArgsPtr = SharedArgs.getPointer();
- // TODO: Optimize runtime initialization and pass in correct value.
- llvm::Value *Args[] = {ID, SharedArgsPtr,
- Bld.getInt32(CapturedVars.size()),
- /*RequiresOMPRuntime=*/Bld.getInt16(1)};
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
- Args);
+ llvm::Value *DataSharingArgs[] = {
+ SharedArgsPtr,
+ llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_begin_sharing_variables),
+ DataSharingArgs);
+ // Store variable address in a list of references to pass to workers.
unsigned Idx = 0;
ASTContext &Ctx = CGF.getContext();
+ Address SharedArgListAddress = CGF.EmitLoadOfPointer(
+ SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy))
+ .castAs<PointerType>());
for (llvm::Value *V : CapturedVars) {
- Address Dst = Bld.CreateConstInBoundsGEP(
- CGF.EmitLoadOfPointer(SharedArgs,
- Ctx.getPointerType(
- Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>()),
- Idx, CGF.getPointerSize());
- llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
+ Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx,
+ CGF.getPointerSize());
+ llvm::Value *PtrV;
+ if (V->getType()->isIntegerTy())
+ PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
+ else
+ PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
- Ctx.getPointerType(Ctx.VoidPtrTy));
- Idx++;
+ Ctx.getPointerType(Ctx.VoidPtrTy));
+ ++Idx;
}
- } else {
- // TODO: Optimize runtime initialization and pass in correct value.
- llvm::Value *Args[] = {
- ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)),
- /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)};
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
- Args);
}
// Activate workers. This barrier is used by the master to signal
@@ -999,96 +1876,332 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
// The master waits at this barrier until all workers are done.
syncCTAThreads(CGF);
+ if (!CapturedVars.empty())
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
+
// Remember for post-processing in worker loop.
Work.emplace_back(WFn);
};
- auto *RTLoc = emitUpdateLocation(CGF, Loc);
- auto *ThreadID = getThreadID(CGF, Loc);
- llvm::Value *Args[] = {RTLoc, ThreadID};
-
- auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
- PrePostActionTy &) {
- auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
- Action.Enter(CGF);
-
- llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
- emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
- };
-
+ auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen,
+ &ThreadIDAddr](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
RegionCodeGenTy RCG(CodeGen);
- NVPTXActionTy Action(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
- Args,
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
- Args);
- RCG.setAction(Action);
- RCG(CGF);
+ if (IsInParallelRegion) {
+ SeqGen(CGF, Action);
+ } else if (IsInTargetMasterThreadRegion) {
+ L0ParallelGen(CGF, Action);
+ } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
+ RCG(CGF);
+ } else {
+ // Check for master and then parallelism:
+ // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) {
+ // Serialized execution.
+ // } else if (master) {
+ // Worker call.
+ // } else {
+ // Outlined function call.
+ // }
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
+ llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
+ llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+ llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+ Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
+ // There is no need to emit a line number for an unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ParallelCheckBB);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *PL = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ {RTLoc, ThreadID});
+ llvm::Value *Res = Bld.CreateIsNotNull(PL);
+ Bld.CreateCondBr(Res, SeqBB, MasterCheckBB);
+ CGF.EmitBlock(SeqBB);
+ SeqGen(CGF, Action);
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit a line number for an unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(MasterCheckBB);
+ llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then");
+ llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
+ llvm::Value *IsMaster =
+ Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+ Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock);
+ CGF.EmitBlock(MasterThenBB);
+ L0ParallelGen(CGF, Action);
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit a line number for an unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ElseBlock);
+ // In the worker need to use the real thread id.
+ ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+ RCG(CGF);
+ // There is no need to emit a line number for an unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ // Emit the continuation block for code after the if.
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+ }
};
- if (IfCond)
- emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen);
- else {
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, LNParallelGen, SeqGen);
+ } else {
CodeGenFunction::RunCleanupsScope Scope(CGF);
- RegionCodeGenTy ThenRCG(L0ParallelGen);
+ RegionCodeGenTy ThenRCG(LNParallelGen);
ThenRCG(CGF);
}
}
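
The basic blocks built in LNParallelGen reduce to a three-way dispatch. A host-style model (a sketch: the parameters stand in for the __kmpc_is_spmd_exec_mode and __kmpc_parallel_level calls and the NVPTX master-thread comparison):

    void dispatchModel(bool IsSPMD, unsigned ParallelLevel, bool IsMaster) {
      if (IsSPMD || ParallelLevel != 0) {
        // SeqGen: run the parallel region serialized.
      } else if (IsMaster) {
        // L0ParallelGen: publish the wrapper and wake the workers.
      } else {
        // CodeGen: direct call to the outlined function, using the real
        // thread id address.
      }
    }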
-void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
+void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
// Just call the outlined function to execute the parallel region.
// OutlinedFn(&GTid, &zero, CapturedStruct);
//
- // TODO: Do something with IfCond when support for the 'if' clause
- // is added on Spmd target directives.
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
- emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+
+ Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/32, /*Signed=*/1),
+ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ // ThreadId for serialized parallels is 0.
+ Address ThreadIDAddr = ZeroAddr;
+ auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
+ &ThreadIDAddr](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+ };
+ auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+
+ RegionCodeGenTy RCG(CodeGen);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *Args[] = {RTLoc, ThreadID};
+
+ NVPTXActionTy Action(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+ Args,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+ Args);
+ RCG.setAction(Action);
+ RCG(CGF);
+ };
+
+ if (IsInTargetMasterThreadRegion) {
+ // In the worker need to use the real thread id.
+ ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+ RegionCodeGenTy RCG(CodeGen);
+ RCG(CGF);
+ } else {
+ // If we are not in the target region, it is definitely L2 parallelism or
+ // more, because in SPMD mode there is always an L1 parallel level, so we
+ // don't need to check for orphaned directives.
+ RegionCodeGenTy RCG(SeqGen);
+ RCG(CGF);
+ }
+}
+
+void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
+ CodeGenFunction &CGF, StringRef CriticalName,
+ const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
+ const Expr *Hint) {
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");
+ llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");
+ llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
+
+ // Fetch team-local id of the thread.
+ llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+
+ // Get the width of the team.
+ llvm::Value *TeamWidth = getNVPTXNumThreads(CGF);
+
+ // Initialize the counter variable for the loop.
+ QualType Int32Ty =
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0);
+ Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter");
+ LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty);
+ CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,
+ /*isInit=*/true);
+
+ // Block checks if loop counter exceeds upper bound.
+ CGF.EmitBlock(LoopBB);
+ llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
+ llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
+ CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
+
+ // Block tests which single thread should execute the region, and which
+ // threads should go straight to the synchronisation point.
+ CGF.EmitBlock(TestBB);
+ CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
+ llvm::Value *CmpThreadToCounter =
+ CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
+ CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
+
+ // Block emits the body of the critical region.
+ CGF.EmitBlock(BodyBB);
+
+ // Output the critical statement.
+ CriticalOpGen(CGF);
+
+ // After executing the body of the critical region, the single executing
+ // thread jumps to the synchronisation point.
+ // Block waits for all threads in the current team to finish, then increments
+ // the counter variable and returns to the loop.
+ CGF.EmitBlock(SyncBB);
+ getNVPTXCTABarrier(CGF);
+
+ llvm::Value *IncCounterVal =
+ CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
+ CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal);
+ CGF.EmitBranch(LoopBB);
+
+ // Block that is reached when all threads in the team complete the region.
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
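+
The block structure amounts to a counter-driven round robin across the team. As a plain C++ model (ctaBarrier is a stub standing in for getNVPTXCTABarrier):

    static void ctaBarrier() { /* stands in for getNVPTXCTABarrier() */ }

    // Each round lets exactly one thread (ThreadID == Counter) run the body;
    // all threads then meet at the barrier before the next round, so the
    // region is both mutually exclusive and team-synchronized.
    void criticalModel(unsigned ThreadID, unsigned TeamWidth) {
      for (unsigned Counter = 0; Counter < TeamWidth; ++Counter) {
        if (ThreadID == Counter) {
          // critical body
        }
        ctaBarrier();
      }
    }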
+
+/// Cast value to the specified type.
+static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
+ QualType ValTy, QualType CastTy,
+ SourceLocation Loc) {
+ assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() &&
+ "Cast type must sized.");
+ assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() &&
+ "Val type must sized.");
+ llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy);
+ if (ValTy == CastTy)
+ return Val;
+ if (CGF.getContext().getTypeSizeInChars(ValTy) ==
+ CGF.getContext().getTypeSizeInChars(CastTy))
+ return CGF.Builder.CreateBitCast(Val, LLVMCastTy);
+ if (CastTy->isIntegerType() && ValTy->isIntegerType())
+ return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
+ CastTy->hasSignedIntegerRepresentation());
+ Address CastItem = CGF.CreateMemTemp(CastTy);
+ Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
+ CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy);
+ return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc);
}
/// This function creates calls to one of two shuffle functions to copy
/// variables between lanes in a warp.
static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
- QualType ElemTy,
llvm::Value *Elem,
- llvm::Value *Offset) {
- auto &CGM = CGF.CGM;
- auto &C = CGM.getContext();
- auto &Bld = CGF.Builder;
+ QualType ElemType,
+ llvm::Value *Offset,
+ SourceLocation Loc) {
+ CodeGenModule &CGM = CGF.CGM;
+ CGBuilderTy &Bld = CGF.Builder;
CGOpenMPRuntimeNVPTX &RT =
*(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
- unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
- assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
+ CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
+ assert(Size.getQuantity() <= 8 &&
+ "Unsupported bitwidth in shuffle instruction.");
- OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
+ OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
? OMPRTL_NVPTX__kmpc_shuffle_int32
: OMPRTL_NVPTX__kmpc_shuffle_int64;
// Cast all types to 32- or 64-bit values before calling shuffle routines.
- auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
- auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
- auto *WarpSize = CGF.EmitScalarConversion(
- getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
- C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+ QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
+ Size.getQuantity() <= 4 ? 32 : 64, /*Signed=*/1);
+ llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc);
+ llvm::Value *WarpSize =
+ Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
- auto *ShuffledVal =
- CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
- {ElemCast, Offset, WarpSize});
+ llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
+ RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
- return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+ return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
+}
+
+static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
+ Address DestAddr, QualType ElemType,
+ llvm::Value *Offset, SourceLocation Loc) {
+ CGBuilderTy &Bld = CGF.Builder;
+
+ CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
+ // Create the loop over the big-sized data.
+ // ptr = (void*)Elem;
+ // ptrEnd = (void*)(Elem + 1);
+ // Step = 8;
+ // while (ptr + Step < ptrEnd)
+ // shuffle((int64_t)*ptr);
+ // Step = 4;
+ // while (ptr + Step < ptrEnd)
+ // shuffle((int32_t)*ptr);
+ // ...
+ Address ElemPtr = DestAddr;
+ Address Ptr = SrcAddr;
+ Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ Bld.CreateConstGEP(SrcAddr, 1, Size), CGF.VoidPtrTy);
+ for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
+ if (Size < CharUnits::fromQuantity(IntSize))
+ continue;
+ QualType IntType = CGF.getContext().getIntTypeForBitwidth(
+ CGF.getContext().toBits(CharUnits::fromQuantity(IntSize)),
+ /*Signed=*/1);
+ llvm::Type *IntTy = CGF.ConvertTypeForMem(IntType);
+ Ptr = Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr, IntTy->getPointerTo());
+ ElemPtr =
+ Bld.CreatePointerBitCastOrAddrSpaceCast(ElemPtr, IntTy->getPointerTo());
+ if (Size.getQuantity() / IntSize > 1) {
+ llvm::BasicBlock *PreCondBB = CGF.createBasicBlock(".shuffle.pre_cond");
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".shuffle.then");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".shuffle.exit");
+ llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
+ CGF.EmitBlock(PreCondBB);
+ llvm::PHINode *PhiSrc =
+ Bld.CreatePHI(Ptr.getType(), /*NumReservedValues=*/2);
+ PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB);
+ llvm::PHINode *PhiDest =
+ Bld.CreatePHI(ElemPtr.getType(), /*NumReservedValues=*/2);
+ PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB);
+ Ptr = Address(PhiSrc, Ptr.getAlignment());
+ ElemPtr = Address(PhiDest, ElemPtr.getAlignment());
+ llvm::Value *PtrDiff = Bld.CreatePtrDiff(
+ PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast(
+ Ptr.getPointer(), CGF.VoidPtrTy));
+ Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
+ ThenBB, ExitBB);
+ CGF.EmitBlock(ThenBB);
+ llvm::Value *Res = createRuntimeShuffleFunction(
+ CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
+ IntType, Offset, Loc);
+ CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
+ Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
+ ElemPtr =
+ Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize));
+ PhiSrc->addIncoming(Ptr.getPointer(), ThenBB);
+ PhiDest->addIncoming(ElemPtr.getPointer(), ThenBB);
+ CGF.EmitBranch(PreCondBB);
+ CGF.EmitBlock(ExitBB);
+ } else {
+ llvm::Value *Res = createRuntimeShuffleFunction(
+ CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
+ IntType, Offset, Loc);
+ CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
+ Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
+ ElemPtr =
+ Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize));
+ }
+ Size = Size % IntSize;
+ }
}
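
The chunking arithmetic can be checked with a small host-side model (a sketch that mirrors only the size decomposition, not the shuffles; whether each chunk count emits the loop form or the straight-line form is collapsed here):

    #include <cstdio>

    int main() {
      unsigned Size = 15; // e.g. a 15-byte aggregate
      for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
        if (Size < static_cast<unsigned>(IntSize))
          continue;
        for (unsigned I = 0; I < Size / static_cast<unsigned>(IntSize); ++I)
          std::printf("shuffle a %d-byte chunk\n", IntSize);
        Size %= static_cast<unsigned>(IntSize);
      }
      return 0; // prints 8-, 4-, 2- and 1-byte chunks for Size == 15
    }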
namespace {
@@ -1119,19 +2232,19 @@ static void emitReductionListCopy(
ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase,
CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) {
- auto &CGM = CGF.CGM;
- auto &C = CGM.getContext();
- auto &Bld = CGF.Builder;
+ CodeGenModule &CGM = CGF.CGM;
+ ASTContext &C = CGM.getContext();
+ CGBuilderTy &Bld = CGF.Builder;
- auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
- auto *ScratchpadIndex = CopyOptions.ScratchpadIndex;
- auto *ScratchpadWidth = CopyOptions.ScratchpadWidth;
+ llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
+ llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
+ llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
// Iterates, element-by-element, through the source Reduce list and
// make a copy.
unsigned Idx = 0;
unsigned Size = Privates.size();
- for (auto &Private : Privates) {
+ for (const Expr *Private : Privates) {
Address SrcElementAddr = Address::invalid();
Address DestElementAddr = Address::invalid();
Address DestElementPtrAddr = Address::invalid();
@@ -1150,10 +2263,9 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Create a temporary to store the element in the destination
// Reduce list.
@@ -1169,62 +2281,49 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element. The destination
// element has already been created on the thread's stack.
DestElementPtrAddr =
Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
- llvm::Value *DestElementPtr =
- CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation());
- Address DestElemAddr =
- Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = CGF.EmitLoadOfPointer(
+ DestElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
break;
}
case ThreadToScratchpad: {
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
- auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
- auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+ llvm::Value *CurrentOffset =
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
+ llvm::Value *ScratchPadElemAbsolutePtrVal =
+ Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
- Address ScratchpadPtr =
- Address(ScratchPadElemAbsolutePtrVal,
- C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
IncrScratchpadDest = true;
break;
}
case ScratchpadToThread: {
// Step 1.1: Get the address for the src element in the scratchpad.
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
- auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
- auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset);
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+ llvm::Value *CurrentOffset =
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
+ llvm::Value *ScratchPadElemAbsolutePtrVal =
+ Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
@@ -1246,21 +2345,30 @@ static void emitReductionListCopy(
// element as this is required in all directions
SrcElementAddr = Bld.CreateElementBitCast(
SrcElementAddr, CGF.ConvertTypeForMem(Private->getType()));
- llvm::Value *Elem =
- CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
- Private->getType(), SourceLocation());
+ DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
+ SrcElementAddr.getElementType());
// Now that all active lanes have read the element in the
// Reduce list, shuffle over the value from the remote lane.
if (ShuffleInElement) {
- Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
- RemoteLaneOffset);
+ shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(),
+ RemoteLaneOffset, Private->getExprLoc());
+ } else {
+ if (Private->getType()->isScalarType()) {
+ llvm::Value *Elem =
+ CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
+ Private->getType(), Private->getExprLoc());
+ // Store the source element value to the dest element address.
+ CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
+ Private->getType());
+ } else {
+ CGF.EmitAggregateCopy(
+ CGF.MakeAddrLValue(DestElementAddr, Private->getType()),
+ CGF.MakeAddrLValue(SrcElementAddr, Private->getType()),
+ Private->getType(), AggValueSlot::DoesNotOverlap);
+ }
}
- // Store the source element value to the dest element address.
- CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
- Private->getType());
-
// Step 3.1: Modify reference in dest Reduce list as needed.
// Modifying the reference in Reduce list to point to the newly
// created element. The element is live in the current function
@@ -1279,22 +2387,20 @@ static void emitReductionListCopy(
if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
llvm::Value *ScratchpadBasePtr =
IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
- ScratchpadBasePtr = Bld.CreateAdd(
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
ScratchpadBasePtr,
- Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get(
- CGM.SizeTy, ElementSizeInChars)));
+ Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
// Take care of global memory alignment for performance
- ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateSDiv(
+ ScratchpadBasePtr = Bld.CreateNUWSub(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateUDiv(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
- ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateMul(
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateNUWMul(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
@@ -1304,7 +2410,7 @@ static void emitReductionListCopy(
SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
}
- Idx++;
+ ++Idx;
}
}
@@ -1319,27 +2425,31 @@ static void emitReductionListCopy(
/// local = local @ remote
/// else
/// local = remote
-static llvm::Value *
-emitReduceScratchpadFunction(CodeGenModule &CGM,
- ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy, llvm::Value *ReduceFn) {
- auto &C = CGM.getContext();
- auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+static llvm::Value *emitReduceScratchpadFunction(
+ CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
+ QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
// Destination of the copy.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// Base address of the scratchpad array, with each element storing a
// Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// A source index into the scratchpad array.
- ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
// Row width of an element in the scratchpad array, typically
// the number of teams.
- ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
// If should_reduce == 1, then it's load AND reduce,
// If should_reduce == 0 (or otherwise), then it only loads (+ copy).
// The latter case is used for initialization.
- ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ Int32Ty, ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
@@ -1348,47 +2458,44 @@ emitReduceScratchpadFunction(CodeGenModule &CGM,
Args.push_back(&WidthArg);
Args.push_back(&ShouldReduceArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_load_and_reduce", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// Get local Reduce list pointer.
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address ReduceListAddr(
Bld.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation()),
+ C.VoidPtrTy, Loc),
CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
CGF.getPointerAlign());
Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
- AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
- llvm::Value *IndexVal =
- Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
- Int32Ty, SourceLocation()),
- CGM.SizeTy, /*isSigned=*/true);
+ llvm::Value *IndexVal = Bld.CreateIntCast(
+ CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
+ CGM.SizeTy, /*isSigned=*/true);
Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
- llvm::Value *WidthVal =
- Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
- Int32Ty, SourceLocation()),
- CGM.SizeTy, /*isSigned=*/true);
+ llvm::Value *WidthVal = Bld.CreateIntCast(
+ CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc),
+ CGM.SizeTy, /*isSigned=*/true);
Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg);
llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar(
- AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation());
+ AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc);
// The absolute ptr address to the base addr of the next element to copy.
llvm::Value *CumulativeElemBasePtr =
@@ -1411,7 +2518,7 @@ emitReduceScratchpadFunction(CodeGenModule &CGM,
llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
- auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1));
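+ // Any nonzero should_reduce value selects the load-and-reduce path;
+ // zero means plain load (initialization).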
+ llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal);
Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
CGF.EmitBlock(ThenBB);
@@ -1421,7 +2528,8 @@ emitReduceScratchpadFunction(CodeGenModule &CGM,
ReduceListAddr.getPointer(), CGF.VoidPtrTy);
llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
RemoteReduceList.getPointer(), CGF.VoidPtrTy);
- CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr});
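+ // Call through the OpenMP runtime so target-specific argument translation
+ // is applied to the outlined reduce function.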
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+ CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr});
Bld.CreateBr(MergeBB);
CGF.EmitBlock(ElseBB);
@@ -1445,22 +2553,27 @@ emitReduceScratchpadFunction(CodeGenModule &CGM,
///
static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy) {
+ QualType ReductionArrayTy,
+ SourceLocation Loc) {
- auto &C = CGM.getContext();
- auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+ ASTContext &C = CGM.getContext();
+ QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
// Source of the copy.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// Base address of the scratchpad array, with each element storing a
// Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// A destination index into the scratchpad array, typically the team
// identifier.
- ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
// Row width of an element in the scratchpad array, typically
// the number of teams.
- ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+ ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+ ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
@@ -1468,36 +2581,34 @@ static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
Args.push_back(&IndexArg);
Args.push_back(&WidthArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_copy_to_scratchpad", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address SrcDataAddr(
Bld.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation()),
+ C.VoidPtrTy, Loc),
CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
CGF.getPointerAlign());
Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
- AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
- llvm::Value *IndexVal =
- Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
- Int32Ty, SourceLocation()),
- CGF.SizeTy, /*isSigned=*/true);
+ llvm::Value *IndexVal = Bld.CreateIntCast(
+ CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
+ CGF.SizeTy, /*isSigned=*/true);
Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
llvm::Value *WidthVal =
@@ -1534,35 +2645,36 @@ static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
/// sync
static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy) {
- auto &C = CGM.getContext();
- auto &M = CGM.getModule();
+ QualType ReductionArrayTy,
+ SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
+ llvm::Module &M = CGM.getModule();
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
// aggregated values reside in the first lane of every active warp.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// NumWarps: number of warps active in the parallel region. This could
// be smaller than 32 (max warps in a CTA) for partial block reduction.
- ImplicitParamDecl NumWarpsArg(C,
+ ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getIntTypeForBitwidth(32, /* Signed */ true),
ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
Args.push_back(&NumWarpsArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_inter_warp_copy_func", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// This array is used as a medium to transfer, one reduce element at a time,
// the data from the first lane of every warp to lanes in the first warp
@@ -1571,7 +2683,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
// for reduced latency, as well as to have a distinct copy for concurrently
// executing target regions. The array is declared with common linkage so
// as to be shared across compilation units.
- const char *TransferMediumName =
+ StringRef TransferMediumName =
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
@@ -1584,14 +2696,15 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
llvm::Constant::getNullValue(Ty), TransferMediumName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
SharedAddressSpace);
+ CGM.addCompilerUsedGlobal(TransferMedium);
}
// Get the CUDA thread id of the current OpenMP thread on the GPU.
- auto *ThreadID = getNVPTXThreadID(CGF);
+ llvm::Value *ThreadID = getNVPTXThreadID(CGF);
// nvptx_lane_id = nvptx_id % warpsize
- auto *LaneID = getNVPTXLaneID(CGF);
+ llvm::Value *LaneID = getNVPTXLaneID(CGF);
// nvptx_warp_id = nvptx_id / warpsize
- auto *WarpID = getNVPTXWarpID(CGF);
+ llvm::Value *WarpID = getNVPTXWarpID(CGF);
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address LocalReduceList(
@@ -1602,7 +2715,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
CGF.getPointerAlign());
unsigned Idx = 0;
- for (auto &Private : Privates) {
+ for (const Expr *Private : Privates) {
//
// Warp master copies reduce element to transfer medium in __shared__
// memory.
@@ -1612,8 +2725,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
// if (lane_id == 0)
- auto IsWarpMaster =
- Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master");
+ llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
CGF.EmitBlock(ThenBB);
@@ -1627,9 +2739,6 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
ElemPtr = Bld.CreateElementBitCast(
ElemPtr, CGF.ConvertTypeForMem(Private->getType()));
- // elem = *elemptr
- llvm::Value *Elem = CGF.EmitLoadOfScalar(
- ElemPtr, /*Volatile=*/false, Private->getType(), SourceLocation());
// Get pointer to location in transfer medium.
// MediumPtr = &medium[warp_id]
@@ -1641,8 +2750,19 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
MediumPtr = Bld.CreateElementBitCast(
MediumPtr, CGF.ConvertTypeForMem(Private->getType()));
+ // elem = *elemptr
//*MediumPtr = elem
- Bld.CreateStore(Elem, MediumPtr);
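+ // Scalars can be loaded and stored directly; aggregates require a full
+ // element-wise copy.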
+ if (Private->getType()->isScalarType()) {
+ llvm::Value *Elem = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
+ Private->getType(), Loc);
+ // Store the source element value to the dest element address.
+ CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/false,
+ Private->getType());
+ } else {
+ CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()),
+ CGF.MakeAddrLValue(MediumPtr, Private->getType()),
+ Private->getType(), AggValueSlot::DoesNotOverlap);
+ }
Bld.CreateBr(MergeBB);
@@ -1655,7 +2775,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation());
- auto *NumActiveThreads = Bld.CreateNSWMul(
+ llvm::Value *NumActiveThreads = Bld.CreateNSWMul(
NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
// named_barrier_sync(ParallelBarrierID, num_active_threads)
syncParallelThreads(CGF, NumActiveThreads);
@@ -1668,7 +2788,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
// Up to 32 threads in warp 0 are active.
- auto IsActiveThread =
+ llvm::Value *IsActiveThread =
Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
@@ -1682,8 +2802,6 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
// SrcMediumVal = *SrcMediumPtr;
SrcMediumPtr = Bld.CreateElementBitCast(
SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType()));
- llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar(
- SrcMediumPtr, /*Volatile=*/false, Private->getType(), SourceLocation());
// TargetElemPtr = (type[i]*)(SrcDataAddr[i])
Address TargetElemPtrPtr =
@@ -1696,8 +2814,17 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
TargetElemPtr, CGF.ConvertTypeForMem(Private->getType()));
// *TargetElemPtr = SrcMediumVal;
- CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
- Private->getType());
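+ // As above: direct load/store for scalars, aggregate copy otherwise.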
+ if (Private->getType()->isScalarType()) {
+ llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar(
+ SrcMediumPtr, /*Volatile=*/false, Private->getType(), Loc);
+ CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
+ Private->getType());
+ } else {
+ CGF.EmitAggregateCopy(
+ CGF.MakeAddrLValue(SrcMediumPtr, Private->getType()),
+ CGF.MakeAddrLValue(TargetElemPtr, Private->getType()),
+ Private->getType(), AggValueSlot::DoesNotOverlap);
+ }
Bld.CreateBr(W0MergeBB);
CGF.EmitBlock(W0ElseBB);
@@ -1708,7 +2835,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
// While warp 0 copies values from transfer medium, all other warps must
// wait.
syncParallelThreads(CGF, NumActiveThreads);
- Idx++;
+ ++Idx;
}
CGF.FinishFunction();
@@ -1781,39 +2908,40 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
/// (2k+1)th thread is ignored in the value aggregation. Therefore
/// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
/// that the contiguity assumption still holds.
-static llvm::Value *
-emitShuffleAndReduceFunction(CodeGenModule &CGM,
- ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy, llvm::Value *ReduceFn) {
- auto &C = CGM.getContext();
+static llvm::Value *emitShuffleAndReduceFunction(
+ CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
// Thread local Reduce list used to host the values of data to be reduced.
- ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
// Current lane id; could be logical.
- ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy,
+ ImplicitParamDecl::Other);
// Offset of the remote source lane relative to the current lane.
- ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.ShortTy, ImplicitParamDecl::Other);
// Algorithm version. This is expected to be known at compile time.
- ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.ShortTy, ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
Args.push_back(&LaneIDArg);
Args.push_back(&RemoteLaneOffsetArg);
Args.push_back(&AlgoVerArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
- // We don't need debug information in this function as nothing here refers to
- // user code.
- CGF.disableDebugInfo();
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address LocalReduceList(
@@ -1870,21 +2998,19 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM,
// When AlgoVer==2, the third conjunction has only the second part to be
// evaluated during runtime. Other conjunctions evaluates to false
// during compile time.
- auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0));
+ llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
- auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
- auto CondAlgo1 = Bld.CreateAnd(
+ llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+ llvm::Value *CondAlgo1 = Bld.CreateAnd(
Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
- auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
- auto CondAlgo2 = Bld.CreateAnd(
- Algo2,
- Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)),
- Bld.getInt16(0)));
+ llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
+ llvm::Value *CondAlgo2 = Bld.CreateAnd(
+ Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
CondAlgo2 = Bld.CreateAnd(
CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
- auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
+ llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
@@ -1898,7 +3024,8 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM,
LocalReduceList.getPointer(), CGF.VoidPtrTy);
llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
RemoteReduceList.getPointer(), CGF.VoidPtrTy);
- CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+ CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
Bld.CreateBr(MergeBB);
CGF.EmitBlock(ElseBB);
@@ -1909,7 +3036,7 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM,
// if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
// Reduce list.
Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
- auto CondCopy = Bld.CreateAnd(
+ llvm::Value *CondCopy = Bld.CreateAnd(
Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then");
@@ -2182,16 +3309,22 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
- // FIXME: Add support for simd reduction.
- assert((TeamsReduction || ParallelReduction) &&
+ bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind);
+ assert((TeamsReduction || ParallelReduction || SimdReduction) &&
"Invalid reduction selection in emitReduction.");
- auto &C = CGM.getContext();
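+ // Simple reductions need no GPU-specific machinery; defer to the base
+ // runtime's codegen.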
+ if (Options.SimpleReduction) {
+ CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
+ ReductionOps, Options);
+ return;
+ }
+
+ ASTContext &C = CGM.getContext();
// 1. Build a list of reduction variables.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
auto Size = RHSExprs.size();
- for (auto *E : Privates) {
+ for (const Expr *E : Privates) {
if (E->getType()->isVariablyModifiedType())
// Reserve place for array size.
++Size;
@@ -2219,7 +3352,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
llvm::Value *Size = CGF.Builder.CreateIntCast(
CGF.getVLASize(
CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
- .first,
+ .NumElts,
CGF.SizeTy, /*isSigned=*/false);
CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
Elem);
@@ -2227,41 +3360,44 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
}
// 2. Emit reduce_func().
- auto *ReductionFn = emitReductionFunction(
- CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
- LHSExprs, RHSExprs, ReductionOps);
+ llvm::Value *ReductionFn = emitReductionFunction(
+ CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+ Privates, LHSExprs, RHSExprs, ReductionOps);
// 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
// RedList, shuffle_reduce_func, interwarp_copy_func);
- auto *ThreadId = getThreadID(CGF, Loc);
- auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
- auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
+ llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
ReductionList.getPointer(), CGF.VoidPtrTy);
- auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
- CGM, Privates, ReductionArrayTy, ReductionFn);
- auto *InterWarpCopyFn =
- emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy);
+ llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
+ CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
+ llvm::Value *InterWarpCopyFn =
+ emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
- llvm::Value *Res = nullptr;
- if (ParallelReduction) {
- llvm::Value *Args[] = {ThreadId,
- CGF.Builder.getInt32(RHSExprs.size()),
- ReductionArrayTySize,
- RL,
- ShuffleAndReduceFn,
- InterWarpCopyFn};
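+ // The same argument list feeds both the parallel and the simd entry points.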
+ llvm::Value *Args[] = {ThreadId,
+ CGF.Builder.getInt32(RHSExprs.size()),
+ ReductionArrayTySize,
+ RL,
+ ShuffleAndReduceFn,
+ InterWarpCopyFn};
+ llvm::Value *Res = nullptr;
+ if (ParallelReduction)
Res = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
Args);
- }
+ else if (SimdReduction)
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait),
+ Args);
if (TeamsReduction) {
- auto *ScratchPadCopyFn =
- emitCopyToScratchpad(CGM, Privates, ReductionArrayTy);
- auto *LoadAndReduceFn = emitReduceScratchpadFunction(
- CGM, Privates, ReductionArrayTy, ReductionFn);
+ llvm::Value *ScratchPadCopyFn =
+ emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc);
+ llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction(
+ CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
llvm::Value *Args[] = {ThreadId,
CGF.Builder.getInt32(RHSExprs.size()),
@@ -2277,25 +3413,26 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
}
// 5. Build switch(res)
- auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
- auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
+ llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+ llvm::SwitchInst *SwInst =
+ CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
// 6. Build case 1: where we have reduced values in the master
// thread in each team.
// __kmpc_end_reduce{_nowait}(<gtid>);
// break;
- auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+ llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
CGF.EmitBlock(Case1BB);
// Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
llvm::Value *EndArgs[] = {ThreadId};
- auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps,
+ auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
this](CodeGenFunction &CGF, PrePostActionTy &Action) {
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
- for (auto *E : ReductionOps) {
+ for (const Expr *E : ReductionOps) {
emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
cast<DeclRefExpr>(*IRHS));
++IPriv;
@@ -2334,11 +3471,10 @@ CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD,
enum { NVPTX_local_addr = 5 };
QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr));
ArgType = QC.apply(CGM.getContext(), ArgType);
- if (isa<ImplicitParamDecl>(NativeParam)) {
+ if (isa<ImplicitParamDecl>(NativeParam))
return ImplicitParamDecl::Create(
CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
- }
return ParmVarDecl::Create(
CGM.getContext(),
const_cast<DeclContext *>(NativeParam->getDeclContext()),
@@ -2397,8 +3533,8 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
continue;
}
llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
- /*AddrSpace=*/0));
+ NativeArg,
+ NativeArg->getType()->getPointerElementType()->getPointerTo());
TargetArgs.emplace_back(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
}
@@ -2409,10 +3545,10 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
/// and controls the arguments which are passed to this function.
/// The wrapper ensures that the outlined function is called
/// with the correct arguments when data is shared.
-llvm::Function *CGOpenMPRuntimeNVPTX::createDataSharingWrapper(
+llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {
ASTContext &Ctx = CGM.getContext();
- const auto &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+ const auto &CS = *D.getCapturedStmt(OMPD_parallel);
// Create a function that takes as argument the source thread.
FunctionArgList WrapperArgs;
@@ -2420,76 +3556,200 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createDataSharingWrapper(
Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);
QualType Int32QTy =
Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
- QualType Int32PtrQTy = Ctx.getPointerType(Int32QTy);
- QualType VoidPtrPtrQTy = Ctx.getPointerType(Ctx.VoidPtrTy);
- ImplicitParamDecl ParallelLevelArg(Ctx, Int16QTy, ImplicitParamDecl::Other);
- ImplicitParamDecl WrapperArg(Ctx, Int32QTy, ImplicitParamDecl::Other);
- ImplicitParamDecl SharedArgsList(Ctx, VoidPtrPtrQTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ /*Id=*/nullptr, Int16QTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ /*Id=*/nullptr, Int32QTy,
+ ImplicitParamDecl::Other);
WrapperArgs.emplace_back(&ParallelLevelArg);
WrapperArgs.emplace_back(&WrapperArg);
- WrapperArgs.emplace_back(&SharedArgsList);
- auto &CGFI =
+ const CGFunctionInfo &CGFI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
- CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs);
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
+ D.getLocStart(), D.getLocStart());
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
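+ // Zero-initialized temporary, passed below as the bound thread id argument
+ // of the outlined function.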
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ /*Name*/ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
- // TODO: suppport SIMD and pass actual values
- Args.emplace_back(llvm::ConstantPointerNull::get(
- CGM.Int32Ty->getPointerTo()));
- Args.emplace_back(llvm::ConstantPointerNull::get(
- CGM.Int32Ty->getPointerTo()));
+ Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
+ Args.emplace_back(ZeroAddr.getPointer());
CGBuilderTy &Bld = CGF.Builder;
auto CI = CS.capture_begin();
- // Load the start of the array
- auto SharedArgs =
- CGF.EmitLoadOfPointer(CGF.GetAddrOfLocalVar(&SharedArgsList),
- VoidPtrPtrQTy->castAs<PointerType>());
-
- // For each captured variable
- for (unsigned I = 0; I < CS.capture_size(); ++I, ++CI, ++CurField) {
- // Name of captured variable
- StringRef Name;
- if (CI->capturesThis())
- Name = "this";
- else
- Name = CI->getCapturedVar()->getName();
-
- // We retrieve the CLANG type of the argument. We use it to create
- // an alloca which will give us the LLVM type.
- QualType ElemTy = CurField->getType();
- // If this is a capture by copy the element type has to be the pointer to
- // the data.
- if (CI->capturesVariableByCopy())
- ElemTy = Ctx.getPointerType(ElemTy);
-
- // Get shared address of the captured variable.
- Address ArgAddress = Bld.CreateConstInBoundsGEP(
- SharedArgs, I, CGF.getPointerSize());
- Address TypedArgAddress = Bld.CreateBitCast(
- ArgAddress, CGF.ConvertTypeForMem(Ctx.getPointerType(ElemTy)));
- llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedArgAddress,
- /*Volatile=*/false, Int32PtrQTy, SourceLocation());
- Args.emplace_back(Arg);
- }
-
- emitCall(CGF, OutlinedParallelFn, Args);
+ // Use global memory for data sharing.
+ // Handle passing of global args to workers.
+ Address GlobalArgs =
+ CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
+ llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
+ llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
+ DataSharingArgs);
+
+ // Retrieve the shared variables from the list of references returned
+ // by the runtime. Pass the variables to the outlined function.
+ Address SharedArgListAddress = Address::invalid();
+ if (CS.capture_size() > 0 ||
+ isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
+ SharedArgListAddress = CGF.EmitLoadOfPointer(
+ GlobalArgs, CGF.getContext()
+ .getPointerType(CGF.getContext().getPointerType(
+ CGF.getContext().VoidPtrTy))
+ .castAs<PointerType>());
+ }
+ unsigned Idx = 0;
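+ // For loop-bound-sharing directives, the first two shared values are the
+ // precomputed lower and upper bounds; forward them to the outlined function.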
+ if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
+ Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx,
+ CGF.getPointerSize());
+ Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ Src, CGF.SizeTy->getPointerTo());
+ llvm::Value *LB = CGF.EmitLoadOfScalar(
+ TypedAddress,
+ /*Volatile=*/false,
+ CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
+ cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
+ Args.emplace_back(LB);
+ ++Idx;
+ Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx,
+ CGF.getPointerSize());
+ TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ Src, CGF.SizeTy->getPointerTo());
+ llvm::Value *UB = CGF.EmitLoadOfScalar(
+ TypedAddress,
+ /*Volatile=*/false,
+ CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
+ cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
+ Args.emplace_back(UB);
+ ++Idx;
+ }
+ if (CS.capture_size() > 0) {
+ ASTContext &CGFContext = CGF.getContext();
+ for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
+ QualType ElemTy = CurField->getType();
+ Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx,
+ CGF.getPointerSize());
+ Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)));
+ llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
+ /*Volatile=*/false,
+ CGFContext.getPointerType(ElemTy),
+ CI->getLocation());
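+ // Non-pointer by-copy captures are passed as uintptr-sized values.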
+ if (CI->capturesVariableByCopy() &&
+ !CI->getCapturedVar()->getType()->isAnyPointerType()) {
+ Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
+ CI->getLocation());
+ }
+ Args.emplace_back(Arg);
+ }
+ }
+
+ emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedParallelFn, Args);
CGF.FinishFunction();
return Fn;
}
+
+void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
+ const Decl *D) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ return;
+
+ assert(D && "Expected function or captured|block decl.");
+ assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
+ "Function is registered already.");
+ const Stmt *Body = nullptr;
+ bool NeedToDelayGlobalization = false;
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+ Body = FD->getBody();
+ } else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
+ Body = BD->getBody();
+ } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
+ Body = CD->getBody();
+ NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
+ }
+ if (!Body)
+ return;
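+ // Scan the body for variables that escape their declaration context and
+ // therefore must be globalized.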
+ CheckVarsEscapingDeclContext VarChecker(CGF);
+ VarChecker.Visit(Body);
+ const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
+ ArrayRef<const ValueDecl *> EscapedVariableLengthDecls =
+ VarChecker.getEscapedVariableLengthDecls();
+ if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
+ return;
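+ // Record the globalized declarations for this function; their addresses are
+ // filled in when the prolog is emitted.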
+ auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+ I->getSecond().MappedParams =
+ llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ I->getSecond().GlobalRecord = GlobalizedVarsRecord;
+ I->getSecond().EscapedParameters.insert(
+ VarChecker.getEscapedParameters().begin(),
+ VarChecker.getEscapedParameters().end());
+ I->getSecond().EscapedVariableLengthDecls.append(
+ EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
+ DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+ assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+ Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ }
+ if (!NeedToDelayGlobalization) {
+ emitGenericVarsProlog(CGF, D->getLocStart());
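+ // Release the globalized storage on both normal and EH exits.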
+ struct GlobalizationScope final : EHScopeStack::Cleanup {
+ GlobalizationScope() = default;
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ };
+ CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
+ }
+}
+
+Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ return Address::invalid();
+
+ VD = VD->getCanonicalDecl();
+ auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I == FunctionGlobalizedDecls.end())
+ return Address::invalid();
+ auto VDI = I->getSecond().LocalVarData.find(VD);
+ if (VDI != I->getSecond().LocalVarData.end())
+ return VDI->second.second;
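+ // Not globalized directly; check whether VD references a globalized
+ // variable through OMPReferencedVarAttr.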
+ if (VD->hasAttrs()) {
+ for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()),
+ E(VD->attr_end());
+ IT != E; ++IT) {
+ auto VDI = I->getSecond().LocalVarData.find(
+ cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
+ ->getCanonicalDecl());
+ if (VDI != I->getSecond().LocalVarData.end())
+ return VDI->second.second;
+ }
+ }
+ return Address::invalid();
+}
+
+void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
+ FunctionGlobalizedDecls.erase(CGF.CurFn);
+ CGOpenMPRuntime::functionFinished(CGF);
+}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 5d13408318a5..f83e99f8a3b7 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -24,8 +24,18 @@ namespace clang {
namespace CodeGen {
class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+public:
+ /// Defines the execution mode.
+ enum ExecutionMode {
+ /// SPMD execution mode (all threads are worker threads).
+ EM_SPMD,
+ /// Non-SPMD execution mode (1 master thread, others are workers).
+ EM_NonSPMD,
+ /// Unknown execution mode (orphaned directive).
+ EM_Unknown,
+ };
private:
- // Parallel outlined function work for workers to execute.
+ /// Outlined parallel functions queued as work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
struct EntryFunctionState {
@@ -35,48 +45,56 @@ private:
class WorkerFunctionState {
public:
llvm::Function *WorkerFn;
- const CGFunctionInfo *CGFI;
+ const CGFunctionInfo &CGFI;
+ SourceLocation Loc;
- WorkerFunctionState(CodeGenModule &CGM);
+ WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
private:
void createWorkerFunction(CodeGenModule &CGM);
};
- bool isInSpmdExecutionMode() const;
+ ExecutionMode getExecutionMode() const;
- /// \brief Emit the worker function for the current target region.
+ /// Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
- /// \brief Helper for worker function. Emit body of worker loop.
+ /// Helper for worker function. Emit body of worker loop.
void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
- /// \brief Helper for generic target entry function. Guide the master and
+ /// Helper for non-SPMD target entry function. Guide the master and
/// worker threads to their respective locations.
- void emitGenericEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
WorkerFunctionState &WST);
- /// \brief Signal termination of OMP execution for generic target entry
+ /// Signal termination of OMP execution for non-SPMD target entry
/// function.
- void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+ void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
+ /// Helper for generic variables globalization prolog.
+ void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
+
+ /// Helper for generic variables globalization epilog.
+ void emitGenericVarsEpilog(CodeGenFunction &CGF);
- /// \brief Helper for Spmd mode target directive's entry function.
- void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ /// Helper for SPMD mode target directive's entry function.
+ void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D);
- /// \brief Signal termination of Spmd mode execution.
- void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+ /// Signal termination of SPMD mode execution.
+ void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
//
// Base class overrides.
//
- /// \brief Creates offloading entry for the provided entry ID \a ID,
+ /// Creates offloading entry for the provided entry ID \a ID,
/// address \a Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
- uint64_t Size, int32_t Flags = 0) override;
+ uint64_t Size, int32_t Flags,
+ llvm::GlobalValue::LinkageTypes Linkage) override;
- /// \brief Emit outlined function specialized for the Fork-Join
+ /// Emit outlined function specialized for the Fork-Join
/// programming model for applicable target directives on the NVPTX device.
/// \param D Directive to emit.
/// \param ParentName Name of the function that encloses the target region.
@@ -85,12 +103,12 @@ private:
/// \param IsOffloadEntry True if the outlined function is an offload entry.
/// An outlined function may not be an entry if, e.g. the if clause always
/// evaluates to false.
- void emitGenericKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
- /// \brief Emit outlined function specialized for the Single Program
+ /// Emit outlined function specialized for the Single Program
/// Multiple Data programming model for applicable target directives on the
/// NVPTX device.
/// \param D Directive to emit.
@@ -101,12 +119,12 @@ private:
/// \param CodeGen Object containing the target statements.
/// An outlined function may not be an entry if, e.g. the if clause always
/// evaluates to false.
- void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
- /// \brief Emit outlined function for 'target' directive on the NVPTX
+ /// Emit outlined function for 'target' directive on the NVPTX
/// device.
/// \param D Directive to emit.
/// \param ParentName Name of the function that encloses the target region.
@@ -122,22 +140,22 @@ private:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) override;
- /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// Emits code for parallel or serial call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
/// CapturedStruct.
- /// This call is for the Generic Execution Mode.
+ /// This call is for the Non-SPMD Execution Mode.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
/// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
/// \param CapturedVars A pointer to the record with the references to
/// variables used in \a OutlinedFn function.
/// \param IfCond Condition in the associated 'if' clause, if it was
/// specified, nullptr otherwise.
- void emitGenericParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond);
- /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// Emits code for parallel or serial call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
/// CapturedStruct.
/// This call is for a parallel directive within an SPMD target directive.
@@ -148,13 +166,13 @@ private:
/// \param IfCond Condition in the associated 'if' clause, if it was
/// specified, nullptr otherwise.
///
- void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond);
protected:
- /// \brief Get the function name of an outlined region.
+ /// Get the function name of an outlined region.
// The name can be customized depending on the target.
//
StringRef getOutlinedHelperName() const override {
@@ -164,13 +182,13 @@ protected:
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
- /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
virtual void emitProcBindClause(CodeGenFunction &CGF,
OpenMPProcBindClauseKind ProcBind,
SourceLocation Loc) override;
- /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+ /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
/// \param NumThreads An integer value of threads.
@@ -178,7 +196,7 @@ public:
llvm::Value *NumThreads,
SourceLocation Loc) override;
- /// \brief This function ought to emit, in the general case, a call to
+ /// This function ought to emit, in the general case, a call to
// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
// as these numbers are obtained through the PTX grid and block configuration.
/// \param NumTeams An integer expression of teams.
@@ -186,7 +204,7 @@ public:
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
const Expr *ThreadLimit, SourceLocation Loc) override;
- /// \brief Emits inlined function for the specified OpenMP parallel
+ /// Emits inlined function for the specified OpenMP parallel
// directive.
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -201,7 +219,7 @@ public:
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) override;
- /// \brief Emits inlined function for the specified OpenMP teams
+ /// Emits inlined function for the specified OpenMP teams
// directive.
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -216,7 +234,7 @@ public:
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) override;
- /// \brief Emits code for teams call of the \a OutlinedFn with
+ /// Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
/// CapturedStruct.
/// \param OutlinedFn Outlined function to be run by team masters. Type of
@@ -228,7 +246,7 @@ public:
SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars) override;
- /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// Emits code for parallel or serial call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
/// CapturedStruct.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
@@ -242,6 +260,16 @@ public:
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) override;
+ /// Emits a critical region.
+ /// \param CriticalName Name of the critical region.
+ /// \param CriticalOpGen Generator for the statement associated with the given
+ /// critical region.
+ /// \param Hint Value of the 'hint' clause (optional).
+ void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
+ const RegionCodeGenTy &CriticalOpGen,
+ SourceLocation Loc,
+ const Expr *Hint = nullptr) override;
+
/// Emit a code for reduction clause.
///
/// \param Privates List of private copies for original reduction arguments.
@@ -270,7 +298,7 @@ public:
/// Translates the native parameter of outlined function if this is required
/// for target.
- /// \param FD Field decl from captured record for the paramater.
+ /// \param FD Field decl from captured record for the parameter.
/// \param NativeParam Parameter itself.
const VarDecl *translateParameter(const FieldDecl *FD,
const VarDecl *NativeParam) const override;
@@ -288,23 +316,41 @@ public:
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args = llvm::None) const override;
- /// Target codegen is specialized based on two programming models: the
- /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd'
- /// model for constructs like 'target parallel' that support it.
- enum ExecutionMode {
- /// Single Program Multiple Data.
- Spmd,
- /// Generic codegen to support fork-join model.
+ /// Emits OpenMP-specific function prolog.
+ /// Required for device constructs.
+ void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
+
+ /// Gets the OpenMP-specific address of the local variable.
+ Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD) override;
+
+ /// Target codegen is specialized based on two data-sharing modes: CUDA, in
+ /// which local variables are actually global thread-local storage, and
+ /// Generic, in which local variables are placed in global memory if they may
+ /// escape their declaration context.
+ enum DataSharingMode {
+ /// CUDA data sharing mode.
+ CUDA,
+ /// Generic data-sharing mode.
Generic,
- Unknown,
};
+ /// Cleans up references to the objects in the finished function.
+ ///
+ void functionFinished(CodeGenFunction &CGF) override;
+
private:
- // Track the execution mode when codegening directives within a target
- // region. The appropriate mode (generic/spmd) is set on entry to the
- // target region and used by containing directives such as 'parallel'
- // to emit optimized code.
- ExecutionMode CurrentExecutionMode;
+ /// Track the execution mode when codegening directives within a target
+ /// region. The appropriate mode (SPMD/non-SPMD) is set on entry to the
+ /// target region and used by containing directives such as 'parallel'
+ /// to emit optimized code.
+ ExecutionMode CurrentExecutionMode = EM_Unknown;
+
+ /// True if we're emitting code for the target region and the next parallel
+ /// region is guaranteed to be at level 0 (L0).
+ bool IsInTargetMasterThreadRegion = false;
+ /// True if we're definitely inside a parallel region.
+ bool IsInParallelRegion = false;
/// Map between an outlined function and its wrapper.
llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
@@ -313,9 +359,26 @@ private:
/// and controls the parameters which are passed to this function.
/// The wrapper ensures that the outlined function is called
/// with the correct arguments when data is shared.
- llvm::Function *
- createDataSharingWrapper(llvm::Function *OutlinedParallelFn,
- const OMPExecutableDirective &D);
+ llvm::Function *createParallelDataSharingWrapper(
+ llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
+
+ /// The map of local variables to their addresses in the global memory.
+ using DeclToAddrMapTy = llvm::MapVector<const Decl *,
+ std::pair<const FieldDecl *, Address>>;
+ /// Set of the parameters passed by value escaping OpenMP context.
+ using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
+ struct FunctionData {
+ DeclToAddrMapTy LocalVarData;
+ EscapedParamsTy EscapedParameters;
+ llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
+ llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
+ const RecordDecl *GlobalRecord = nullptr;
+ llvm::Value *GlobalRecordAddr = nullptr;
+ std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
+ };
+ /// Maps the function to the list of globalized variables with their
+ /// addresses.
+ llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
};
} // CodeGen namespace.
diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h
index 7b9c27d1d772..41084294ab9a 100644
--- a/lib/CodeGen/CGRecordLayout.h
+++ b/lib/CodeGen/CGRecordLayout.h
@@ -23,7 +23,7 @@ namespace llvm {
namespace clang {
namespace CodeGen {
-/// \brief Structure with information about how a bitfield should be accessed.
+/// Structure with information about how a bitfield should be accessed.
///
/// Often we layout a sequence of bitfields as a contiguous sequence of bits.
/// When the AST record layout does this, we represent it in the LLVM IR's type
@@ -92,7 +92,7 @@ struct CGBitFieldInfo {
void print(raw_ostream &OS) const;
void dump() const;
- /// \brief Given a bit-field decl, build an appropriate helper object for
+ /// Given a bit-field decl, build an appropriate helper object for
/// accessing that field (which is expected to have the given offset and
/// size).
static CGBitFieldInfo MakeInfo(class CodeGenTypes &Types,
@@ -156,31 +156,31 @@ public:
IsZeroInitializable(IsZeroInitializable),
IsZeroInitializableAsBase(IsZeroInitializableAsBase) {}
- /// \brief Return the "complete object" LLVM type associated with
+ /// Return the "complete object" LLVM type associated with
/// this record.
llvm::StructType *getLLVMType() const {
return CompleteObjectType;
}
- /// \brief Return the "base subobject" LLVM type associated with
+ /// Return the "base subobject" LLVM type associated with
/// this record.
llvm::StructType *getBaseSubobjectLLVMType() const {
return BaseSubobjectType;
}
- /// \brief Check whether this struct can be C++ zero-initialized
+ /// Check whether this struct can be C++ zero-initialized
/// with a zeroinitializer.
bool isZeroInitializable() const {
return IsZeroInitializable;
}
- /// \brief Check whether this struct can be C++ zero-initialized
+ /// Check whether this struct can be C++ zero-initialized
/// with a zeroinitializer when considered as a base subobject.
bool isZeroInitializableAsBase() const {
return IsZeroInitializableAsBase;
}
- /// \brief Return llvm::StructType element number that corresponds to the
+ /// Return llvm::StructType element number that corresponds to the
/// field FD.
unsigned getLLVMFieldNo(const FieldDecl *FD) const {
FD = FD->getCanonicalDecl();
@@ -193,14 +193,14 @@ public:
return NonVirtualBases.lookup(RD);
}
- /// \brief Return the LLVM field index corresponding to the given
+ /// Return the LLVM field index corresponding to the given
/// virtual base. Only valid when operating on the complete object.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const {
assert(CompleteObjectVirtualBases.count(base) && "Invalid virtual base!");
return CompleteObjectVirtualBases.lookup(base);
}
- /// \brief Return the BitFieldInfo that corresponds to the field FD.
+ /// Return the BitFieldInfo that corresponds to the field FD.
const CGBitFieldInfo &getBitFieldInfo(const FieldDecl *FD) const {
FD = FD->getCanonicalDecl();
assert(FD->isBitField() && "Invalid call for non-bit-field decl!");
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 1644ab4c0725..4ee6c8e71457 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -62,7 +62,7 @@ namespace {
/// because LLVM reads from the complete type it can generate incorrect code
/// if we do not clip the tail padding off of the bitfield in the complete
/// layout. This introduces a somewhat awkward extra unnecessary clip stage.
-/// The location of the clip is stored internally as a sentinal of type
+/// The location of the clip is stored internally as a sentinel of type
/// SCISSOR. If LLVM were updated to read base types (which it probably
/// should because locations of things such as VBases are bogus in the llvm
/// type anyway) then we could eliminate the SCISSOR.
@@ -74,7 +74,7 @@ namespace {
struct CGRecordLowering {
// MemberInfo is a helper structure that contains information about a record
// member. In additional to the standard member types, there exists a
- // sentinal member type that ensures correct rounding.
+ // sentinel member type that ensures correct rounding.
struct MemberInfo {
CharUnits Offset;
enum InfoKind { VFPtr, VBPtr, Field, Base, VBase, Scissor } Kind;
@@ -95,7 +95,7 @@ struct CGRecordLowering {
// The constructor.
CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D, bool Packed);
// Short helper routines.
- /// \brief Constructs a MemberInfo instance from an offset and llvm::Type *.
+ /// Constructs a MemberInfo instance from an offset and llvm::Type *.
MemberInfo StorageInfo(CharUnits Offset, llvm::Type *Data) {
return MemberInfo(Offset, MemberInfo::Field, Data);
}
@@ -118,19 +118,19 @@ struct CGRecordLowering {
return !Context.getTargetInfo().getCXXABI().isMicrosoft();
}
- /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments.
+ /// Wraps llvm::Type::getIntNTy with some implicit arguments.
llvm::Type *getIntNType(uint64_t NumBits) {
return llvm::Type::getIntNTy(Types.getLLVMContext(),
(unsigned)llvm::alignTo(NumBits, 8));
}
- /// \brief Gets an llvm type of size NumBytes and alignment 1.
+ /// Gets an llvm type of size NumBytes and alignment 1.
llvm::Type *getByteArrayType(CharUnits NumBytes) {
assert(!NumBytes.isZero() && "Empty byte arrays aren't allowed.");
llvm::Type *Type = llvm::Type::getInt8Ty(Types.getLLVMContext());
return NumBytes == CharUnits::One() ? Type :
(llvm::Type *)llvm::ArrayType::get(Type, NumBytes.getQuantity());
}
- /// \brief Gets the storage type for a field decl and handles storage
+ /// Gets the storage type for a field decl and handles storage
/// for itanium bitfields that are smaller than their declared type.
llvm::Type *getStorageType(const FieldDecl *FD) {
llvm::Type *Type = Types.ConvertTypeForMem(FD->getType());
@@ -139,7 +139,7 @@ struct CGRecordLowering {
return getIntNType(std::min(FD->getBitWidthValue(Context),
(unsigned)Context.toBits(getSize(Type))));
}
- /// \brief Gets the llvm Basesubobject type from a CXXRecordDecl.
+ /// Gets the llvm Basesubobject type from a CXXRecordDecl.
llvm::Type *getStorageType(const CXXRecordDecl *RD) {
return Types.getCGRecordLayout(RD).getBaseSubobjectLLVMType();
}
@@ -168,7 +168,7 @@ struct CGRecordLowering {
// Layout routines.
void setBitFieldInfo(const FieldDecl *FD, CharUnits StartOffset,
llvm::Type *StorageType);
- /// \brief Lowers an ASTRecordLayout to a llvm type.
+ /// Lowers an ASTRecordLayout to a llvm type.
void lower(bool NonVirtualBaseType);
void lowerUnion();
void accumulateFields();
@@ -177,18 +177,18 @@ struct CGRecordLowering {
void accumulateBases();
void accumulateVPtrs();
void accumulateVBases();
- /// \brief Recursively searches all of the bases to find out if a vbase is
+ /// Recursively searches all of the bases to find out if a vbase is
/// not the primary vbase of some base class.
bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query);
void calculateZeroInit();
- /// \brief Lowers bitfield storage types to I8 arrays for bitfields with tail
+ /// Lowers bitfield storage types to I8 arrays for bitfields with tail
/// padding that is or can potentially be used.
void clipTailPadding();
- /// \brief Determines if we need a packed llvm struct.
+ /// Determines if we need a packed llvm struct.
void determinePacked(bool NVBaseType);
- /// \brief Inserts padding everwhere it's needed.
+ /// Inserts padding everywhere it's needed.
void insertPadding();
- /// \brief Fills out the structures that are ultimately consumed.
+ /// Fills out the structures that are ultimately consumed.
void fillOutputFields();
// Input memoization fields.
CodeGenTypes &Types;
@@ -214,12 +214,13 @@ private:
};
} // namespace {
-CGRecordLowering::CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D, bool Packed)
- : Types(Types), Context(Types.getContext()), D(D),
- RD(dyn_cast<CXXRecordDecl>(D)),
- Layout(Types.getContext().getASTRecordLayout(D)),
- DataLayout(Types.getDataLayout()), IsZeroInitializable(true),
- IsZeroInitializableAsBase(true), Packed(Packed) {}
+CGRecordLowering::CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D,
+ bool Packed)
+ : Types(Types), Context(Types.getContext()), D(D),
+ RD(dyn_cast<CXXRecordDecl>(D)),
+ Layout(Types.getContext().getASTRecordLayout(D)),
+ DataLayout(Types.getDataLayout()), IsZeroInitializable(true),
+ IsZeroInitializableAsBase(true), Packed(Packed) {}
void CGRecordLowering::setBitFieldInfo(
const FieldDecl *FD, CharUnits StartOffset, llvm::Type *StorageType) {
@@ -294,8 +295,7 @@ void CGRecordLowering::lowerUnion() {
// been doing and cause lit tests to change.
for (const auto *Field : D->fields()) {
if (Field->isBitField()) {
- // Skip 0 sized bitfields.
- if (Field->getBitWidthValue(Context) == 0)
+ if (Field->isZeroLengthBitField(Context))
continue;
llvm::Type *FieldType = getStorageType(Field);
if (LayoutSize < getSize(FieldType))
@@ -380,7 +380,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
for (; Field != FieldEnd; ++Field) {
uint64_t BitOffset = getFieldBitOffset(*Field);
// Zero-width bitfields end runs.
- if (Field->getBitWidthValue(Context) == 0) {
+ if (Field->isZeroLengthBitField(Context)) {
Run = FieldEnd;
continue;
}
@@ -404,19 +404,20 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
return;
}
- // Check if current Field is better as a single field run. When current field
+ // Check if OffsetInRecord is better as a single field run. When OffsetInRecord
// has legal integer width, and its bitfield offset is naturally aligned, it
// is better to make the bitfield a separate storage component so that it can be
// accessed directly with lower cost.
- auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
+ auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
+ uint64_t StartBitOffset) {
if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
return false;
- unsigned Width = Field->getBitWidthValue(Context);
- if (!DataLayout.isLegalInteger(Width))
+ if (!DataLayout.isLegalInteger(OffsetInRecord))
return false;
- // Make sure Field is natually aligned if it is treated as an IType integer.
- if (getFieldBitOffset(*Field) %
- Context.toBits(getAlignment(getIntNType(Width))) !=
+ // Make sure StartBitOffset is naturally aligned if it is treated as an
+ // IType integer.
+ if (StartBitOffset %
+ Context.toBits(getAlignment(getIntNType(OffsetInRecord))) !=
0)
return false;
return true;
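A note on the heuristic above: with Clang's -ffine-grained-bitfield-accesses, a run whose width is a legal integer type and whose start offset is naturally aligned for that type is split into its own storage unit. A minimal sketch, with a hypothetical struct:

    struct S {
      unsigned a : 32; // legal i32 width at a 32-bit-aligned offset: becomes
                       // its own access unit, so loads of 'a' need no
                       // masking or shifting
      unsigned b : 16; // starts a separate run
    };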
@@ -431,26 +432,31 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
if (Field == FieldEnd)
break;
// Any non-zero-length bitfield can start a new run.
- if (Field->getBitWidthValue(Context) != 0) {
+ if (!Field->isZeroLengthBitField(Context)) {
Run = Field;
StartBitOffset = getFieldBitOffset(*Field);
Tail = StartBitOffset + Field->getBitWidthValue(Context);
- StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run);
+ StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset,
+ StartBitOffset);
}
++Field;
continue;
}
// If the start field of a new run is better as a single run, or
- // if current field is better as a single run, or
- // if current field has zero width bitfield, or
+ // if the current field (or consecutive fields) is better as a single run, or
+ // if the current field is a zero-width bitfield and either
+ // UseZeroLengthBitfieldAlignment or UseBitFieldTypeAlignment is set to
+ // true, or
// if the offset of the current field is inconsistent with the offset of the
// previous field plus its width,
// skip the block below and go ahead to emit the storage.
// Otherwise, try to add bitfields to the run.
if (!StartFieldAsSingleRun && Field != FieldEnd &&
- !IsBetterAsSingleFieldRun(Field) &&
- Field->getBitWidthValue(Context) != 0 &&
+ !IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset) &&
+ (!Field->isZeroLengthBitField(Context) ||
+ (!Context.getTargetInfo().useZeroLengthBitfieldAlignment() &&
+ !Context.getTargetInfo().useBitFieldTypeAlignment())) &&
Tail == getFieldBitOffset(*Field)) {
Tail += Field->getBitWidthValue(Context);
++Field;
@@ -626,7 +632,7 @@ void CGRecordLowering::determinePacked(bool NVBaseType) {
// non-virtual sub-object and an unpacked complete object or vice versa.
if (NVSize % NVAlignment)
Packed = true;
- // Update the alignment of the sentinal.
+ // Update the alignment of the sentinel.
if (!Packed)
Members.back().Data = getIntNType(Context.toBits(Alignment));
}
@@ -785,8 +791,7 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
}
// Verify that the LLVM and AST field offsets agree.
- llvm::StructType *ST =
- dyn_cast<llvm::StructType>(RL->getLLVMType());
+ llvm::StructType *ST = RL->getLLVMType();
const llvm::StructLayout *SL = getDataLayout().getStructLayout(ST);
const ASTRecordLayout &AST_RL = getContext().getASTRecordLayout(D);
@@ -808,7 +813,7 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
continue;
// Don't inspect zero-length bitfields.
- if (FD->getBitWidthValue(getContext()) == 0)
+ if (FD->isZeroLengthBitField(getContext()))
continue;
const CGBitFieldInfo &Info = RL->getBitFieldInfo(FD);
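The hunks above fold the open-coded `getBitWidthValue(...) == 0` checks into FieldDecl::isZeroLengthBitField. As a reminder of the semantics involved, a zero-width bit-field only separates runs; a hypothetical example:

    struct Z {
      unsigned a : 3;
      unsigned   : 0; // zero-length: ends the run containing 'a'
      unsigned b : 5; // begins a new storage unit
    };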
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 91fa49a46ef1..79662ec0099f 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -74,6 +74,15 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
// Generate a stoppoint if we are emitting debug info.
EmitStopPoint(S);
+ // Ignore all OpenMP directives except for simd if OpenMP with Simd is
+ // enabled.
+ if (getLangOpts().OpenMP && getLangOpts().OpenMPSimd) {
+ if (const auto *D = dyn_cast<OMPExecutableDirective>(S)) {
+ EmitSimpleOMPExecutableDirective(*D);
+ return;
+ }
+ }
+
switch (S->getStmtClass()) {
case Stmt::NoStmtClass:
case Stmt::CXXCatchStmtClass:
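The early return above implements simd-only OpenMP mode (LangOpts.OpenMPSimd, set by the -fopenmp-simd driver flag): every directive is reduced to its simd semantics. A sketch of the effect:

    #pragma omp parallel for simd  // under -fopenmp-simd this is emitted as
    for (int i = 0; i < N; ++i)    // if it were plain '#pragma omp simd':
      a[i] = 2 * b[i];             // no runtime calls, only loop hints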
@@ -599,7 +608,7 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
EmitStmt(S.getInit());
if (S.getConditionVariable())
- EmitAutoVarDecl(*S.getConditionVariable());
+ EmitDecl(*S.getConditionVariable());
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
@@ -696,7 +705,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
RunCleanupsScope ConditionScope(*this);
if (S.getConditionVariable())
- EmitAutoVarDecl(*S.getConditionVariable());
+ EmitDecl(*S.getConditionVariable());
// Evaluate the conditional in the while header. C99 6.8.5.1: The
// evaluation of the controlling expression takes place before each
@@ -768,11 +777,6 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
- const SourceRange &R = S.getSourceRange();
- LoopStack.push(LoopBody, CGM.getContext(), DoAttrs,
- SourceLocToDebugLoc(R.getBegin()),
- SourceLocToDebugLoc(R.getEnd()));
-
EmitBlockWithFallThrough(LoopBody, &S);
{
RunCleanupsScope BodyScope(*this);
@@ -781,6 +785,11 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
EmitBlock(LoopCond.getBlock());
+ const SourceRange &R = S.getSourceRange();
+ LoopStack.push(LoopBody, CGM.getContext(), DoAttrs,
+ SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+
// C99 6.8.5.2: "The evaluation of the controlling expression takes place
// after each execution of the loop body."
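The two do-statement hunks move the LoopStack push from before the body to the condition block, which appears intended to attach loop metadata where the back-edge branch is emitted. An illustrative input, assuming the usual loop-pragma spelling:

    #pragma clang loop unroll_count(2)
    do {                       // the unroll metadata should hang off the
      step();                  // branch in do.cond, which jumps back to
    } while (again());         // do.body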
@@ -856,7 +865,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
// If the for statement has a condition scope, emit the local variable
// declaration.
if (S.getConditionVariable()) {
- EmitAutoVarDecl(*S.getConditionVariable());
+ EmitDecl(*S.getConditionVariable());
}
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
@@ -996,7 +1005,9 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
if (RV.isScalar()) {
Builder.CreateStore(RV.getScalarVal(), ReturnValue);
} else if (RV.isAggregate()) {
- EmitAggregateCopy(ReturnValue, RV.getAggregateAddress(), Ty);
+ LValue Dest = MakeAddrLValue(ReturnValue, Ty);
+ LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty);
+ EmitAggregateCopy(Dest, Src, Ty, overlapForReturnValue());
} else {
EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty),
/*init*/ true);
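EmitAggregateCopy now takes LValues plus an explicit overlap hint; overlapForReturnValue() presumably describes whether the return slot may overlap the copied object. The shape of code this path lowers:

    struct Big { int v[16]; };
    Big pick(const Big &a) { return a; } // the return performs an aggregate
                                         // copy into the sret slot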
@@ -1026,7 +1037,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
Builder.ClearInsertionPoint();
}
- // Emit the result value, even if unused, to evalute the side effects.
+ // Emit the result value, even if unused, to evaluate the side effects.
const Expr *RV = S.getRetValue();
// Treat block literals in a return expression as if they appeared
@@ -1074,11 +1085,12 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
/*isInit*/ true);
break;
case TEK_Aggregate:
- EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue,
- Qualifiers(),
- AggValueSlot::IsDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ EmitAggExpr(RV, AggValueSlot::forAddr(
+ ReturnValue, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased,
+ overlapForReturnValue()));
break;
}
}
@@ -1563,7 +1575,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// Emit the condition variable if needed inside the entire cleanup scope
// used by this special case for constant folded switches.
if (S.getConditionVariable())
- EmitAutoVarDecl(*S.getConditionVariable());
+ EmitDecl(*S.getConditionVariable());
// At this point, we are no longer "within" a switch instance, so
// we can temporarily enforce this to ensure that any embedded case
@@ -1592,7 +1604,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
EmitStmt(S.getInit());
if (S.getConditionVariable())
- EmitAutoVarDecl(*S.getConditionVariable());
+ EmitDecl(*S.getConditionVariable());
llvm::Value *CondV = EmitScalarExpr(S.getCond());
// Create basic block to hold stuff that comes after switch
@@ -1915,7 +1927,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Simplify the output constraint.
std::string OutputConstraint(S.getOutputConstraint(i));
OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1,
- getTarget());
+ getTarget(), &OutputConstraintInfos);
const Expr *OutExpr = S.getOutputExpr(i);
OutExpr = OutExpr->IgnoreParenNoopCasts(getContext());
@@ -2122,7 +2134,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::InlineAsm *IA =
llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
/* IsAlignStack */ false, AsmDialect);
- llvm::CallInst *Result = Builder.CreateCall(IA, Args);
+ llvm::CallInst *Result =
+ Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
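The final hunk threads getBundlesForFunclet into the inline-asm call: under MSVC-style exception handling, any call emitted inside a catchpad or cleanuppad funclet must carry a "funclet" operand bundle, and that now includes calls to InlineAsm. A hypothetical trigger:

    void f() {
      try { g(); }
      catch (...) {            // with the Microsoft EH personality this
        asm volatile("nop");   // handler is a funclet, so the asm call
      }                        // needs the bundle
    }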
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index f9861735832b..0d343f84c71f 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -29,12 +29,13 @@ namespace {
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
for (const auto *C : S.clauses()) {
- if (auto *CPI = OMPClauseWithPreInit::get(C)) {
- if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
+ if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
for (const auto *I : PreInit->decls()) {
- if (!I->hasAttr<OMPCaptureNoInitAttr>())
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
CGF.EmitVarDecl(cast<VarDecl>(*I));
- else {
+ } else {
CodeGenFunction::AutoVarEmission Emission =
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
CGF.EmitAutoVarCleanups(Emission);
@@ -53,34 +54,35 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
}
public:
- OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
- bool AsInlined = false, bool EmitPreInitStmt = true)
+ OMPLexicalScope(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
+ const bool EmitPreInitStmt = true)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
if (EmitPreInitStmt)
emitPreInitStmt(CGF, S);
- if (AsInlined) {
- if (S.hasAssociatedStmt()) {
- auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
- for (auto &C : CS->captures()) {
- if (C.capturesVariable() || C.capturesVariableByCopy()) {
- auto *VD = C.getCapturedVar();
- assert(VD == VD->getCanonicalDecl() &&
- "Canonical decl must be captured.");
- DeclRefExpr DRE(const_cast<VarDecl *>(VD),
- isCapturedVar(CGF, VD) ||
- (CGF.CapturedStmtInfo &&
- InlinedShareds.isGlobalVarCaptured(VD)),
- VD->getType().getNonReferenceType(), VK_LValue,
- SourceLocation());
- InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
- return CGF.EmitLValue(&DRE).getAddress();
- });
- }
- }
- (void)InlinedShareds.Privatize();
+ if (!CapturedRegion.hasValue())
+ return;
+ assert(S.hasAssociatedStmt() &&
+ "Expected associated statement for inlined directive.");
+ const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
+ for (const auto &C : CS->captures()) {
+ if (C.capturesVariable() || C.capturesVariableByCopy()) {
+ auto *VD = C.getCapturedVar();
+ assert(VD == VD->getCanonicalDecl() &&
+ "Canonical decl must be captured.");
+ DeclRefExpr DRE(
+ const_cast<VarDecl *>(VD),
+ isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
+ InlinedShareds.isGlobalVarCaptured(VD)),
+ VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
+ InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
+ return CGF.EmitLValue(&DRE).getAddress();
+ });
}
}
+ (void)InlinedShareds.Privatize();
}
};
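OMPLexicalScope now takes an optional captured-region kind instead of the old AsInlined flag, so a combined directive can privatize the captures of one specific nested CapturedStmt. For instance:

    #pragma omp target parallel for
    for (int i = 0; i < n; ++i)
      body(i);
    // the AST nests one CapturedStmt per constituent region; passing
    // OMPD_parallel to S.getCapturedStmt(...) selects which layer's
    // captures to privatize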
@@ -96,9 +98,8 @@ class OMPParallelScope final : public OMPLexicalScope {
public:
OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
- : OMPLexicalScope(CGF, S,
- /*AsInlined=*/false,
- /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+ : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
+ EmitPreInitStmt(S)) {}
};
/// Lexical scope for the OpenMP teams construct that handles correct codegen
@@ -112,29 +113,26 @@ class OMPTeamsScope final : public OMPLexicalScope {
public:
OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
- : OMPLexicalScope(CGF, S,
- /*AsInlined=*/false,
- /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+ : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
+ EmitPreInitStmt(S)) {}
};
/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
- CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
- for (auto *E : S.counters()) {
+ CodeGenFunction::OMPMapVars PreCondVars;
+ for (const auto *E : S.counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
- return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
- });
+ (void)PreCondVars.setVarAddr(
+ CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
}
- (void)PreCondScope.Privatize();
- if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
- if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
- for (const auto *I : PreInits->decls())
- CGF.EmitVarDecl(cast<VarDecl>(*I));
- }
+ (void)PreCondVars.apply(CGF);
+ if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
+ for (const auto *I : PreInits->decls())
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
}
+ PreCondVars.restore(CGF);
}
public:
@@ -144,6 +142,72 @@ public:
}
};
+class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
+ CodeGenFunction::OMPPrivateScope InlinedShareds;
+
+ static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
+ return CGF.LambdaCaptureFields.lookup(VD) ||
+ (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
+ (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
+ cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
+ }
+
+public:
+ OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
+ InlinedShareds(CGF) {
+ for (const auto *C : S.clauses()) {
+ if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
+ }
+ }
+ }
+ } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
+ for (const Expr *E : UDP->varlists()) {
+ const Decl *D = cast<DeclRefExpr>(E)->getDecl();
+ if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+ CGF.EmitVarDecl(*OED);
+ }
+ }
+ }
+ if (!isOpenMPSimdDirective(S.getDirectiveKind()))
+ CGF.EmitOMPPrivateClause(S, InlinedShareds);
+ if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
+ if (const Expr *E = TG->getReductionRef())
+ CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
+ }
+ const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
+ while (CS) {
+ for (auto &C : CS->captures()) {
+ if (C.capturesVariable() || C.capturesVariableByCopy()) {
+ auto *VD = C.getCapturedVar();
+ assert(VD == VD->getCanonicalDecl() &&
+ "Canonical decl must be captured.");
+ DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ isCapturedVar(CGF, VD) ||
+ (CGF.CapturedStmtInfo &&
+ InlinedShareds.isGlobalVarCaptured(VD)),
+ VD->getType().getNonReferenceType(), VK_LValue,
+ C.getLocation());
+ InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
+ return CGF.EmitLValue(&DRE).getAddress();
+ });
+ }
+ }
+ CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
+ }
+ (void)InlinedShareds.Privatize();
+ }
+};
+
} // namespace
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
@@ -151,8 +215,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
const RegionCodeGenTy &CodeGen);
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
- if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
- if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
+ if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
+ if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
OrigVD = OrigVD->getCanonicalDecl();
bool IsCaptured =
LambdaCaptureFields.lookup(OrigVD) ||
@@ -167,23 +231,23 @@ LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
}
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
- auto &C = getContext();
+ ASTContext &C = getContext();
llvm::Value *Size = nullptr;
auto SizeInChars = C.getTypeSizeInChars(Ty);
if (SizeInChars.isZero()) {
// getTypeSizeInChars() returns 0 for a VLA.
- while (auto *VAT = C.getAsVariableArrayType(Ty)) {
- llvm::Value *ArraySize;
- std::tie(ArraySize, Ty) = getVLASize(VAT);
- Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
+ while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
+ VlaSizePair VlaSize = getVLASize(VAT);
+ Ty = VlaSize.Type;
+ Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
+ : VlaSize.NumElts;
}
SizeInChars = C.getTypeSizeInChars(Ty);
if (SizeInChars.isZero())
return llvm::ConstantInt::get(SizeTy, /*V=*/0);
- Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
- } else
- Size = CGM.getSize(SizeInChars);
- return Size;
+ return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
+ }
+ return CGM.getSize(SizeInChars);
}
void CodeGenFunction::GenerateOpenMPCapturedVars(
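getTypeSize peels nested VLA levels, accumulating element counts with NUW multiplies before scaling by the element size. For a hypothetical input:

    void f(int n, int m) {
      double a[n][m];  // getTypeSize(typeof(a)) emits n * m * sizeof(double),
    }                  // one NUW multiply per VLA level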
@@ -195,27 +259,26 @@ void CodeGenFunction::GenerateOpenMPCapturedVars(
E = S.capture_init_end();
I != E; ++I, ++CurField, ++CurCap) {
if (CurField->hasCapturedVLAType()) {
- auto VAT = CurField->getCapturedVLAType();
- auto *Val = VLASizeMap[VAT->getSizeExpr()];
+ const VariableArrayType *VAT = CurField->getCapturedVLAType();
+ llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
CapturedVars.push_back(Val);
- } else if (CurCap->capturesThis())
+ } else if (CurCap->capturesThis()) {
CapturedVars.push_back(CXXThisValue);
- else if (CurCap->capturesVariableByCopy()) {
- llvm::Value *CV =
- EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
+ } else if (CurCap->capturesVariableByCopy()) {
+ llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
// If the field is not a pointer, we need to save the actual value
// and load it as a void pointer.
if (!CurField->getType()->isAnyPointerType()) {
- auto &Ctx = getContext();
- auto DstAddr = CreateMemTemp(
+ ASTContext &Ctx = getContext();
+ Address DstAddr = CreateMemTemp(
Ctx.getUIntPtrType(),
- Twine(CurCap->getCapturedVar()->getName()) + ".casted");
+ Twine(CurCap->getCapturedVar()->getName(), ".casted"));
LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
- auto *SrcAddrVal = EmitScalarConversion(
+ llvm::Value *SrcAddrVal = EmitScalarConversion(
DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
- Ctx.getPointerType(CurField->getType()), SourceLocation());
+ Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
LValue SrcLV =
MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
@@ -223,7 +286,7 @@ void CodeGenFunction::GenerateOpenMPCapturedVars(
EmitStoreThroughLValue(RValue::get(CV), SrcLV);
// Load the value using the destination type pointer.
- CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+ CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
}
CapturedVars.push_back(CV);
} else {
@@ -233,15 +296,16 @@ void CodeGenFunction::GenerateOpenMPCapturedVars(
}
}
-static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
- StringRef Name, LValue AddrLV,
+static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
+ QualType DstType, StringRef Name,
+ LValue AddrLV,
bool isReferenceType = false) {
ASTContext &Ctx = CGF.getContext();
- auto *CastedPtr = CGF.EmitScalarConversion(
+ llvm::Value *CastedPtr = CGF.EmitScalarConversion(
AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
- Ctx.getPointerType(DstType), SourceLocation());
- auto TmpAddr =
+ Ctx.getPointerType(DstType), Loc);
+ Address TmpAddr =
CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
.getAddress();
@@ -249,27 +313,26 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
// reference instead of the reference of the value.
if (isReferenceType) {
QualType RefType = Ctx.getLValueReferenceType(DstType);
- auto *RefVal = TmpAddr.getPointer();
- TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
- auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
- CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
+ llvm::Value *RefVal = TmpAddr.getPointer();
+ TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref"));
+ LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
+ CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true);
}
return TmpAddr;
}
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
- if (T->isLValueReferenceType()) {
+ if (T->isLValueReferenceType())
return C.getLValueReferenceType(
getCanonicalParamType(C, T.getNonReferenceType()),
/*SpelledAsLValue=*/false);
- }
if (T->isPointerType())
return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
- if (auto *A = T->getAsArrayTypeUnsafe()) {
- if (auto *VLA = dyn_cast<VariableArrayType>(A))
+ if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
+ if (const auto *VLA = dyn_cast<VariableArrayType>(A))
return getCanonicalParamType(C, VLA->getElementType());
- else if (!A->isVariablyModifiedType())
+ if (!A->isVariablyModifiedType())
return C.getCanonicalType(T);
}
return C.getCanonicalParamType(T);
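castValueFromUintptr exists because by-copy captures are handed to the OpenMP outlined functions as pointer-sized integers and must be reinterpreted on the callee side. Roughly, for a sketch like:

    float x = 1.0f;
    #pragma omp parallel firstprivate(x) // x travels through a uintptr-sized
    { use(x); }                          // slot and is cast back to float in
                                         // the outlined body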
@@ -329,7 +392,7 @@ static llvm::Function *emitOutlinedFunctionPrologue(
Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
}
- for (auto *FD : RD->fields()) {
+ for (const FieldDecl *FD : RD->fields()) {
QualType ArgType = FD->getType();
IdentifierInfo *II = nullptr;
VarDecl *CapVar = nullptr;
@@ -339,18 +402,17 @@ static llvm::Function *emitOutlinedFunctionPrologue(
// uintptr. This is necessary given that the runtime library is only able to
// deal with pointers. The VLA type sizes can be passed to the outlined
// function in the same way.
- if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
- I->capturesVariableArrayType()) {
- if (FO.UIntPtrCastRequired)
- ArgType = Ctx.getUIntPtrType();
- }
+ if (FO.UIntPtrCastRequired &&
+ ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
+ I->capturesVariableArrayType()))
+ ArgType = Ctx.getUIntPtrType();
if (I->capturesVariable() || I->capturesVariableByCopy()) {
CapVar = I->getCapturedVar();
II = CapVar->getIdentifier();
- } else if (I->capturesThis())
+ } else if (I->capturesThis()) {
II = &Ctx.Idents.get("this");
- else {
+ } else {
assert(I->capturesVariableArrayType());
II = &Ctx.Idents.get("vla");
}
@@ -387,19 +449,20 @@ static llvm::Function *emitOutlinedFunctionPrologue(
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
- llvm::Function *F =
+ auto *F =
llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
FO.FunctionName, &CGM.getModule());
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
if (CD->isNothrow())
F->setDoesNotThrow();
+ F->setDoesNotRecurse();
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
FO.S->getLocStart(), CD->getBody()->getLocStart());
unsigned Cnt = CD->getContextParamPosition();
I = FO.S->captures().begin();
- for (auto *FD : RD->fields()) {
+ for (const FieldDecl *FD : RD->fields()) {
// Do not map arguments if we emit function with non-original types.
Address LocalAddr(Address::invalid());
if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
@@ -431,23 +494,23 @@ static llvm::Function *emitOutlinedFunctionPrologue(
AlignmentSource::Decl);
if (FD->hasCapturedVLAType()) {
if (FO.UIntPtrCastRequired) {
- ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
- Args[Cnt]->getName(),
- ArgLVal),
- FD->getType(), AlignmentSource::Decl);
+ ArgLVal = CGF.MakeAddrLValue(
+ castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
+ Args[Cnt]->getName(), ArgLVal),
+ FD->getType(), AlignmentSource::Decl);
}
- auto *ExprArg =
- CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
- auto VAT = FD->getCapturedVLAType();
- VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
+ llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+ const VariableArrayType *VAT = FD->getCapturedVLAType();
+ VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
} else if (I->capturesVariable()) {
- auto *Var = I->getCapturedVar();
+ const VarDecl *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
Address ArgAddr = ArgLVal.getAddress();
if (!VarTy->isReferenceType()) {
if (ArgLVal.getType()->isLValueReferenceType()) {
ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
- } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
+ } else if (!VarTy->isVariablyModifiedType() ||
+ !VarTy->isPointerType()) {
assert(ArgLVal.getType()->isPointerType());
ArgAddr = CGF.EmitLoadOfPointer(
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
@@ -461,20 +524,19 @@ static llvm::Function *emitOutlinedFunctionPrologue(
} else if (I->capturesVariableByCopy()) {
assert(!FD->getType()->isAnyPointerType() &&
"Not expecting a captured pointer.");
- auto *Var = I->getCapturedVar();
+ const VarDecl *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
LocalAddrs.insert(
{Args[Cnt],
- {Var,
- FO.UIntPtrCastRequired
- ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
- ArgLVal, VarTy->isReferenceType())
- : ArgLVal.getAddress()}});
+ {Var, FO.UIntPtrCastRequired
+ ? castValueFromUintptr(CGF, I->getLocation(),
+ FD->getType(), Args[Cnt]->getName(),
+ ArgLVal, VarTy->isReferenceType())
+ : ArgLVal.getAddress()}});
} else {
// If 'this' is captured, load it into CXXThisValue.
assert(I->capturesThis());
- CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
- .getScalarVal();
+ CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
}
++Cnt;
@@ -524,6 +586,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
/*RegisterCastedArgsOnly=*/true,
CapturedStmtInfo->getHelperName());
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
+ WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
Args.clear();
LocalAddrs.clear();
VLASizes.clear();
@@ -539,16 +602,16 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
I->second.second,
I->second.first ? I->second.first->getType() : Arg->getType(),
AlignmentSource::Decl);
- CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
} else {
auto EI = VLASizes.find(Arg);
- if (EI != VLASizes.end())
+ if (EI != VLASizes.end()) {
CallArg = EI->second.second;
- else {
+ } else {
LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
Arg->getType(),
AlignmentSource::Decl);
- CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
}
}
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
@@ -564,28 +627,28 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
Address DestAddr, Address SrcAddr, QualType OriginalType,
- const llvm::function_ref<void(Address, Address)> &CopyGen) {
+ const llvm::function_ref<void(Address, Address)> CopyGen) {
// Perform element-by-element initialization.
QualType ElementTy;
// Drill down to the base element type on both arrays.
- auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
- auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
+ const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
+ llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
- auto SrcBegin = SrcAddr.getPointer();
- auto DestBegin = DestAddr.getPointer();
+ llvm::Value *SrcBegin = SrcAddr.getPointer();
+ llvm::Value *DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
- auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
+ llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
// The basic structure here is a while-do loop.
- auto BodyBB = createBasicBlock("omp.arraycpy.body");
- auto DoneBB = createBasicBlock("omp.arraycpy.done");
- auto IsEmpty =
+ llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
+ llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
+ llvm::Value *IsEmpty =
Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
- auto EntryBB = Builder.GetInsertBlock();
+ llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
EmitBlock(BodyBB);
CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
@@ -608,12 +671,12 @@ void CodeGenFunction::EmitOMPAggregateAssign(
CopyGen(DestElementCurrent, SrcElementCurrent);
// Shift the address forward by one element.
- auto DestElementNext = Builder.CreateConstGEP1_32(
+ llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
- auto SrcElementNext = Builder.CreateConstGEP1_32(
+ llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
// Check whether we've reached the end.
- auto Done =
+ llvm::Value *Done =
Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
Builder.CreateCondBr(Done, DoneBB, BodyBB);
DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
@@ -627,10 +690,12 @@ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
Address SrcAddr, const VarDecl *DestVD,
const VarDecl *SrcVD, const Expr *Copy) {
if (OriginalType->isArrayType()) {
- auto *BO = dyn_cast<BinaryOperator>(Copy);
+ const auto *BO = dyn_cast<BinaryOperator>(Copy);
if (BO && BO->getOpcode() == BO_Assign) {
// Perform simple memcpy for simple copying.
- EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
+ LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
+ LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
+ EmitAggregateAssign(Dest, Src, OriginalType);
} else {
// For arrays with complex element types perform element by element
// copying.
@@ -641,11 +706,8 @@ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
// destination and source variables to corresponding array
// elements.
CodeGenFunction::OMPPrivateScope Remap(*this);
- Remap.addPrivate(DestVD, [DestElement]() -> Address {
- return DestElement;
- });
- Remap.addPrivate(
- SrcVD, [SrcElement]() -> Address { return SrcElement; });
+ Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
+ Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
(void)Remap.Privatize();
EmitIgnoredExpr(Copy);
});
@@ -653,8 +715,8 @@ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
} else {
// Remap pseudo source variable to private copy.
CodeGenFunction::OMPPrivateScope Remap(*this);
- Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
- Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
+ Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
+ Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
(void)Remap.Privatize();
// Emit copying of the whole variable.
EmitIgnoredExpr(Copy);
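EmitOMPCopy chooses between a flat copy and the element-wise omp.arraycpy loop based on whether the copy expression is a plain assignment. Informally:

    int a[100];     // trivial '=': a single EmitAggregateAssign (memcpy)
    struct S { S(const S &); };
    S s[4];         // user-defined copy: per-element loop through the
                    // omp.arraycpy.body / omp.arraycpy.done blocks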
@@ -673,17 +735,21 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
}
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
- CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
+ llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+ getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
+ // Force emission of the firstprivate copy if the directive does not emit
+ // outlined function, like omp for, omp simd, omp distribute etc.
+ bool MustEmitFirstprivateCopy =
+ CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto InitsRef = C->inits().begin();
- for (auto IInit : C->private_copies()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *IInit : C->private_copies()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
bool ThisFirstprivateIsLastprivate =
Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
- auto *CapFD = CapturesInfo.lookup(OrigVD);
- auto *FD = CapturedStmtInfo->lookup(OrigVD);
- if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
+ const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
+ if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
!FD->getType()->isReferenceType()) {
EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
++IRef;
@@ -693,54 +759,61 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
FirstprivateIsLastprivate =
FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+ const auto *VDInit =
+ cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
bool IsRegistered;
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
- Address OriginalAddr = EmitLValue(&DRE).getAddress();
+ LValue OriginalLVal = EmitLValue(&DRE);
QualType Type = VD->getType();
if (Type->isArrayType()) {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
// captured region.
- IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
- auto Emission = EmitAutoVarAlloca(*VD);
- auto *Init = VD->getInit();
- if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
- // Perform simple memcpy.
- EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
- Type);
- } else {
- EmitOMPAggregateAssign(
- Emission.getAllocatedAddress(), OriginalAddr, Type,
- [this, VDInit, Init](Address DestElement,
- Address SrcElement) {
- // Clean up any temporaries needed by the initialization.
- RunCleanupsScope InitScope(*this);
- // Emit initialization for single element.
- setAddrOfLocalVar(VDInit, SrcElement);
- EmitAnyExprToMem(Init, DestElement,
- Init->getType().getQualifiers(),
- /*IsInitializer*/ false);
- LocalDeclMap.erase(VDInit);
- });
- }
- EmitAutoVarCleanups(Emission);
- return Emission.getAllocatedAddress();
- });
+ IsRegistered = PrivateScope.addPrivate(
+ OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
+ AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
+ const Expr *Init = VD->getInit();
+ if (!isa<CXXConstructExpr>(Init) ||
+ isTrivialInitializer(Init)) {
+ // Perform simple memcpy.
+ LValue Dest =
+ MakeAddrLValue(Emission.getAllocatedAddress(), Type);
+ EmitAggregateAssign(Dest, OriginalLVal, Type);
+ } else {
+ EmitOMPAggregateAssign(
+ Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
+ Type,
+ [this, VDInit, Init](Address DestElement,
+ Address SrcElement) {
+ // Clean up any temporaries needed by the
+ // initialization.
+ RunCleanupsScope InitScope(*this);
+ // Emit initialization for single element.
+ setAddrOfLocalVar(VDInit, SrcElement);
+ EmitAnyExprToMem(Init, DestElement,
+ Init->getType().getQualifiers(),
+ /*IsInitializer*/ false);
+ LocalDeclMap.erase(VDInit);
+ });
+ }
+ EmitAutoVarCleanups(Emission);
+ return Emission.getAllocatedAddress();
+ });
} else {
- IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
- // Emit private VarDecl with copy init.
- // Remap temp VDInit variable to the address of the original
- // variable
- // (for proper handling of captured global variables).
- setAddrOfLocalVar(VDInit, OriginalAddr);
- EmitDecl(*VD);
- LocalDeclMap.erase(VDInit);
- return GetAddrOfLocalVar(VD);
- });
+ Address OriginalAddr = OriginalLVal.getAddress();
+ IsRegistered = PrivateScope.addPrivate(
+ OrigVD, [this, VDInit, OriginalAddr, VD]() {
+ // Emit private VarDecl with copy init.
+ // Remap temp VDInit variable to the address of the original
+ // variable (for proper handling of captured global variables).
+ setAddrOfLocalVar(VDInit, OriginalAddr);
+ EmitDecl(*VD);
+ LocalDeclMap.erase(VDInit);
+ return GetAddrOfLocalVar(VD);
+ });
}
assert(IsRegistered &&
"firstprivate var already registered as private");
@@ -762,16 +835,15 @@ void CodeGenFunction::EmitOMPPrivateClause(
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
auto IRef = C->varlist_begin();
- for (auto IInit : C->private_copies()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *IInit : C->private_copies()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
- auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered =
- PrivateScope.addPrivate(OrigVD, [&]() -> Address {
- // Emit private VarDecl with copy init.
- EmitDecl(*VD);
- return GetAddrOfLocalVar(VD);
- });
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
+ // Emit private VarDecl with copy init.
+ EmitDecl(*VD);
+ return GetAddrOfLocalVar(VD);
+ });
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
@@ -794,8 +866,8 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
auto IRef = C->varlist_begin();
auto ISrcRef = C->source_exprs().begin();
auto IDestRef = C->destination_exprs().begin();
- for (auto *AssignOp : C->assignment_ops()) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *AssignOp : C->assignment_ops()) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
// Get the address of the master variable. If we are emitting code with
@@ -826,12 +898,15 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
Builder.CreateCondBr(
Builder.CreateICmpNE(
Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
- Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
+ Builder.CreatePtrToInt(PrivateAddr.getPointer(),
+ CGM.IntPtrTy)),
CopyBegin, CopyEnd);
EmitBlock(CopyBegin);
}
- auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
- auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+ const auto *SrcVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
+ const auto *DestVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
}
++IRef;
@@ -854,8 +929,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
bool HasAtLeastOneLastprivate = false;
llvm::DenseSet<const VarDecl *> SIMDLCVs;
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
- auto *LoopDirective = cast<OMPLoopDirective>(&D);
- for (auto *C : LoopDirective->counters()) {
+ const auto *LoopDirective = cast<OMPLoopDirective>(&D);
+ for (const Expr *C : LoopDirective->counters()) {
SIMDLCVs.insert(
cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
}
@@ -863,19 +938,21 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
HasAtLeastOneLastprivate = true;
- if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
+ if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
+ !getLangOpts().OpenMPSimd)
break;
auto IRef = C->varlist_begin();
auto IDestRef = C->destination_exprs().begin();
- for (auto *IInit : C->private_copies()) {
+ for (const Expr *IInit : C->private_copies()) {
// Keep the address of the original variable for future update at the end
// of the loop.
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
// Taskloops do not require additional initialization; it is done in the
// runtime support library.
if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
- auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
- PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
+ const auto *DestVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+ PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
DeclRefExpr DRE(
const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
@@ -887,8 +964,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
// not generated. Initialization of this variable will happen in codegen
// for 'firstprivate' clause.
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
// Emit private VarDecl with copy init.
EmitDecl(*VD);
return GetAddrOfLocalVar(VD);
@@ -926,10 +1003,10 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
}
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
- if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
+ if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
auto IC = LoopDirective->counters().begin();
- for (auto F : LoopDirective->finals()) {
- auto *D =
+ for (const Expr *F : LoopDirective->finals()) {
+ const auto *D =
cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
if (NoFinals)
AlreadyEmittedVars.insert(D);
@@ -942,23 +1019,26 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
auto IRef = C->varlist_begin();
auto ISrcRef = C->source_exprs().begin();
auto IDestRef = C->destination_exprs().begin();
- for (auto *AssignOp : C->assignment_ops()) {
- auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *AssignOp : C->assignment_ops()) {
+ const auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = PrivateVD->getType();
- auto *CanonicalVD = PrivateVD->getCanonicalDecl();
+ const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
if (AlreadyEmittedVars.insert(CanonicalVD).second) {
// If the lastprivate variable is a loop control variable of a loop-based
// directive, update its value before copying it back to the original
// variable.
- if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
+ if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
EmitIgnoredExpr(FinalExpr);
- auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
- auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+ const auto *SrcVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
+ const auto *DestVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
// Get the address of the original variable.
Address OriginalAddr = GetAddrOfLocalVar(DestVD);
// Get the address of the private variable.
Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
- if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
+ if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
PrivateAddr =
Address(Builder.CreateLoad(PrivateAddr),
getNaturalTypeAlignment(RefTy->getPointeeType()));
@@ -968,7 +1048,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
++ISrcRef;
++IDestRef;
}
- if (auto *PostUpdate = C->getPostUpdateExpr())
+ if (const Expr *PostUpdate = C->getPostUpdateExpr())
EmitIgnoredExpr(PostUpdate);
}
if (IsLastIterCond)
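The lastprivate machinery above boils down to:

    #pragma omp for lastprivate(x)
    for (int i = 0; i < n; ++i)
      x = f(i);   // after the loop the original x holds the value from the
                  // sequentially last iteration (i == n - 1)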
@@ -990,7 +1070,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
auto IRed = C->reduction_ops().begin();
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
- for (const auto *Ref : C->varlists()) {
+ for (const Expr *Ref : C->varlists()) {
Shareds.emplace_back(Ref);
Privates.emplace_back(*IPriv);
ReductionOps.emplace_back(*IRed);
@@ -1007,12 +1087,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
auto ILHS = LHSs.begin();
auto IRHS = RHSs.begin();
auto IPriv = Privates.begin();
- for (const auto *IRef : Shareds) {
- auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
+ for (const Expr *IRef : Shareds) {
+ const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
// Emit private VarDecl with reduction init.
RedCG.emitSharedLValue(*this, Count);
RedCG.emitAggregateType(*this, Count);
- auto Emission = EmitAutoVarAlloca(*PrivateVD);
+ AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
RedCG.getSharedLValue(Count),
[&Emission](CodeGenFunction &CGF) {
@@ -1023,32 +1103,31 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
Address BaseAddr = RedCG.adjustPrivateAddress(
*this, Count, Emission.getAllocatedAddress());
bool IsRegistered = PrivateScope.addPrivate(
- RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
+ RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
- auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
QualType Type = PrivateVD->getType();
bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
// Store the address of the original variable associated with the LHS
// implicit variable.
- PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
return RedCG.getSharedLValue(Count).getAddress();
});
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
- });
+ PrivateScope.addPrivate(
+ RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
} else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
isa<ArraySubscriptExpr>(IRef)) {
// Store the address of the original variable associated with the LHS
// implicit variable.
- PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
return RedCG.getSharedLValue(Count).getAddress();
});
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()),
"rhs.begin");
@@ -1063,10 +1142,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
OriginalAddr = Builder.CreateElementBitCast(
OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
}
+ PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
PrivateScope.addPrivate(
- LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
- PrivateScope.addPrivate(
- RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
+ RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
return IsArray
? Builder.CreateElementBitCast(
GetAddrOfLocalVar(PrivateVD),
@@ -1100,9 +1178,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
if (HasAtLeastOneReduction) {
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
- D.getDirectiveKind() == OMPD_simd;
- bool SimpleReduction = D.getDirectiveKind() == OMPD_simd ||
- D.getDirectiveKind() == OMPD_distribute_simd;
+ ReductionKind == OMPD_simd;
+ bool SimpleReduction = ReductionKind == OMPD_simd;
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
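EmitOMPReductionClauseFinal now keys off the ReductionKind argument instead of re-deriving it from the directive kind; simd reductions remain "simple" (combined inline, no runtime call). The source pattern being lowered:

    int sum = 0;
    #pragma omp parallel for reduction(+ : sum)
    for (int i = 0; i < n; ++i)
      sum += a[i];  // each thread reduces into a private copy; the copies
                    // are merged through the runtime's reduction entry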
@@ -1113,17 +1190,17 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
static void emitPostUpdateForReductionClause(
CodeGenFunction &CGF, const OMPExecutableDirective &D,
- const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
if (!CGF.HaveInsertPoint())
return;
llvm::BasicBlock *DoneBB = nullptr;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- if (auto *PostUpdate = C->getPostUpdateExpr()) {
+ if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
if (!DoneBB) {
- if (auto *Cond = CondGen(CGF)) {
+ if (llvm::Value *Cond = CondGen(CGF)) {
// If the first post-update expression is found, emit conditional
// block if it was requested.
- auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
CGF.EmitBlock(ThenBB);
@@ -1151,12 +1228,14 @@ static void emitCommonOMPParallelDirective(
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
const CodeGenBoundParametersTy &CodeGenBoundParameters) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
- S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ llvm::Value *OutlinedFn =
+ CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
- auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
- /*IgnoreResultAssign*/ true);
+ llvm::Value *NumThreads =
+ CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+ /*IgnoreResultAssign=*/true);
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
CGF, NumThreads, NumThreadsClause->getLocStart());
}
@@ -1192,7 +1271,8 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
@@ -1207,34 +1287,33 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
emitEmptyBoundParameters);
- emitPostUpdateForReductionClause(
- *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
JumpDest LoopExit) {
RunCleanupsScope BodyScope(*this);
// Update counters values on current iteration.
- for (auto I : D.updates()) {
- EmitIgnoredExpr(I);
- }
+ for (const Expr *UE : D.updates())
+ EmitIgnoredExpr(UE);
// Update the linear variables.
// In distribute directives only loop counters may be marked as linear, no
// need to generate the code for them.
if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
- for (auto *U : C->updates())
- EmitIgnoredExpr(U);
+ for (const Expr *UE : C->updates())
+ EmitIgnoredExpr(UE);
}
}
// On a continue in the body, jump to the end.
- auto Continue = getJumpDestInCurrentScope("omp.body.continue");
+ JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
// Emit loop body.
EmitStmt(D.getBody());
@@ -1246,24 +1325,24 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
void CodeGenFunction::EmitOMPInnerLoop(
const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
const Expr *IncExpr,
- const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
- const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
+ const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
+ const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
// Start the loop with a block that tests the condition.
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
- const SourceRange &R = S.getSourceRange();
+ const SourceRange R = S.getSourceRange();
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
- auto ExitBlock = LoopExit.getBlock();
+ llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
if (RequiresCleanup)
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
- auto LoopBody = createBasicBlock("omp.inner.for.body");
+ llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
// Emit condition.
EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
@@ -1276,7 +1355,7 @@ void CodeGenFunction::EmitOMPInnerLoop(
incrementProfileCounter(&S);
// Create a block for the increment.
- auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
+ JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
BodyGen(*this);
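// The control flow assembled above has this shape (block names as created
// here; a sketch, not literal IR):
//
//   omp.inner.for.cond:  br LoopCond, omp.inner.for.body,
//                                     omp.inner.for.end (via the cleanup
//                                     block when RequiresCleanup)
//   omp.inner.for.body:  <BodyGen>              br omp.inner.for.inc
//   omp.inner.for.inc:   <IncExpr> <PostIncGen> br omp.inner.for.cond
//   omp.inner.for.end: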
@@ -1298,12 +1377,13 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
// Emit inits for the linear variables.
bool HasLinears = false;
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
- for (auto *Init : C->inits()) {
+ for (const Expr *Init : C->inits()) {
HasLinears = true;
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
- if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
+ if (const auto *Ref =
+ dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
- auto *OrigVD = cast<VarDecl>(Ref->getDecl());
+ const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
VD->getInit()->getType(), VK_LValue,
@@ -1312,13 +1392,14 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
VD->getType()),
/*capturedByInit=*/false);
EmitAutoVarCleanups(Emission);
- } else
+ } else {
EmitVarDecl(*VD);
+ }
}
// Emit the linear steps for the linear clauses.
// If a step is not constant, it is pre-calculated before the loop.
- if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
- if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
+ if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
+ if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
// Emit calculation of the linear step.
EmitIgnoredExpr(CS);
@@ -1329,36 +1410,36 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
void CodeGenFunction::EmitOMPLinearClauseFinal(
const OMPLoopDirective &D,
- const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
if (!HaveInsertPoint())
return;
llvm::BasicBlock *DoneBB = nullptr;
// Emit the final values of the linear variables.
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
auto IC = C->varlist_begin();
- for (auto *F : C->finals()) {
+ for (const Expr *F : C->finals()) {
if (!DoneBB) {
- if (auto *Cond = CondGen(*this)) {
+ if (llvm::Value *Cond = CondGen(*this)) {
// If the first post-update expression is found, emit conditional
// block if it was requested.
- auto *ThenBB = createBasicBlock(".omp.linear.pu");
+ llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
DoneBB = createBasicBlock(".omp.linear.pu.done");
Builder.CreateCondBr(Cond, ThenBB, DoneBB);
EmitBlock(ThenBB);
}
}
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
Address OrigAddr = EmitLValue(&DRE).getAddress();
CodeGenFunction::OMPPrivateScope VarScope(*this);
- VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
+ VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
(void)VarScope.Privatize();
EmitIgnoredExpr(F);
++IC;
}
- if (auto *PostUpdate = C->getPostUpdateExpr())
+ if (const Expr *PostUpdate = C->getPostUpdateExpr())
EmitIgnoredExpr(PostUpdate);
}
if (DoneBB)
@@ -1371,12 +1452,12 @@ static void emitAlignedClause(CodeGenFunction &CGF,
return;
for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
unsigned ClauseAlignment = 0;
- if (auto AlignmentExpr = Clause->getAlignment()) {
- auto AlignmentCI =
+ if (const Expr *AlignmentExpr = Clause->getAlignment()) {
+ auto *AlignmentCI =
cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
}
- for (auto E : Clause->varlists()) {
+ for (const Expr *E : Clause->varlists()) {
unsigned Alignment = ClauseAlignment;
if (Alignment == 0) {
// OpenMP [2.8.1, Description]
@@ -1403,28 +1484,28 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
if (!HaveInsertPoint())
return;
auto I = S.private_counters().begin();
- for (auto *E : S.counters()) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
- (void)LoopScope.addPrivate(VD, [&]() -> Address {
- // Emit var without initialization.
- if (!LocalDeclMap.count(PrivateVD)) {
- auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
- EmitAutoVarCleanups(VarEmission);
- }
- DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
- /*RefersToEnclosingVariableOrCapture=*/false,
- (*I)->getType(), VK_LValue, (*I)->getExprLoc());
- return EmitLValue(&DRE).getAddress();
+ for (const Expr *E : S.counters()) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
+ // Emit var without initialization.
+ AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
+ EmitAutoVarCleanups(VarEmission);
+ LocalDeclMap.erase(PrivateVD);
+ (void)LoopScope.addPrivate(VD, [&VarEmission]() {
+ return VarEmission.getAllocatedAddress();
});
if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
VD->hasGlobalStorage()) {
- (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
+ (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
E->getType(), VK_LValue, E->getExprLoc());
return EmitLValue(&DRE).getAddress();
});
+ } else {
+ (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
+ return VarEmission.getAllocatedAddress();
+ });
}
++I;
}
@@ -1440,7 +1521,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
(void)PreCondScope.Privatize();
// Get initial values of real counters.
- for (auto I : S.inits()) {
+ for (const Expr *I : S.inits()) {
CGF.EmitIgnoredExpr(I);
}
}
@@ -1454,20 +1535,20 @@ void CodeGenFunction::EmitOMPLinearClause(
return;
llvm::DenseSet<const VarDecl *> SIMDLCVs;
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
- auto *LoopDirective = cast<OMPLoopDirective>(&D);
- for (auto *C : LoopDirective->counters()) {
+ const auto *LoopDirective = cast<OMPLoopDirective>(&D);
+ for (const Expr *C : LoopDirective->counters()) {
SIMDLCVs.insert(
cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
}
}
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
auto CurPrivate = C->privates().begin();
- for (auto *E : C->varlists()) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- auto *PrivateVD =
+ for (const Expr *E : C->varlists()) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
- bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
+ bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
// Emit private VarDecl with copy init.
EmitVarDecl(*PrivateVD);
return GetAddrOfLocalVar(PrivateVD);
@@ -1475,8 +1556,9 @@ void CodeGenFunction::EmitOMPLinearClause(
assert(IsRegistered && "linear var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
- } else
+ } else {
EmitVarDecl(*PrivateVD);
+ }
++CurPrivate;
}
}
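// As an example of the semantics implemented here: for
//
//   #pragma omp simd linear(x:2)
//
// each iteration works on a private copy of 'x' advanced by step 2,
// initialized from the original variable by the C->inits() expressions
// handled in EmitOMPLinearClauseInit; loop counters of simd directives
// (SIMDLCVs) are skipped because they are already privatized as counters.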
@@ -1490,7 +1572,7 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
- llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+ auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
// In presence of finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
@@ -1500,12 +1582,12 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
} else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
- llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+ auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
// In presence of finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
// dependences of 'safelen' iterations are possible.
- CGF.LoopStack.setParallel(false);
+ CGF.LoopStack.setParallel(/*Enable=*/false);
}
}
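// Illustration of the effect (the loop-metadata spellings are the LLVM
// conventions of this era, noted here as an assumption):
//
//   #pragma omp simd simdlen(8)   // vectorize width 8; memory accesses
//                                 // stay marked parallel
//   #pragma omp simd safelen(16)  // vectorize width 16, but
//                                 // setParallel(false): only 16 iterations
//                                 // are known independent, so loop-carried
//                                 // dependences beyond that must be assumed.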
@@ -1513,46 +1595,45 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
bool IsMonotonic) {
// Walk clauses and process safelen/lastprivate.
LoopStack.setParallel(!IsMonotonic);
- LoopStack.setVectorizeEnable(true);
+ LoopStack.setVectorizeEnable();
emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
void CodeGenFunction::EmitOMPSimdFinal(
const OMPLoopDirective &D,
- const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
if (!HaveInsertPoint())
return;
llvm::BasicBlock *DoneBB = nullptr;
auto IC = D.counters().begin();
auto IPC = D.private_counters().begin();
- for (auto F : D.finals()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
- auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
- auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
+ for (const Expr *F : D.finals()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
+ const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
+ const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
OrigVD->hasGlobalStorage() || CED) {
if (!DoneBB) {
- if (auto *Cond = CondGen(*this)) {
+ if (llvm::Value *Cond = CondGen(*this)) {
// If the first post-update expression is found, emit conditional
// block if it was requested.
- auto *ThenBB = createBasicBlock(".omp.final.then");
+ llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
DoneBB = createBasicBlock(".omp.final.done");
Builder.CreateCondBr(Cond, ThenBB, DoneBB);
EmitBlock(ThenBB);
}
}
Address OrigAddr = Address::invalid();
- if (CED)
+ if (CED) {
OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
- else {
+ } else {
DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
/*RefersToEnclosingVariableOrCapture=*/false,
(*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
OrigAddr = EmitLValue(&DRE).getAddress();
}
OMPPrivateScope VarScope(*this);
- VarScope.addPrivate(OrigVD,
- [OrigAddr]() -> Address { return OrigAddr; });
+ VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
(void)VarScope.Privatize();
EmitIgnoredExpr(F);
}
@@ -1570,6 +1651,14 @@ static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
CGF.EmitStopPoint(&S);
}
+/// Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
PrePostActionTy &Action) {
Action.Enter(CGF);
@@ -1581,6 +1670,12 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
// <Final counter/linear vars updates>;
// }
//
+ if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
+ isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
+ isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
+ (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+ }
// Emit: if (PreCond) - begin.
// If the condition constant folds and can be elided, avoid emitting the
@@ -1591,7 +1686,7 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
if (!CondConstant)
return;
} else {
- auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
+ llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
ContBlock = CGF.createBasicBlock("simd.if.end");
emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
CGF.getProfileCount(&S));
@@ -1601,14 +1696,14 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
// Emit the loop iteration variable.
const Expr *IVExpr = S.getIterationVariable();
- const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+ const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
CGF.EmitVarDecl(*IVDecl);
CGF.EmitIgnoredExpr(S.getInit());
// Emit the iterations count variable.
// If it is not a variable, Sema decided to calculate iterations count on
// each iteration (e.g., it is foldable into a constant).
- if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
// Emit calculation of the iterations count.
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
@@ -1633,17 +1728,15 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitStopPoint(&S);
},
[](CodeGenFunction &) {});
- CGF.EmitOMPSimdFinal(
- S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
// Emit final copy of the lastprivate variables at the end of loops.
if (HasLastprivateClause)
CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
- emitPostUpdateForReductionClause(
- CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
}
- CGF.EmitOMPLinearClauseFinal(
- S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
// Emit: if (PreCond) - end.
if (ContBlock) {
CGF.EmitBranch(ContBlock);
@@ -1655,7 +1748,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitOMPSimdRegion(CGF, S, Action);
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
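// Expanding the sketch in emitOMPSimdRegion's comment, the emitted structure
// is roughly:
//
//   if (PreCond) {                     // simd.if.then
//     <IV, iteration count, linear/private/reduction inits>
//     for (IV = 0; IV < LastIteration; ++IV) { <body> }  // marked simd
//     <final counter/linear updates, lastprivates, reduction finalization>
//   }                                  // simd.if.end
//
// and the guarded region is elided when PreCond constant-folds to false.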
@@ -1665,18 +1758,18 @@ void CodeGenFunction::EmitOMPOuterLoop(
const CodeGenFunction::OMPLoopArguments &LoopArgs,
const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
- auto &RT = CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
const Expr *IVExpr = S.getIterationVariable();
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
+ JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
// Start the loop with a block that tests the condition.
- auto CondBlock = createBasicBlock("omp.dispatch.cond");
+ llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
EmitBlock(CondBlock);
- const SourceRange &R = S.getSourceRange();
+ const SourceRange R = S.getSourceRange();
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
@@ -1698,11 +1791,11 @@ void CodeGenFunction::EmitOMPOuterLoop(
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
- auto ExitBlock = LoopExit.getBlock();
+ llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
if (LoopScope.requiresCleanups())
ExitBlock = createBasicBlock("omp.dispatch.cleanup");
- auto LoopBody = createBasicBlock("omp.dispatch.body");
+ llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -1716,7 +1809,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
EmitIgnoredExpr(LoopArgs.Init);
// Create a block for the increment.
- auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
+ JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
// Generate !llvm.loop.parallel metadata for loads and stores for loops
@@ -1769,7 +1862,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
const OMPLoopArguments &LoopArgs,
const CodeGenDispatchBoundsTy &CGDispatchBounds) {
- auto &RT = CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
const bool DynamicOrOrdered =
@@ -1835,7 +1928,8 @@ void CodeGenFunction::EmitOMPForOuterLoop(
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (DynamicOrOrdered) {
- auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
+ const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
+ CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
llvm::Value *LBVal = DispatchBounds.first;
llvm::Value *UBVal = DispatchBounds.second;
CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
@@ -1878,7 +1972,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
const CodeGenLoopTy &CodeGenLoopContent) {
- auto &RT = CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
// Emit outer loop.
// Same behavior as a OMPForOuterLoop, except that schedule cannot be
@@ -1933,14 +2027,6 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
emitEmptyOrdered);
}
-/// Emit a helper variable and return corresponding lvalue.
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
- const DeclRefExpr *Helper) {
- auto VDecl = cast<VarDecl>(Helper->getDecl());
- CGF.EmitVarDecl(*VDecl);
- return CGF.EmitLValue(Helper);
-}
-
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
const OMPExecutableDirective &S) {
@@ -1958,14 +2044,18 @@ emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
// the current ones.
LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
- llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
+ llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
+ PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
PrevLBVal = CGF.EmitScalarConversion(
PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
- LS.getIterationVariable()->getType(), SourceLocation());
- llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
+ LS.getIterationVariable()->getType(),
+ LS.getPrevLowerBoundVariable()->getExprLoc());
+ llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
+ PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
PrevUBVal = CGF.EmitScalarConversion(
PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
- LS.getIterationVariable()->getType(), SourceLocation());
+ LS.getIterationVariable()->getType(),
+ LS.getPrevUpperBoundVariable()->getExprLoc());
CGF.EmitStoreOfScalar(PrevLBVal, LB);
CGF.EmitStoreOfScalar(PrevUBVal, UB);
@@ -1991,10 +2081,10 @@ emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
// is not normalized as each team only executes its own assigned
// distribute chunk
QualType IteratorTy = IVExpr->getType();
- llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
- SourceLocation());
- llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
- SourceLocation());
+ llvm::Value *LBVal =
+ CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
+ llvm::Value *UBVal =
+ CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
return {LBVal, UBVal};
}
@@ -2004,13 +2094,13 @@ static void emitDistributeParallelForDistributeInnerBoundParams(
const auto &Dir = cast<OMPLoopDirective>(S);
LValue LB =
CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
- auto LBCast = CGF.Builder.CreateIntCast(
+ llvm::Value *LBCast = CGF.Builder.CreateIntCast(
CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
CapturedVars.push_back(LBCast);
LValue UB =
CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
- auto UBCast = CGF.Builder.CreateIntCast(
+ llvm::Value *UBCast = CGF.Builder.CreateIntCast(
CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
CapturedVars.push_back(UBCast);
}
@@ -2020,7 +2110,8 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
bool HasCancel = false;
if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
@@ -2051,7 +2142,7 @@ void CodeGenFunction::EmitOMPDistributeParallelForDirective(
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_parallel);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
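// In this combined lowering the distribute loop hands each team a chunk and
// the inner 'parallel for' re-splits it: the inner-bounds/dispatch-bounds
// helpers above load the team's previous lower/upper bounds into the inner
// loop's LB/UB, roughly:
//
//   distribute:   team gets [PrevLB, PrevUB]
//   parallel for: LB = PrevLB; UB = min(PrevUB, GlobalUB);
//                 threads then divide [LB, UB] by the 'for' schedule.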
@@ -2061,7 +2152,7 @@ void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_parallel);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
@@ -2070,7 +2161,7 @@ void CodeGenFunction::EmitOMPDistributeSimdDirective(
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
@@ -2096,28 +2187,6 @@ void CodeGenFunction::EmitOMPTargetSimdDirective(
emitCommonOMPTargetDirective(*this, S, CodeGen);
}
-void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
- const OMPTargetTeamsDistributeParallelForDirective &S) {
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams_distribute_parallel_for,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
-}
-
-void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
- const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams_distribute_parallel_for_simd,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
-}
-
namespace {
struct ScheduleKindModifiersTy {
OpenMPScheduleClauseKind Kind;
@@ -2135,20 +2204,20 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
const CodeGenLoopBoundsTy &CodeGenLoopBounds,
const CodeGenDispatchBoundsTy &CGDispatchBounds) {
// Emit the loop iteration variable.
- auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
- auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
+ const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+ const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
EmitVarDecl(*IVDecl);
// Emit the iterations count variable.
// If it is not a variable, Sema decided to calculate iterations count on each
// iteration (e.g., it is foldable into a constant).
- if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
// Emit calculation of the iterations count.
EmitIgnoredExpr(S.getCalcLastIteration());
}
- auto &RT = CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
bool HasLastprivateClause;
// Check pre-condition.
@@ -2163,7 +2232,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
if (!CondConstant)
return false;
} else {
- auto *ThenBlock = createBasicBlock("omp.precond.then");
+ llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
ContBlock = createBasicBlock("omp.precond.end");
emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
getProfileCount(&S));
@@ -2171,8 +2240,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
incrementProfileCounter(&S);
}
+ RunCleanupsScope DoacrossCleanupScope(*this);
bool Ordered = false;
- if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
+ if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
if (OrderedClause->getNumForLoops())
RT.emitDoacrossInit(*this, S);
else
@@ -2213,11 +2283,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// Detect the loop schedule kind and chunk.
llvm::Value *Chunk = nullptr;
OpenMPScheduleTy ScheduleKind;
- if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
+ if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
ScheduleKind.Schedule = C->getScheduleKind();
ScheduleKind.M1 = C->getFirstScheduleModifier();
ScheduleKind.M2 = C->getSecondScheduleModifier();
- if (const auto *Ch = C->getChunkSize()) {
+ if (const Expr *Ch = C->getChunkSize()) {
Chunk = EmitScalarExpr(Ch);
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
S.getIterationVariable()->getType(),
@@ -2245,7 +2315,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
UB.getAddress(), ST.getAddress());
RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
- auto LoopExit =
+ JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
EmitIgnoredExpr(S.getEnsureUpperBound());
@@ -2282,7 +2352,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
EmitOMPSimdFinal(S,
- [&](CodeGenFunction &CGF) -> llvm::Value * {
+ [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
@@ -2293,7 +2363,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
: /*Parallel only*/ OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
- *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+ *this, S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
@@ -2303,14 +2373,15 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
S, isOpenMPSimdDirective(S.getDirectiveKind()),
Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
}
- EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+ EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
+ DoacrossCleanupScope.ForceCleanup();
// We're now done with the loop, so jump to the continuation block.
if (ContBlock) {
EmitBranch(ContBlock);
- EmitBlock(ContBlock, true);
+ EmitBlock(ContBlock, /*IsFinished=*/true);
}
}
return HasLastprivateClause;
@@ -2321,7 +2392,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
- const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const auto &LS = cast<OMPLoopDirective>(S);
LValue LB =
EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
LValue UB =
@@ -2336,7 +2407,7 @@ emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
Address LB, Address UB) {
- const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const auto &LS = cast<OMPLoopDirective>(S);
const Expr *IVExpr = LS.getIterationVariable();
const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
@@ -2354,15 +2425,14 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
emitDispatchForLoopBounds);
};
{
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
S.hasCancel());
}
// Emit an implicit barrier at the end.
- if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
+ if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
- }
}
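// For the default static schedule, the skeleton produced by
// EmitOMPWorksharingLoop corresponds roughly to (libomp entry points assumed
// for illustration):
//
//   __kmpc_for_static_init_4(&loc, gtid, /*schedtype=*/kmp_sch_static,
//                            &last, &lb, &ub, &stride, /*incr=*/1, chunk);
//   ub = min(ub, GlobalUB);  iv = lb;
//   while (iv <= ub) { <body>; ++iv; }
//   __kmpc_for_static_fini(&loc, gtid);
//   // implicit barrier afterwards unless 'nowait' without lastprivates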
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
@@ -2374,38 +2444,39 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
emitDispatchForLoopBounds);
};
{
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
// Emit an implicit barrier at the end.
- if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
+ if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
- }
}
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
const Twine &Name,
llvm::Value *Init = nullptr) {
- auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
+ LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
if (Init)
CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
return LVal;
}
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
- auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
- auto *CS = dyn_cast<CompoundStmt>(Stmt);
+ const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
+ const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
bool HasLastprivates = false;
- auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
- PrePostActionTy &) {
- auto &C = CGF.CGM.getContext();
- auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ auto &&CodeGen = [&S, CapturedStmt, CS,
+ &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
+ ASTContext &C = CGF.getContext();
+ QualType KmpInt32Ty =
+ C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Emit helper vars inits.
LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
CGF.Builder.getInt32(0));
- auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
- : CGF.Builder.getInt32(0);
+ llvm::ConstantInt *GlobalUBVal = CS != nullptr
+ ? CGF.Builder.getInt32(CS->size() - 1)
+ : CGF.Builder.getInt32(0);
LValue UB =
createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
@@ -2423,8 +2494,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
OK_Ordinary, S.getLocStart(), FPOptions());
// Increment for loop counter.
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
- S.getLocStart());
- auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
+ S.getLocStart(), true);
+ auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
// Iterate through all sections and emit a switch construct:
// switch (IV) {
// case 0:
@@ -2436,13 +2507,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// break;
// }
// .omp.sections.exit:
- auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
- auto *SwitchStmt = CGF.Builder.CreateSwitch(
- CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
- CS == nullptr ? 1 : CS->size());
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
+ llvm::SwitchInst *SwitchStmt =
+ CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
+ ExitBB, CS == nullptr ? 1 : CS->size());
if (CS) {
unsigned CaseNumber = 0;
- for (auto *SubStmt : CS->children()) {
+ for (const Stmt *SubStmt : CS->children()) {
auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
CGF.EmitBlock(CaseBB);
SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
@@ -2451,10 +2522,10 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
++CaseNumber;
}
} else {
- auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
+ llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
CGF.EmitBlock(CaseBB);
SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
- CGF.EmitStmt(Stmt);
+ CGF.EmitStmt(CapturedStmt);
CGF.EmitBranch(ExitBB);
}
CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
@@ -2483,8 +2554,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.CGM.getOpenMPRuntime().emitForStaticInit(
CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
// UB = min(UB, GlobalUB);
- auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
- auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
+ llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
+ llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
// IV = LB;
@@ -2500,11 +2571,10 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
- emitPostUpdateForReductionClause(
- CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
- return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
- });
+ emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ });
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivates)
@@ -2535,7 +2605,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
{
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
EmitSections(S);
}
// Emit an implicit barrier at the end.
@@ -2547,9 +2617,9 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
S.hasCancel());
}
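// So a construct such as
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     a();
//     #pragma omp section
//     b();
//   }
//
// becomes a statically scheduled loop over IV in [0, CS->size() - 1] whose
// body is the switch built in EmitSections, one case per section.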
@@ -2578,10 +2648,10 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
(void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
CGF.EmitOMPPrivateClause(S, SingleScope);
(void)SingleScope.Privatize();
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
{
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
CopyprivateVars, DestExprs,
SrcExprs, AssignmentOps);
@@ -2598,21 +2668,21 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}
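// emitMasterRegion guards the captured statement so only the master thread
// executes it; a sketch with the conventional entry points (assumed for
// illustration):
//
//   if (__kmpc_master(&loc, gtid)) {
//     <captured statement>
//     __kmpc_end_master(&loc, gtid);
//   }
//   // 'master' implies no barrier on exit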
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
- Expr *Hint = nullptr;
- if (auto *HintClause = S.getSingleClause<OMPHintClause>())
+ const Expr *Hint = nullptr;
+ if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
Hint = HintClause->getHint();
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitCriticalRegion(*this,
S.getDirectiveName().getAsString(),
CodeGen, S.getLocStart(), Hint);
@@ -2622,7 +2692,8 @@ void CodeGenFunction::EmitOMPParallelForDirective(
const OMPParallelForDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
@@ -2635,7 +2706,8 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
const OMPParallelForSimdDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};
@@ -2647,27 +2719,28 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'sections' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitSections(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
emitEmptyBoundParameters);
}
-void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
- const RegionCodeGenTy &BodyGen,
- const TaskGenTy &TaskGen,
- OMPTaskDataTy &Data) {
+void CodeGenFunction::EmitOMPTaskBasedDirective(
+ const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
+ const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
+ OMPTaskDataTy &Data) {
// Emit outlined function for task construct.
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto *I = CS->getCapturedDecl()->param_begin();
- auto *PartId = std::next(I);
- auto *TaskT = std::next(I, 4);
+ const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
+ auto I = CS->getCapturedDecl()->param_begin();
+ auto PartId = std::next(I);
+ auto TaskT = std::next(I, 4);
// Check if the task is final
if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
- auto *Cond = Clause->getCondition();
+ const Expr *Cond = Clause->getCondition();
bool CondConstant;
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
Data.Final.setInt(CondConstant);
@@ -2679,7 +2752,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
}
// Check if the task has 'priority' clause.
if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
- auto *Prio = Clause->getPriority();
+ const Expr *Prio = Clause->getPriority();
Data.Priority.setInt(/*IntVal=*/true);
Data.Priority.setPointer(EmitScalarConversion(
EmitScalarExpr(Prio), Prio->getType(),
@@ -2692,8 +2765,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
// Get list of private variables.
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
auto IRef = C->varlist_begin();
- for (auto *IInit : C->private_copies()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *IInit : C->private_copies()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
Data.PrivateVars.push_back(*IRef);
Data.PrivateCopies.push_back(IInit);
@@ -2706,8 +2779,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto IElemInitRef = C->inits().begin();
- for (auto *IInit : C->private_copies()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *IInit : C->private_copies()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
Data.FirstprivateVars.push_back(*IRef);
Data.FirstprivateCopies.push_back(IInit);
@@ -2722,8 +2795,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
auto IRef = C->varlist_begin();
auto ID = C->destination_exprs().begin();
- for (auto *IInit : C->private_copies()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ for (const Expr *IInit : C->private_copies()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
Data.LastprivateVars.push_back(*IRef);
Data.LastprivateCopies.push_back(IInit);
@@ -2742,7 +2815,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
auto IRed = C->reduction_ops().begin();
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
- for (const auto *Ref : C->varlists()) {
+ for (const Expr *Ref : C->varlists()) {
Data.ReductionVars.emplace_back(Ref);
Data.ReductionCopies.emplace_back(*IPriv);
Data.ReductionOps.emplace_back(*IRed);
@@ -2758,50 +2831,51 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
*this, S.getLocStart(), LHSs, RHSs, Data);
// Build list of dependences.
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
- for (auto *IRef : C->varlists())
- Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
- auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
- CodeGenFunction &CGF, PrePostActionTy &Action) {
+ for (const Expr *IRef : C->varlists())
+ Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
+ auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
+ CapturedRegion](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
!Data.LastprivateVars.empty()) {
enum { PrivatesParam = 2, CopyFnParam = 3 };
- auto *CopyFn = CGF.Builder.CreateLoad(
- CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
- auto *PrivatesPtr = CGF.Builder.CreateLoad(
- CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
+ llvm::Value *CopyFn = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
+ llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
+ CS->getCapturedDecl()->getParam(PrivatesParam)));
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
- for (auto *E : Data.PrivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ for (const Expr *E : Data.PrivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr = CGF.CreateMemTemp(
CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
- PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+ PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
- for (auto *E : Data.FirstprivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ for (const Expr *E : Data.FirstprivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
".firstpriv.ptr.addr");
- PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+ PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
- for (auto *E : Data.LastprivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ for (const Expr *E : Data.LastprivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
".lastpriv.ptr.addr");
- PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+ PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
CopyFn, CallArgs);
- for (auto &&Pair : LastprivateDstsOrigs) {
- auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
+ for (const auto &Pair : LastprivateDstsOrigs) {
+ const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
DeclRefExpr DRE(
const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
@@ -2811,14 +2885,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
return CGF.EmitLValue(&DRE).getAddress();
});
}
- for (auto &&Pair : PrivatePtrs) {
+ for (const auto &Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
}
}
if (Data.Reductions) {
- OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
+ OMPLexicalScope LexScope(CGF, S, CapturedRegion);
ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
Data.ReductionOps);
llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
@@ -2826,6 +2900,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
RedCG.emitSharedLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
+ // FIXME: This must be removed once the runtime library is fixed.
+ // Emit required threadprivate variables for
+ // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ RedCG, Cnt);
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement =
@@ -2833,16 +2912,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
CGF.getContext().getPointerType(
Data.ReductionCopies[Cnt]->getType()),
- SourceLocation()),
+ Data.ReductionCopies[Cnt]->getExprLoc()),
Replacement.getAlignment());
Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
Scope.addPrivate(RedCG.getBaseDecl(Cnt),
[Replacement]() { return Replacement; });
- // FIXME: This must removed once the runtime library is fixed.
- // Emit required threadprivate variables for
- // initilizer/combiner/finalizer.
- CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
- RedCG, Cnt);
}
}
// Privatize all private variables except for in_reduction items.
@@ -2855,7 +2929,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
auto ITD = C->taskgroup_descriptors().begin();
- for (const auto *Ref : C->varlists()) {
+ for (const Expr *Ref : C->varlists()) {
InRedVars.emplace_back(Ref);
InRedPrivs.emplace_back(*IPriv);
InRedOps.emplace_back(*IRed);
@@ -2875,24 +2949,25 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
RedCG.emitAggregateType(CGF, Cnt);
// The taskgroup descriptor variable is always implicit firstprivate and
// privatized already during processing of the firstprivates.
- llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
- CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
+ // FIXME: This must be removed once the runtime library is fixed.
+ // Emit required threadprivate variables for
+ // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ RedCG, Cnt);
+ llvm::Value *ReductionsPtr =
+ CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
+ TaskgroupDescriptors[Cnt]->getExprLoc());
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement = Address(
CGF.EmitScalarConversion(
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
- SourceLocation()),
+ InRedPrivs[Cnt]->getExprLoc()),
Replacement.getAlignment());
Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
[Replacement]() { return Replacement; });
- // FIXME: This must removed once the runtime library is fixed.
- // Emit required threadprivate variables for
- // initilizer/combiner/finalizer.
- CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
- RedCG, Cnt);
}
}
(void)InRedScope.Privatize();
@@ -2900,7 +2975,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
Action.Enter(CGF);
BodyGen(CGF);
};
- auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+ llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
Data.NumberOfParts);
OMPLexicalScope Scope(*this, S);
@@ -2909,27 +2984,24 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
- QualType Ty, CapturedDecl *CD) {
- auto *OrigVD = ImplicitParamDecl::Create(
- C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other);
- auto *OrigRef =
- DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
- /*RefersToEnclosingVariableOrCapture=*/false,
- SourceLocation(), Ty, VK_LValue);
- auto *PrivateVD = ImplicitParamDecl::Create(
- C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other);
+ QualType Ty, CapturedDecl *CD,
+ SourceLocation Loc) {
+ auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
+ ImplicitParamDecl::Other);
+ auto *OrigRef = DeclRefExpr::Create(
+ C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
+ /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
+ auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
+ ImplicitParamDecl::Other);
auto *PrivateRef = DeclRefExpr::Create(
C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
- /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty,
- VK_LValue);
+ /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
QualType ElemType = C.getBaseElementType(Ty);
- auto *InitVD =
- ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr,
- ElemType, ImplicitParamDecl::Other);
- auto *InitRef =
- DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
- /*RefersToEnclosingVariableOrCapture=*/false,
- SourceLocation(), ElemType, VK_LValue);
+ auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
+ ImplicitParamDecl::Other);
+ auto *InitRef = DeclRefExpr::Create(
+ C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
+ /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
PrivateVD->setInitStyle(VarDecl::CInit);
PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
InitRef, /*BasePath=*/nullptr,
@@ -2944,12 +3016,12 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
OMPTargetDataInfo &InputInfo) {
// Emit outlined function for task construct.
- auto CS = S.getCapturedStmt(OMPD_task);
- auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
- auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
- auto *I = CS->getCapturedDecl()->param_begin();
- auto *PartId = std::next(I);
- auto *TaskT = std::next(I, 4);
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
+ Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ auto I = CS->getCapturedDecl()->param_begin();
+ auto PartId = std::next(I);
+ auto TaskT = std::next(I, 4);
OMPTaskDataTy Data;
// The task is not final.
Data.Final.setInt(/*IntVal=*/false);
@@ -2976,14 +3048,15 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
QualType BaseAndPointersType = getContext().getConstantArrayType(
getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- BPVD = createImplicitFirstprivateForType(getContext(), Data,
- BaseAndPointersType, CD);
- PVD = createImplicitFirstprivateForType(getContext(), Data,
- BaseAndPointersType, CD);
+ BPVD = createImplicitFirstprivateForType(
+ getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
+ PVD = createImplicitFirstprivateForType(
+ getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
QualType SizesType = getContext().getConstantArrayType(
getContext().getSizeType(), ArrSize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD);
+ SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
+ S.getLocStart());
TargetScope.addPrivate(
BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
TargetScope.addPrivate(PVD,
@@ -2994,33 +3067,33 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
(void)TargetScope.Privatize();
// Build list of dependences.
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
- for (auto *IRef : C->varlists())
- Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+ for (const Expr *IRef : C->varlists())
+ Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
&InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
if (!Data.FirstprivateVars.empty()) {
enum { PrivatesParam = 2, CopyFnParam = 3 };
- auto *CopyFn = CGF.Builder.CreateLoad(
- CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
- auto *PrivatesPtr = CGF.Builder.CreateLoad(
- CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
+ llvm::Value *CopyFn = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
+ llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
+ CS->getCapturedDecl()->getParam(PrivatesParam)));
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
- for (auto *E : Data.FirstprivateVars) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ for (const Expr *E : Data.FirstprivateVars) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
".firstpriv.ptr.addr");
- PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+ PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
CopyFn, CallArgs);
- for (auto &&Pair : PrivatePtrs) {
+ for (const auto &Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
@@ -3028,19 +3101,20 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
// Privatize all private variables except for in_reduction items.
(void)Scope.Privatize();
- InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
- CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
- InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
- CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
- InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
- CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
+ if (InputInfo.NumberOfTargetItems > 0) {
+ InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
+ CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
+ InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
+ CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
+ InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
+ CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
+ }
Action.Enter(CGF);
- OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true,
- /*EmitPreInitStmt=*/false);
+ OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
BodyGen(CGF);
};
- auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+ llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
Data.NumberOfParts);
llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
@@ -3054,9 +3128,9 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
// Emit outlined function for task construct.
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
- auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
+ Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
if (C->getNameModifier() == OMPD_unknown ||
@@ -3079,7 +3153,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
SharedsTy, CapturedStruct, IfCond,
Data);
};
- EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
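
For reference, a minimal sketch of the user code this path lowers; fib and the
cutoff value are hypothetical. The 'if' clause is what feeds the IfCond check
above:

    int fib(int n) {
      if (n < 2) return n;
      int x, y;
    #pragma omp task shared(x) if(n > 20)
      x = fib(n - 1);
    #pragma omp task shared(y) if(n > 20)
      y = fib(n - 2);
    #pragma omp taskwait
      return x + y;
    }
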
void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -3108,7 +3182,7 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
auto IRed = C->reduction_ops().begin();
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
- for (const auto *Ref : C->varlists()) {
+ for (const Expr *Ref : C->varlists()) {
Data.ReductionVars.emplace_back(Ref);
Data.ReductionCopies.emplace_back(*IPriv);
Data.ReductionOps.emplace_back(*IRed);
@@ -3128,40 +3202,42 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
/*Volatile=*/false, E->getType());
}
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}
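
As an illustration of the reduction-descriptor plumbing above, a hypothetical
'taskgroup' with a 'task_reduction' clause (a and n are assumed to exist):

    int sum = 0;
    #pragma omp taskgroup task_reduction(+: sum)
    {
      for (int i = 0; i < n; ++i) {
    #pragma omp task in_reduction(+: sum)
        sum += a[i];
      }
    }
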
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
- CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
- if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
- return llvm::makeArrayRef(FlushClause->varlist_begin(),
- FlushClause->varlist_end());
- }
- return llvm::None;
- }(), S.getLocStart());
+ CGM.getOpenMPRuntime().emitFlush(
+ *this,
+ [&S]() -> ArrayRef<const Expr *> {
+ if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
+ return llvm::makeArrayRef(FlushClause->varlist_begin(),
+ FlushClause->varlist_end());
+ return llvm::None;
+ }(),
+ S.getLocStart());
}
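
The two shapes handled here, as a hypothetical sketch: a 'flush' with a list
passes its varlist to emitFlush, while a bare 'flush' passes llvm::None.

    #pragma omp flush(a, b)
    #pragma omp flush
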
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
const CodeGenLoopTy &CodeGenLoop,
Expr *IncExpr) {
// Emit the loop iteration variable.
- auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
- auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
+ const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+ const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
EmitVarDecl(*IVDecl);
  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
- if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit the calculation of the iteration count.
EmitIgnoredExpr(S.getCalcLastIteration());
}
- auto &RT = CGM.getOpenMPRuntime();
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
bool HasLastprivateClause = false;
// Check pre-condition.
@@ -3176,7 +3252,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
if (!CondConstant)
return;
} else {
- auto *ThenBlock = createBasicBlock("omp.precond.then");
+ llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
ContBlock = createBasicBlock("omp.precond.end");
emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
getProfileCount(&S));
@@ -3225,9 +3301,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// Detect the distribute schedule kind and chunk.
llvm::Value *Chunk = nullptr;
OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
- if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
+ if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
ScheduleKind = C->getDistScheduleKind();
- if (const auto *Ch = C->getChunkSize()) {
+ if (const Expr *Ch = C->getChunkSize()) {
Chunk = EmitScalarExpr(Ch);
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
S.getIterationVariable()->getType(),
@@ -3254,7 +3330,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
LB.getAddress(), UB.getAddress(), ST.getAddress());
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
StaticInit);
- auto LoopExit =
+ JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
@@ -3265,9 +3341,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
? S.getCombinedInit()
: S.getInit());
- Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedCond()
- : S.getCond();
+ const Expr *Cond =
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
// for distribute alone, codegen
// while (idx <= UB) { BODY; ++idx; }
@@ -3291,31 +3368,35 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
CodeGenLoop);
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+ EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
}
- OpenMPDirectiveKind ReductionKind = OMPD_unknown;
- if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
- isOpenMPSimdDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_parallel_for_simd;
- } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_parallel_for;
- } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_simd;
- } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
- S.hasClausesOfKind<OMPReductionClause>()) {
- llvm_unreachable(
- "No reduction clauses is allowed in distribute directive.");
+ if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
+ !isOpenMPParallelDirective(S.getDirectiveKind()) &&
+ !isOpenMPTeamsDirective(S.getDirectiveKind())) {
+ OpenMPDirectiveKind ReductionKind = OMPD_unknown;
+ if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
+ isOpenMPSimdDirective(S.getDirectiveKind())) {
+ ReductionKind = OMPD_parallel_for_simd;
+ } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
+ ReductionKind = OMPD_parallel_for;
+ } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ ReductionKind = OMPD_simd;
+ } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
+ S.hasClausesOfKind<OMPReductionClause>()) {
+      llvm_unreachable(
+          "No reduction clauses are allowed in distribute directive.");
+ }
+ EmitOMPReductionClauseFinal(S, ReductionKind);
+ // Emit post-update of the reduction variables if IsLastIter != 0.
+ emitPostUpdateForReductionClause(
+ *this, S, [IL, &S](CodeGenFunction &CGF) {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ });
}
- EmitOMPReductionClauseFinal(S, ReductionKind);
- // Emit post-update of the reduction variables if IsLastIter != 0.
- emitPostUpdateForReductionClause(
- *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
- return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
- });
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivateClause) {
EmitOMPLastprivateClauseFinal(
@@ -3335,10 +3416,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
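
A hypothetical source-level counterpart; the 'dist_schedule' chunk is the
expression evaluated and converted above:

    #pragma omp teams
    #pragma omp distribute dist_schedule(static, 128)
    for (int i = 0; i < n; ++i)
      a[i] = b[i] + c[i];
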
@@ -3347,34 +3427,35 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
CGF.CapturedStmtInfo = &CapStmtInfo;
- auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
- Fn->addFnAttr(llvm::Attribute::NoInline);
+ llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
+ Fn->setDoesNotRecurse();
return Fn;
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
- if (!S.getAssociatedStmt()) {
+ if (S.hasClausesOfKind<OMPDependClause>()) {
+    assert(!S.getAssociatedStmt() &&
+           "An 'ordered' construct with 'depend' clauses must not have an "
+           "associated statement.");
for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
return;
}
- auto *C = S.getSingleClause<OMPSIMDClause>();
+ const auto *C = S.getSingleClause<OMPSIMDClause>();
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
PrePostActionTy &Action) {
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
if (C) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
+ llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
OutlinedFn, CapturedVars);
} else {
Action.Enter(CGF);
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(CS->getCapturedStmt());
}
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
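
A sketch of the two forms handled here (all names hypothetical): the standalone
'depend' form takes the early doacross path above, while the block form is
either outlined (with 'simd') or emitted inline.

    #pragma omp for ordered(2)
    for (int i = 1; i < n; ++i)
      for (int j = 1; j < m; ++j) {
    #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
        body(i, j);
    #pragma omp ordered depend(source)
      }
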
@@ -3384,11 +3465,10 @@ static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
assert(CGF.hasScalarEvaluationKind(DestType) &&
"DestType must have scalar evaluation kind.");
assert(!Val.isAggregate() && "Must be a scalar or complex.");
- return Val.isScalar()
- ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
- Loc)
- : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
- DestType, Loc);
+ return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
+ DestType, Loc)
+ : CGF.EmitComplexToScalarConversion(
+ Val.getComplexVal(), SrcType, DestType, Loc);
}
static CodeGenFunction::ComplexPairTy
@@ -3399,15 +3479,17 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
CodeGenFunction::ComplexPairTy ComplexVal;
if (Val.isScalar()) {
// Convert the input element to the element type of the complex.
- auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
- auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
- DestElementType, Loc);
+ QualType DestElementType =
+ DestType->castAs<ComplexType>()->getElementType();
+ llvm::Value *ScalarVal = CGF.EmitScalarConversion(
+ Val.getScalarVal(), SrcType, DestElementType, Loc);
ComplexVal = CodeGenFunction::ComplexPairTy(
ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
} else {
assert(Val.isComplex() && "Must be a scalar or complex.");
- auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
- auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+ QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+ QualType DestElementType =
+ DestType->castAs<ComplexType>()->getElementType();
ComplexVal.first = CGF.EmitScalarConversion(
Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
ComplexVal.second = CGF.EmitScalarConversion(
@@ -3446,7 +3528,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
}
}
-static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
const Expr *X, const Expr *V,
SourceLocation Loc) {
// v = x;
@@ -3470,7 +3552,7 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}
-static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
const Expr *X, const Expr *E,
SourceLocation Loc) {
// x = expr;
@@ -3489,7 +3571,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
BinaryOperatorKind BO,
llvm::AtomicOrdering AO,
bool IsXLHSInRHSPart) {
- auto &Context = CGF.CGM.getContext();
+ ASTContext &Context = CGF.getContext();
// Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
// expression is simple and atomic is allowed for the given type for the
// target platform.
@@ -3567,20 +3649,21 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
case BO_Comma:
llvm_unreachable("Unsupported atomic update operation");
}
- auto *UpdateVal = Update.getScalarVal();
+ llvm::Value *UpdateVal = Update.getScalarVal();
if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
UpdateVal = CGF.Builder.CreateIntCast(
IC, X.getAddress().getElementType(),
X.getType()->hasSignedIntegerRepresentation());
}
- auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
+ llvm::Value *Res =
+ CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
return std::make_pair(true, RValue::get(Res));
}
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
llvm::AtomicOrdering AO, SourceLocation Loc,
- const llvm::function_ref<RValue(RValue)> &CommonGen) {
+ const llvm::function_ref<RValue(RValue)> CommonGen) {
// Update expressions are allowed to have the following forms:
// x binop= expr; -> xrval + expr;
// x++, ++x -> xrval + 1;
@@ -3601,13 +3684,13 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
return Res;
}
-static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
const Expr *X, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
SourceLocation Loc) {
assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
"Update expr in 'atomic update' must be a binary operator.");
- auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+ const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
// Update expressions are allowed to have the following forms:
// x binop= expr; -> xrval + expr;
// x++, ++x -> xrval + 1;
@@ -3617,18 +3700,18 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic;
- auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
- auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
- auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
- auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
- auto Gen =
- [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
- CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
- CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
- return CGF.EmitAnyExpr(UE);
- };
+ llvm::AtomicOrdering AO = IsSeqCst
+ ? llvm::AtomicOrdering::SequentiallyConsistent
+ : llvm::AtomicOrdering::Monotonic;
+ const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+ const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+ const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+ const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+ auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
+ CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+ CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+ return CGF.EmitAnyExpr(UE);
+ };
(void)CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
// OpenMP, 2.12.6, atomic Construct
@@ -3656,7 +3739,7 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value,
llvm_unreachable("Must be a scalar or complex.");
}
-static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
bool IsPostfixUpdate, const Expr *V,
const Expr *X, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
@@ -3667,27 +3750,28 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
LValue VLValue = CGF.EmitLValue(V);
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic;
+ llvm::AtomicOrdering AO = IsSeqCst
+ ? llvm::AtomicOrdering::SequentiallyConsistent
+ : llvm::AtomicOrdering::Monotonic;
QualType NewVValType;
if (UE) {
// 'x' is updated with some additional value.
assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
"Update expr in 'atomic capture' must be a binary operator.");
- auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+ const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
// Update expressions are allowed to have the following forms:
// x binop= expr; -> xrval + expr;
// x++, ++x -> xrval + 1;
// x--, --x -> xrval - 1;
// x = x binop expr; -> xrval binop expr
// x = expr Op x; - > expr binop xrval;
- auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
- auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
- auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+ const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+ const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+ const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
NewVValType = XRValExpr->getType();
- auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+ const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
- IsPostfixUpdate](RValue XRValue) -> RValue {
+ IsPostfixUpdate](RValue XRValue) {
CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
RValue Res = CGF.EmitAnyExpr(UE);
@@ -3714,7 +3798,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = X->getType().getNonReferenceType();
ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
X->getType().getNonReferenceType(), Loc);
- auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
+ auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
NewVVal = XRValue;
return ExprRValue;
};
@@ -3737,24 +3821,24 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
-static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
bool IsSeqCst, bool IsPostfixUpdate,
const Expr *X, const Expr *V, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
SourceLocation Loc) {
switch (Kind) {
case OMPC_read:
- EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+ emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
break;
case OMPC_write:
- EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+ emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
break;
case OMPC_unknown:
case OMPC_update:
- EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
+ emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
break;
case OMPC_capture:
- EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
+ emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
IsXLHSInRHSPart, Loc);
break;
case OMPC_if:
@@ -3810,7 +3894,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
OpenMPClauseKind Kind = OMPC_unknown;
- for (auto *C : S.clauses()) {
+ for (const OMPClause *C : S.clauses()) {
// Find first clause (skip seq_cst clause, if it is first).
if (C->getClauseKind() != OMPC_seq_cst) {
Kind = C->getClauseKind();
@@ -3818,28 +3902,25 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
}
}
- const auto *CS =
- S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
+ const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
+ if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
enterFullExpression(EWC);
- }
// Processing for statements under 'atomic capture'.
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
- for (const auto *C : Compound->body()) {
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
+ for (const Stmt *C : Compound->body()) {
+ if (const auto *EWC = dyn_cast<ExprWithCleanups>(C))
enterFullExpression(EWC);
- }
}
}
auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.EmitStopPoint(CS);
- EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
+ emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
S.getV(), S.getExpr(), S.getUpdateExpr(),
S.isXLHSInRHSPart(), S.getLocStart());
};
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
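
For orientation, hypothetical sources for the four clause kinds dispatched by
emitOMPAtomicExpr; v, x, and d stand for shared scalars, and 'seq_cst' is what
sets IsSeqCst:

    #pragma omp atomic read seq_cst
    v = x;
    #pragma omp atomic write
    x = d;
    #pragma omp atomic update
    x += d;
    #pragma omp atomic capture
    { v = x; x += d; }
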
@@ -3848,7 +3929,16 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
const RegionCodeGenTy &CodeGen) {
assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
CodeGenModule &CGM = CGF.CGM;
- const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
+
+  // On the device, emit this construct as inlined code.
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ OMPLexicalScope Scope(CGF, S, OMPD_target);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
+ });
+ return;
+ }
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
@@ -3865,9 +3955,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
// Check if we have any device clause associated with the directive.
const Expr *Device = nullptr;
- if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
+ if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();
- }
// Check if we have an if clause whose conditional always evaluates to false
// or if we do not have any targets specified. If so the target region is not
@@ -3885,9 +3974,9 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
StringRef ParentName;
// In case we have Ctors/Dtors we use the complete type variant to produce
// the mangling of the device outlined kernel.
- if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
+ if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
- else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
+ else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
else
ParentName =
@@ -3896,22 +3985,19 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
// Emit target region as a standalone region.
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry, CodeGen);
- OMPLexicalScope Scope(CGF, S);
- llvm::SmallVector<llvm::Value *, 16> CapturedVars;
- CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
- CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
- CapturedVars);
+ OMPLexicalScope Scope(CGF, S, OMPD_task);
+ CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
(void)PrivateScope.Privatize();
- Action.Enter(CGF);
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}
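
A hypothetical 'target' construct of the shape handled here; the 'firstprivate'
and 'private' variables are privatized in the scope above before the captured
body is emitted:

    int t;
    #pragma omp target device(0) map(tofrom: a[0:n]) firstprivate(n) private(t)
    for (int i = 0; i < n; ++i) {
      t = a[i];
      a[i] = t * t;
    }
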
void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
@@ -3940,14 +4026,15 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
- S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ llvm::Value *OutlinedFn =
+ CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
- const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
- const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
+ const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
+ const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
- Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
- Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
+ const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
+ const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
S.getLocStart());
@@ -3962,18 +4049,19 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
// Emit teams region as a standalone region.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
- emitPostUpdateForReductionClause(
- *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
}
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
@@ -3982,18 +4070,18 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
Action.Enter(CGF);
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
- Action.Enter(CGF);
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
- emitPostUpdateForReductionClause(
- CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
@@ -4028,7 +4116,8 @@ emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4073,7 +4162,8 @@ static void emitTargetTeamsDistributeSimdRegion(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4117,7 +4207,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4138,7 +4229,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4160,7 +4252,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4182,7 +4275,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4195,6 +4289,109 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
[](CodeGenFunction &) { return nullptr; });
}
+static void emitTargetTeamsDistributeParallelForRegion(
+ CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
+
+ // Emit teams region as a standalone region.
+ auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+ CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+ };
+
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
+ CodeGenTeams);
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDistributeParallelForDirective &S) {
+ // Emit SPMD target teams distribute parallel for region as a standalone
+ // region.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
+ const OMPTargetTeamsDistributeParallelForDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
+static void emitTargetTeamsDistributeParallelForSimdRegion(
+ CodeGenFunction &CGF,
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
+
+ // Emit teams region as a standalone region.
+ auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+ CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+ };
+
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
+ CodeGenTeams);
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+ // Emit SPMD target teams distribute parallel for simd region as a standalone
+ // region.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
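
The fully combined construct these entry points cover, as a hypothetical
example:

    #pragma omp target teams distribute parallel for simd map(tofrom: a[0:n])
    for (int i = 0; i < n; ++i)
      a[i] += b[i];
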
+
void CodeGenFunction::EmitOMPCancellationPointDirective(
const OMPCancellationPointDirective &S) {
CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
@@ -4234,19 +4431,19 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
const auto &C = cast<OMPUseDevicePtrClause>(NC);
auto OrigVarIt = C.varlist_begin();
auto InitIt = C.inits().begin();
- for (auto PvtVarIt : C.private_copies()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
- auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
- auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
+ for (const Expr *PvtVarIt : C.private_copies()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
+ const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
+ const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
// In order to identify the right initializer we need to match the
     // declaration used by the mapping logic. In some cases we may get an
     // OMPCapturedExprDecl that refers to the original declaration.
const ValueDecl *MatchingVD = OrigVD;
- if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+ if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
       // OMPCapturedExprDecls are used to privatize fields of the current
// structure.
- auto *ME = cast<MemberExpr>(OED->getInit());
+ const auto *ME = cast<MemberExpr>(OED->getInit());
assert(isa<CXXThisExpr>(ME->getBase()) &&
"Base should be the current struct!");
MatchingVD = ME->getMemberDecl();
@@ -4258,7 +4455,9 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
if (InitAddrIt == CaptureDeviceAddrMap.end())
continue;
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
+ InitAddrIt, InitVD,
+ PvtVD]() {
// Initialize the temporary initialization variable with the address we
// get from the runtime library. We have to cast the source address
// because it is always a void *. References are materialized in the
@@ -4275,7 +4474,7 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
EmitDecl(*PvtVD);
      // The initialization variable reached its purpose in the emission
- // ofthe previous declaration, so we don't need it anymore.
+ // of the previous declaration, so we don't need it anymore.
LocalDeclMap.erase(InitVD);
// Return the address of the private variable.
@@ -4312,13 +4511,12 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
- CodeGenFunction &CGF, PrePostActionTy &Action) {
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
- // Codegen that selects wheather to generate the privatization code or not.
+ // Codegen that selects whether to generate the privatization code or not.
auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
&InnermostCodeGen](CodeGenFunction &CGF,
PrePostActionTy &Action) {
@@ -4337,8 +4535,9 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
Info.CaptureDeviceAddrMap);
(void)PrivateScope.Privatize();
RCG(CGF);
- } else
+ } else {
RCG(CGF);
+ }
};
// Forward the provided action to the privatization codegen.
@@ -4364,12 +4563,12 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
// Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
- if (auto *C = S.getSingleClause<OMPIfClause>())
+ if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = C->getCondition();
// Check if we have any device clause associated with the directive.
const Expr *Device = nullptr;
- if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ if (const auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();
// Set the action to signal privatization of device pointers.
@@ -4389,15 +4588,15 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective(
// Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
- if (auto *C = S.getSingleClause<OMPIfClause>())
+ if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = C->getCondition();
// Check if we have any device clause associated with the directive.
const Expr *Device = nullptr;
- if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ if (const auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_task);
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
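
A hypothetical pairing of the standalone data-mapping forms emitted here and in
the next function; the middle 'target' region works on data that stays resident
on the device between the two:

    void stage(float *a, int n, int dev) {
    #pragma omp target enter data map(to: a[0:n]) device(dev)
    #pragma omp target device(dev)
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0f;
    #pragma omp target exit data map(from: a[0:n]) device(dev)
    }
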
@@ -4410,15 +4609,15 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(
// Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
- if (auto *C = S.getSingleClause<OMPIfClause>())
+ if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = C->getCondition();
// Check if we have any device clause associated with the directive.
const Expr *Device = nullptr;
- if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ if (const auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_task);
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
@@ -4426,9 +4625,10 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
const OMPTargetParallelDirective &S,
PrePostActionTy &Action) {
// Get the captured statement associated with the 'parallel' region.
- auto *CS = S.getCapturedStmt(OMPD_parallel);
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
Action.Enter(CGF);
- auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
@@ -4440,8 +4640,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
};
emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
emitEmptyBoundParameters);
- emitPostUpdateForReductionClause(
- CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
@@ -4472,7 +4672,8 @@ static void emitTargetParallelForRegion(CodeGenFunction &CGF,
Action.Enter(CGF);
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPCancelStackRAII CancelRegion(
CGF, OMPD_target_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
@@ -4512,7 +4713,8 @@ emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
Action.Enter(CGF);
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};
@@ -4547,17 +4749,17 @@ void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
const ImplicitParamDecl *PVD,
CodeGenFunction::OMPPrivateScope &Privates) {
- auto *VDecl = cast<VarDecl>(Helper->getDecl());
- Privates.addPrivate(
- VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
+ const auto *VDecl = cast<VarDecl>(Helper->getDecl());
+ Privates.addPrivate(VDecl,
+ [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
// Emit outlined function for task construct.
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
- auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
+ Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
if (C->getNameModifier() == OMPD_unknown ||
@@ -4600,7 +4802,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
if (!CondConstant)
return;
} else {
- auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
+ llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
ContBlock = CGF.createBasicBlock("taskloop.if.end");
emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
CGF.getProfileCount(&S));
@@ -4631,14 +4833,14 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
(void)LoopScope.Privatize();
// Emit the loop iteration variable.
const Expr *IVExpr = S.getIterationVariable();
- const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+ const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
CGF.EmitVarDecl(*IVDecl);
CGF.EmitIgnoredExpr(S.getInit());
    // Emit the iteration count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
- if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit the calculation of the iteration count.
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
@@ -4668,7 +4870,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
const OMPTaskDataTy &Data) {
- auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
+ &Data](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
OutlinedFn, SharedsTy,
@@ -4677,15 +4880,16 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
CodeGen);
};
- if (Data.Nogroup)
- EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
- else {
+ if (Data.Nogroup) {
+ EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
+ } else {
CGM.getOpenMPRuntime().emitTaskgroupRegion(
*this,
[&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
PrePostActionTy &Action) {
Action.Enter(CGF);
- CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
+ Data);
},
S.getLocStart());
}
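
A hypothetical 'taskloop' showing the branch above: 'nogroup' takes the direct
path, otherwise the loop is wrapped in an implicit taskgroup region.

    #pragma omp taskloop grainsize(64) nogroup
    for (int i = 0; i < n; ++i)
      a[i] = 2 * a[i];
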
@@ -4710,14 +4914,44 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
// Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
- if (auto *C = S.getSingleClause<OMPIfClause>())
+ if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = C->getCondition();
// Check if we have any device clause associated with the directive.
const Expr *Device = nullptr;
- if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ if (const auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ OMPLexicalScope Scope(*this, S, OMPD_task);
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
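
A hypothetical 'target update' carrying the two clauses read above ('if'
becomes IfCond, 'device' becomes Device):

    #pragma omp target update to(a[0:n]) device(dev) if(n > 0)
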
+
+void CodeGenFunction::EmitSimpleOMPExecutableDirective(
+ const OMPExecutableDirective &D) {
+ if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
+ return;
+ auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+ emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
+ } else {
+ if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
+ for (const Expr *E : LD->counters()) {
+ if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
+ cast<DeclRefExpr>(E)->getDecl())) {
+ // Emit only those that were not explicitly referenced in clauses.
+ if (!CGF.LocalDeclMap.count(VD))
+ CGF.EmitVarDecl(*VD);
+ }
+ }
+ }
+ CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
+ }
+ };
+ OMPSimdLexicalScope Scope(*this, D);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this,
+ isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
+ : D.getDirectiveKind(),
+ CodeGen);
+}
+
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index 78928d04220d..41c8c943f54d 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -100,7 +100,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName()));
// Set the right visibility.
- CGM.setGlobalVisibility(VTT, RD, ForDefinition);
+ CGM.setGVProperties(VTT, RD);
}
llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 2d9bf3bce926..5a2ec65f7763 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -31,29 +31,12 @@ using namespace CodeGen;
CodeGenVTables::CodeGenVTables(CodeGenModule &CGM)
: CGM(CGM), VTContext(CGM.getContext().getVTableContext()) {}
-llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD,
- const ThunkInfo &Thunk) {
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
-
- // Compute the mangled name.
- SmallString<256> Name;
- llvm::raw_svector_ostream Out(Name);
- if (const CXXDestructorDecl* DD = dyn_cast<CXXDestructorDecl>(MD))
- getCXXABI().getMangleContext().mangleCXXDtorThunk(DD, GD.getDtorType(),
- Thunk.This, Out);
- else
- getCXXABI().getMangleContext().mangleThunk(MD, Thunk, Out);
-
- llvm::Type *Ty = getTypes().GetFunctionTypeForVTable(GD);
- return GetOrCreateLLVMFunction(Name, Ty, GD, /*ForVTable=*/true,
+llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
+ GlobalDecl GD) {
+ return GetOrCreateLLVMFunction(Name, FnTy, GD, /*ForVTable=*/true,
/*DontDefer=*/true, /*IsThunk=*/true);
}
-static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD,
- const ThunkInfo &Thunk, llvm::Function *Fn) {
- CGM.setGlobalVisibility(Fn, MD, ForDefinition);
-}
-
static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
llvm::Function *ThunkFn, bool ForVTable,
GlobalDecl GD) {
@@ -62,8 +45,12 @@ static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
!Thunk.Return.isEmpty());
// Set the right visibility.
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- setThunkVisibility(CGM, MD, Thunk, ThunkFn);
+ CGM.setGVProperties(ThunkFn, GD);
+
+ if (!CGM.getCXXABI().exportThunk()) {
+ ThunkFn->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ ThunkFn->setDSOLocal(true);
+ }
if (CGM.supportsCOMDAT() && ThunkFn->isWeakForLinker())
ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
@@ -236,7 +223,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
}
void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD,
- const CGFunctionInfo &FnInfo) {
+ const CGFunctionInfo &FnInfo,
+ bool IsUnprototyped) {
assert(!CurGD.getDecl() && "CurGD was already set!");
CurGD = GD;
CurFuncIsThunk = true;
@@ -245,21 +233,28 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD,
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
QualType ThisType = MD->getThisType(getContext());
const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
- QualType ResultType = CGM.getCXXABI().HasThisReturn(GD)
- ? ThisType
- : CGM.getCXXABI().hasMostDerivedReturn(GD)
- ? CGM.getContext().VoidPtrTy
- : FPT->getReturnType();
+ QualType ResultType;
+ if (IsUnprototyped)
+ ResultType = CGM.getContext().VoidTy;
+ else if (CGM.getCXXABI().HasThisReturn(GD))
+ ResultType = ThisType;
+ else if (CGM.getCXXABI().hasMostDerivedReturn(GD))
+ ResultType = CGM.getContext().VoidPtrTy;
+ else
+ ResultType = FPT->getReturnType();
FunctionArgList FunctionArgs;
// Create the implicit 'this' parameter declaration.
CGM.getCXXABI().buildThisParam(*this, FunctionArgs);
- // Add the rest of the parameters.
- FunctionArgs.append(MD->param_begin(), MD->param_end());
+ // Add the rest of the parameters, if we have a prototype to work with.
+ if (!IsUnprototyped) {
+ FunctionArgs.append(MD->param_begin(), MD->param_end());
- if (isa<CXXDestructorDecl>(MD))
- CGM.getCXXABI().addImplicitStructorParams(*this, ResultType, FunctionArgs);
+ if (isa<CXXDestructorDecl>(MD))
+ CGM.getCXXABI().addImplicitStructorParams(*this, ResultType,
+ FunctionArgs);
+ }
// Start defining the function.
auto NL = ApplyDebugLocation::CreateEmpty(*this);
@@ -285,7 +280,8 @@ void CodeGenFunction::FinishThunk() {
}
void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
- const ThunkInfo *Thunk) {
+ const ThunkInfo *Thunk,
+ bool IsUnprototyped) {
assert(isa<CXXMethodDecl>(CurGD.getDecl()) &&
"Please use a new CGF for this thunk");
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CurGD.getDecl());
@@ -296,13 +292,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
*this, LoadCXXThisAddress(), Thunk->This)
: LoadCXXThis();
- if (CurFnInfo->usesInAlloca()) {
+ if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
// We don't handle return adjusting thunks, because they require us to call
// the copy constructor. For now, fall through and pretend the return
// adjustment was empty so we don't crash.
if (Thunk && !Thunk->Return.isEmpty()) {
- CGM.ErrorUnsupported(
- MD, "non-trivial argument copy for return-adjusting thunk");
+ if (IsUnprototyped)
+ CGM.ErrorUnsupported(
+ MD, "return-adjusting thunk with incomplete parameter type");
+ else
+ CGM.ErrorUnsupported(
+ MD, "non-trivial argument copy for return-adjusting thunk");
}
EmitMustTailThunk(MD, AdjustedThisPtr, CalleePtr);
return;
@@ -429,55 +429,98 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
}
void CodeGenFunction::generateThunk(llvm::Function *Fn,
- const CGFunctionInfo &FnInfo,
- GlobalDecl GD, const ThunkInfo &Thunk) {
- StartThunk(Fn, GD, FnInfo);
+ const CGFunctionInfo &FnInfo, GlobalDecl GD,
+ const ThunkInfo &Thunk,
+ bool IsUnprototyped) {
+ StartThunk(Fn, GD, FnInfo, IsUnprototyped);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(*this);
- // Get our callee.
- llvm::Type *Ty =
- CGM.getTypes().GetFunctionType(CGM.getTypes().arrangeGlobalDeclaration(GD));
+ // Get our callee. Use a placeholder type if this method is unprototyped so
+ // that CodeGenModule doesn't try to set attributes.
+ llvm::Type *Ty;
+ if (IsUnprototyped)
+ Ty = llvm::StructType::get(getLLVMContext());
+ else
+ Ty = CGM.getTypes().GetFunctionType(FnInfo);
+
llvm::Constant *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
+ // Fix up the function type for an unprototyped musttail call.
+ if (IsUnprototyped)
+ Callee = llvm::ConstantExpr::getBitCast(Callee, Fn->getType());
+
// Make the call and return the result.
- EmitCallAndReturnForThunk(Callee, &Thunk);
+ EmitCallAndReturnForThunk(Callee, &Thunk, IsUnprototyped);
}
-void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk,
- bool ForVTable) {
- const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeGlobalDeclaration(GD);
+static bool shouldEmitVTableThunk(CodeGenModule &CGM, const CXXMethodDecl *MD,
+ bool IsUnprototyped, bool ForVTable) {
+ // Always emit thunks in the MS C++ ABI. We cannot rely on other TUs to
+ // provide thunks for us.
+ if (CGM.getTarget().getCXXABI().isMicrosoft())
+ return true;
- // FIXME: re-use FnInfo in this computation.
- llvm::Constant *C = CGM.GetAddrOfThunk(GD, Thunk);
- llvm::GlobalValue *Entry;
+ // In the Itanium C++ ABI, vtable thunks are provided by TUs that provide
+ // definitions of the main method. Therefore, emitting thunks with the vtable
+ // is purely an optimization. Emit the thunk if optimizations are enabled and
+ // all of the parameter types are complete.
+ if (ForVTable)
+ return CGM.getCodeGenOpts().OptimizationLevel && !IsUnprototyped;
- // Strip off a bitcast if we got one back.
- if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(C)) {
- assert(CE->getOpcode() == llvm::Instruction::BitCast);
- Entry = cast<llvm::GlobalValue>(CE->getOperand(0));
- } else {
- Entry = cast<llvm::GlobalValue>(C);
- }
+ // Always emit thunks along with the method definition.
+ return true;
+}
- // There's already a declaration with the same name, check if it has the same
- // type or if we need to replace it.
- if (Entry->getType()->getElementType() !=
- CGM.getTypes().GetFunctionTypeForVTable(GD)) {
- llvm::GlobalValue *OldThunkFn = Entry;
+llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,
+ const ThunkInfo &TI,
+ bool ForVTable) {
+ const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- // If the types mismatch then we have to rewrite the definition.
- assert(OldThunkFn->isDeclaration() &&
- "Shouldn't replace non-declaration");
+ // First, get a declaration. Compute the mangled name. Don't worry about
+ // getting the function prototype right, since we may only need this
+ // declaration to fill in a vtable slot.
+ SmallString<256> Name;
+ MangleContext &MCtx = CGM.getCXXABI().getMangleContext();
+ llvm::raw_svector_ostream Out(Name);
+ if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD))
+ MCtx.mangleCXXDtorThunk(DD, GD.getDtorType(), TI.This, Out);
+ else
+ MCtx.mangleThunk(MD, TI, Out);
+ llvm::Type *ThunkVTableTy = CGM.getTypes().GetFunctionTypeForVTable(GD);
+ llvm::Constant *Thunk = CGM.GetAddrOfThunk(Name, ThunkVTableTy, GD);
+
+ // If we don't need to emit a definition, return this declaration as is.
+ bool IsUnprototyped = !CGM.getTypes().isFuncTypeConvertible(
+ MD->getType()->castAs<FunctionType>());
+ if (!shouldEmitVTableThunk(CGM, MD, IsUnprototyped, ForVTable))
+ return Thunk;
+
+ // Arrange a function prototype appropriate for a function definition. In some
+ // cases in the MS ABI, we may need to build an unprototyped musttail thunk.
+ const CGFunctionInfo &FnInfo =
+ IsUnprototyped ? CGM.getTypes().arrangeUnprototypedMustTailThunk(MD)
+ : CGM.getTypes().arrangeGlobalDeclaration(GD);
+ llvm::FunctionType *ThunkFnTy = CGM.getTypes().GetFunctionType(FnInfo);
+
+ // If the type of the underlying GlobalValue is wrong, we'll have to replace
+ // it. It should be a declaration.
+ llvm::Function *ThunkFn = cast<llvm::Function>(Thunk->stripPointerCasts());
+ if (ThunkFn->getFunctionType() != ThunkFnTy) {
+ llvm::GlobalValue *OldThunkFn = ThunkFn;
+
+ assert(OldThunkFn->isDeclaration() && "Shouldn't replace non-declaration");
// Remove the name from the old thunk function and get a new thunk.
OldThunkFn->setName(StringRef());
- Entry = cast<llvm::GlobalValue>(CGM.GetAddrOfThunk(GD, Thunk));
+ ThunkFn = llvm::Function::Create(ThunkFnTy, llvm::Function::ExternalLinkage,
+ Name.str(), &CGM.getModule());
+ CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn);
// If needed, replace the old thunk with a bitcast.
if (!OldThunkFn->use_empty()) {
llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(Entry, OldThunkFn->getType());
+ llvm::ConstantExpr::getBitCast(ThunkFn, OldThunkFn->getType());
OldThunkFn->replaceAllUsesWith(NewPtrForOldDecl);
}
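A minimal scenario for the Itanium policy described above (illustrative): the overrider sits at a non-zero base offset, so its vtable slot needs a this-adjusting thunk. The TU defining C::f emits the real thunk; a TU that only emits C's vtable adds an available_externally copy purely as an inlining aid, and only when optimizing.

    struct A { virtual void f(); };
    struct B { virtual void g(); };
    struct C : B, A {
      void f() override;   // thunk required for the A-in-C vtable slot
    };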
@@ -485,61 +528,48 @@ void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk,
OldThunkFn->eraseFromParent();
}
- llvm::Function *ThunkFn = cast<llvm::Function>(Entry);
bool ABIHasKeyFunctions = CGM.getTarget().getCXXABI().hasKeyFunctions();
bool UseAvailableExternallyLinkage = ForVTable && ABIHasKeyFunctions;
if (!ThunkFn->isDeclaration()) {
if (!ABIHasKeyFunctions || UseAvailableExternallyLinkage) {
// There is already a thunk emitted for this function, do nothing.
- return;
+ return ThunkFn;
}
- setThunkProperties(CGM, Thunk, ThunkFn, ForVTable, GD);
- return;
+ setThunkProperties(CGM, TI, ThunkFn, ForVTable, GD);
+ return ThunkFn;
}
+ // If this will be unprototyped, add the "thunk" attribute so that LLVM knows
+ // that the return type is meaningless. These thunks can be used to call
+ // functions with differing return types, and the caller is required to cast
+ // the prototype appropriately to extract the correct value.
+ if (IsUnprototyped)
+ ThunkFn->addFnAttr("thunk");
+
CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
- if (ThunkFn->isVarArg()) {
+ if (!IsUnprototyped && ThunkFn->isVarArg()) {
// Varargs thunks are special; we can't just generate a call because
// we can't copy the varargs. Our implementation is rather
// expensive/sucky at the moment, so don't generate the thunk unless
// we have to.
// FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
if (UseAvailableExternallyLinkage)
- return;
- ThunkFn =
- CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, Thunk);
+ return ThunkFn;
+ ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
+ TI);
} else {
// Normal thunk body generation.
- CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, Thunk);
+ CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);
}
- setThunkProperties(CGM, Thunk, ThunkFn, ForVTable, GD);
-}
-
-void CodeGenVTables::maybeEmitThunkForVTable(GlobalDecl GD,
- const ThunkInfo &Thunk) {
- // If the ABI has key functions, only the TU with the key function should emit
- // the thunk. However, we can allow inlining of thunks if we emit them with
- // available_externally linkage together with vtables when optimizations are
- // enabled.
- if (CGM.getTarget().getCXXABI().hasKeyFunctions() &&
- !CGM.getCodeGenOpts().OptimizationLevel)
- return;
-
- // We can't emit thunks for member functions with incomplete types.
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- if (!CGM.getTypes().isFuncTypeConvertible(
- MD->getType()->castAs<FunctionType>()))
- return;
-
- emitThunk(GD, Thunk, /*ForVTable=*/true);
+ setThunkProperties(CGM, TI, ThunkFn, ForVTable, GD);
+ return ThunkFn;
}
-void CodeGenVTables::EmitThunks(GlobalDecl GD)
-{
+void CodeGenVTables::EmitThunks(GlobalDecl GD) {
const CXXMethodDecl *MD =
cast<CXXMethodDecl>(GD.getDecl())->getCanonicalDecl();
@@ -554,7 +584,7 @@ void CodeGenVTables::EmitThunks(GlobalDecl GD)
return;
for (const ThunkInfo& Thunk : *ThunkInfoVector)
- emitThunk(GD, Thunk, /*ForVTable=*/false);
+ maybeEmitThunk(GD, Thunk, /*ForVTable=*/false);
}
void CodeGenVTables::addVTableComponent(
@@ -647,9 +677,8 @@ void CodeGenVTables::addVTableComponent(
layout.vtable_thunks()[nextVTableThunkIndex].first == idx) {
auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second;
- maybeEmitThunkForVTable(GD, thunkInfo);
nextVTableThunkIndex++;
- fnPtr = CGM.GetAddrOfThunk(GD, thunkInfo);
+ fnPtr = maybeEmitThunk(GD, thunkInfo, /*ForVTable=*/true);
// Otherwise we can use the method definition directly.
} else {
@@ -730,7 +759,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
// Create the variable that will hold the construction vtable.
llvm::GlobalVariable *VTable =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage);
- CGM.setGlobalVisibility(VTable, RD, ForDefinition);
+ CGM.setGVProperties(VTable, RD);
// V-tables are always unnamed_addr.
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
@@ -845,7 +874,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
llvm_unreachable("Invalid TemplateSpecializationKind!");
}
-/// This is a callback from Sema to tell us that that a particular vtable is
+/// This is a callback from Sema to tell us that a particular vtable is
/// required to be emitted in this translation unit.
///
/// This is only called for vtables that _must_ be emitted (mainly due to key
@@ -983,31 +1012,29 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
CharUnits PointerWidth =
Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
- typedef std::pair<const CXXRecordDecl *, unsigned> BSEntry;
- std::vector<BSEntry> BitsetEntries;
- // Create a bit set entry for each address point.
+ typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint;
+ std::vector<AddressPoint> AddressPoints;
for (auto &&AP : VTLayout.getAddressPoints())
- BitsetEntries.push_back(
- std::make_pair(AP.first.getBase(),
- VTLayout.getVTableOffset(AP.second.VTableIndex) +
- AP.second.AddressPointIndex));
-
- // Sort the bit set entries for determinism.
- std::sort(BitsetEntries.begin(), BitsetEntries.end(),
- [this](const BSEntry &E1, const BSEntry &E2) {
- if (&E1 == &E2)
+ AddressPoints.push_back(std::make_pair(
+ AP.first.getBase(), VTLayout.getVTableOffset(AP.second.VTableIndex) +
+ AP.second.AddressPointIndex));
+
+ // Sort the address points for determinism.
+ llvm::sort(AddressPoints.begin(), AddressPoints.end(),
+ [this](const AddressPoint &AP1, const AddressPoint &AP2) {
+ if (&AP1 == &AP2)
return false;
std::string S1;
llvm::raw_string_ostream O1(S1);
getCXXABI().getMangleContext().mangleTypeName(
- QualType(E1.first->getTypeForDecl(), 0), O1);
+ QualType(AP1.first->getTypeForDecl(), 0), O1);
O1.flush();
std::string S2;
llvm::raw_string_ostream O2(S2);
getCXXABI().getMangleContext().mangleTypeName(
- QualType(E2.first->getTypeForDecl(), 0), O2);
+ QualType(AP2.first->getTypeForDecl(), 0), O2);
O2.flush();
if (S1 < S2)
@@ -1015,10 +1042,26 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
if (S1 != S2)
return false;
- return E1.second < E2.second;
+ return AP1.second < AP2.second;
});
- for (auto BitsetEntry : BitsetEntries)
- AddVTableTypeMetadata(VTable, PointerWidth * BitsetEntry.second,
- BitsetEntry.first);
+ ArrayRef<VTableComponent> Comps = VTLayout.vtable_components();
+ for (auto AP : AddressPoints) {
+ // Create type metadata for the address point.
+ AddVTableTypeMetadata(VTable, PointerWidth * AP.second, AP.first);
+
+ // The class associated with each address point could also potentially be
+ // used for indirect calls via a member function pointer, so we need to
+ // annotate the address of each function pointer with the appropriate member
+ // function pointer type.
+ for (unsigned I = 0; I != Comps.size(); ++I) {
+ if (Comps[I].getKind() != VTableComponent::CK_FunctionPointer)
+ continue;
+ llvm::Metadata *MD = CreateMetadataIdentifierForVirtualMemPtrType(
+ Context.getMemberPointerType(
+ Comps[I].getFunctionDecl()->getType(),
+ Context.getRecordType(AP.first).getTypePtr()));
+ VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD);
+ }
+ }
}
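The second loop above attaches a member-function-pointer identifier to every virtual function slot. A sketch of the call pattern this is meant to check under the CFI member-function-call scheme (the -fsanitize=cfi-mfcall spelling is an assumption here, not taken from this diff):

    struct Base { virtual int hook(int); };

    int dispatch(Base *obj, int (Base::*mp)(int)) {
      // Checked indirect call: the slot reached through 'mp' must carry
      // the metadata for 'int (Base::*)(int)' attached above.
      return (obj->*mp)(42);
    }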
diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h
index b92212c368a9..a11474a15ea4 100644
--- a/lib/CodeGen/CGVTables.h
+++ b/lib/CodeGen/CGVTables.h
@@ -57,12 +57,10 @@ class CodeGenVTables {
/// Cache for the deleted virtual member call function.
llvm::Constant *DeletedVirtualFn = nullptr;
- /// emitThunk - Emit a single thunk.
- void emitThunk(GlobalDecl GD, const ThunkInfo &Thunk, bool ForVTable);
-
- /// maybeEmitThunkForVTable - Emit the given thunk for the vtable if needed by
- /// the ABI.
- void maybeEmitThunkForVTable(GlobalDecl GD, const ThunkInfo &Thunk);
+ /// Get the address of a thunk and emit it if necessary.
+ llvm::Constant *maybeEmitThunk(GlobalDecl GD,
+ const ThunkInfo &ThunkAdjustments,
+ bool ForVTable);
void addVTableComponent(ConstantArrayBuilder &builder,
const VTableLayout &layout, unsigned idx,
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index 7d07ea4516c9..418bda1f41bb 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -193,7 +193,7 @@ class LValue {
// The alignment to use when accessing this lvalue. (For vector elements,
// this is the alignment of the whole vector.)
- int64_t Alignment;
+ unsigned Alignment;
// objective-c's ivar
bool Ivar:1;
@@ -215,13 +215,13 @@ class LValue {
// to make the default bitfield pattern all-zeroes.
bool ImpreciseLifetime : 1;
- LValueBaseInfo BaseInfo;
- TBAAAccessInfo TBAAInfo;
-
// This flag shows if a nontemporal load/stores should be used when accessing
// this lvalue.
bool Nontemporal : 1;
+ LValueBaseInfo BaseInfo;
+ TBAAAccessInfo TBAAInfo;
+
Expr *BaseIvarExp;
private:
@@ -231,7 +231,10 @@ private:
"initializing l-value with zero alignment!");
this->Type = Type;
this->Quals = Quals;
- this->Alignment = Alignment.getQuantity();
+ const unsigned MaxAlign = 1U << 31;
+ this->Alignment = Alignment.getQuantity() <= MaxAlign
+ ? Alignment.getQuantity()
+ : MaxAlign;
assert(this->Alignment == Alignment.getQuantity() &&
"Alignment exceeds allowed max!");
this->BaseInfo = BaseInfo;
@@ -398,7 +401,7 @@ public:
return R;
}
- /// \brief Create a new object to represent a bit-field access.
+ /// Create a new object to represent a bit-field access.
///
/// \param Addr - The base address of the bit-field sequence this
/// bit-field refers to.
@@ -469,17 +472,25 @@ class AggValueSlot {
/// evaluating an expression which constructs such an object.
bool AliasedFlag : 1;
+ /// This is set to true if the tail padding of this slot might overlap
+ /// another object that may have already been initialized (and whose
+ /// value must be preserved by this initialization). If so, we may only
+ /// store up to the dsize of the type. Otherwise we can widen stores to
+ /// the size of the type.
+ bool OverlapFlag : 1;
+
public:
enum IsAliased_t { IsNotAliased, IsAliased };
enum IsDestructed_t { IsNotDestructed, IsDestructed };
enum IsZeroed_t { IsNotZeroed, IsZeroed };
+ enum Overlap_t { DoesNotOverlap, MayOverlap };
enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
/// ignored - Returns an aggregate value slot indicating that the
/// aggregate value is being ignored.
static AggValueSlot ignored() {
return forAddr(Address::invalid(), Qualifiers(), IsNotDestructed,
- DoesNotNeedGCBarriers, IsNotAliased);
+ DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap);
}
/// forAddr - Make a slot for an aggregate value.
@@ -497,6 +508,7 @@ public:
IsDestructed_t isDestructed,
NeedsGCBarriers_t needsGC,
IsAliased_t isAliased,
+ Overlap_t mayOverlap,
IsZeroed_t isZeroed = IsNotZeroed) {
AggValueSlot AV;
if (addr.isValid()) {
@@ -511,6 +523,7 @@ public:
AV.ObjCGCFlag = needsGC;
AV.ZeroedFlag = isZeroed;
AV.AliasedFlag = isAliased;
+ AV.OverlapFlag = mayOverlap;
return AV;
}
@@ -518,9 +531,10 @@ public:
IsDestructed_t isDestructed,
NeedsGCBarriers_t needsGC,
IsAliased_t isAliased,
+ Overlap_t mayOverlap,
IsZeroed_t isZeroed = IsNotZeroed) {
- return forAddr(LV.getAddress(),
- LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed);
+ return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC,
+ isAliased, mayOverlap, isZeroed);
}
IsDestructed_t isExternallyDestructed() const {
@@ -568,6 +582,10 @@ public:
return IsAliased_t(AliasedFlag);
}
+ Overlap_t mayOverlap() const {
+ return Overlap_t(OverlapFlag);
+ }
+
RValue asRValue() const {
if (isIgnored()) {
return RValue::getIgnored();
@@ -580,6 +598,14 @@ public:
IsZeroed_t isZeroed() const {
return IsZeroed_t(ZeroedFlag);
}
+
+ /// Get the preferred size to use when storing a value to this slot. This
+ /// is the type size unless that might overlap another object, in which
+ /// case it's the dsize.
+ CharUnits getPreferredSize(ASTContext &Ctx, QualType Type) const {
+ return mayOverlap() ? Ctx.getTypeInfoDataSizeInChars(Type).first
+ : Ctx.getTypeSizeInChars(Type);
+ }
};
} // end namespace CodeGen
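Why MayOverlap matters, in one Itanium-ABI example (illustrative): a derived class may reuse a non-POD base's tail padding, so initializing the base subobject with a full sizeof-sized store would clobber the derived field. getPreferredSize caps such stores at the dsize instead.

    struct A { A(); int i; char c; };  // sizeof(A) == 8, dsize(A) == 5
    struct D : A { char d; };          // D::d is placed in A's tail padding
                                       // at offset 5; storing 8 bytes into
                                       // the A-in-D base would overwrite it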
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 84248cc64719..2a0f4f0e83ec 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
Coverage
IPO
IRReader
+ AggressiveInstCombine
InstCombine
Instrumentation
LTO
@@ -31,6 +32,10 @@ if (CLANG_BUILT_STANDALONE)
set(codegen_deps)
endif()
+if (MSVC)
+ set_source_files_properties(CodeGenModule.cpp PROPERTIES COMPILE_FLAGS /bigobj)
+endif()
+
add_clang_library(clangCodeGen
BackendUtil.cpp
CGAtomic.cpp
@@ -56,6 +61,7 @@ add_clang_library(clangCodeGen
CGExprScalar.cpp
CGGPUBuiltin.cpp
CGLoopInfo.cpp
+ CGNonTrivialStruct.cpp
CGObjC.cpp
CGObjCGNU.cpp
CGObjCMac.cpp
@@ -93,7 +99,6 @@ add_clang_library(clangCodeGen
LINK_LIBS
clangAnalysis
clangAST
- clangAnalysis
clangBasic
clangFrontend
clangLex
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 6ca69d63cdce..7ca55070d4a0 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -126,7 +126,7 @@ namespace clang {
Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
CodeGenOpts, C, CoverageInfo)),
LinkModules(std::move(LinkModules)) {
- llvm::TimePassesIsEnabled = TimePasses;
+ FrontendTimesIsEnabled = TimePasses;
}
llvm::Module *getModule() const { return Gen->GetModule(); }
std::unique_ptr<llvm::Module> takeModule() {
@@ -144,12 +144,12 @@ namespace clang {
Context = &Ctx;
- if (llvm::TimePassesIsEnabled)
+ if (FrontendTimesIsEnabled)
LLVMIRGeneration.startTimer();
Gen->Initialize(Ctx);
- if (llvm::TimePassesIsEnabled)
+ if (FrontendTimesIsEnabled)
LLVMIRGeneration.stopTimer();
}
@@ -159,7 +159,7 @@ namespace clang {
"LLVM IR generation of declaration");
// Recurse.
- if (llvm::TimePassesIsEnabled) {
+ if (FrontendTimesIsEnabled) {
LLVMIRGenerationRefCount += 1;
if (LLVMIRGenerationRefCount == 1)
LLVMIRGeneration.startTimer();
@@ -167,7 +167,7 @@ namespace clang {
Gen->HandleTopLevelDecl(D);
- if (llvm::TimePassesIsEnabled) {
+ if (FrontendTimesIsEnabled) {
LLVMIRGenerationRefCount -= 1;
if (LLVMIRGenerationRefCount == 0)
LLVMIRGeneration.stopTimer();
@@ -180,12 +180,12 @@ namespace clang {
PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
Context->getSourceManager(),
"LLVM IR generation of inline function");
- if (llvm::TimePassesIsEnabled)
+ if (FrontendTimesIsEnabled)
LLVMIRGeneration.startTimer();
Gen->HandleInlineFunctionDefinition(D);
- if (llvm::TimePassesIsEnabled)
+ if (FrontendTimesIsEnabled)
LLVMIRGeneration.stopTimer();
}
@@ -227,7 +227,7 @@ namespace clang {
void HandleTranslationUnit(ASTContext &C) override {
{
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
- if (llvm::TimePassesIsEnabled) {
+ if (FrontendTimesIsEnabled) {
LLVMIRGenerationRefCount += 1;
if (LLVMIRGenerationRefCount == 1)
LLVMIRGeneration.startTimer();
@@ -235,13 +235,13 @@ namespace clang {
Gen->HandleTranslationUnit(C);
- if (llvm::TimePassesIsEnabled) {
+ if (FrontendTimesIsEnabled) {
LLVMIRGenerationRefCount -= 1;
if (LLVMIRGenerationRefCount == 0)
LLVMIRGeneration.stopTimer();
}
- IRGenFinished = true;
+ IRGenFinished = true;
}
// Silently ignore if we weren't initialized for some reason.
@@ -341,17 +341,17 @@ namespace clang {
SourceLocation LocCookie);
void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI);
- /// \brief Specialized handler for InlineAsm diagnostic.
+ /// Specialized handler for InlineAsm diagnostic.
/// \return True if the diagnostic has been successfully reported, false
/// otherwise.
bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D);
- /// \brief Specialized handler for StackSize diagnostic.
+ /// Specialized handler for StackSize diagnostic.
/// \return True if the diagnostic has been successfully reported, false
/// otherwise.
bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
- /// \brief Specialized handler for unsupported backend feature diagnostic.
+ /// Specialized handler for unsupported backend feature diagnostic.
void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
- /// \brief Specialized handlers for optimization remarks.
+ /// Specialized handlers for optimization remarks.
/// Note that these handlers only accept remarks and they always handle
/// them.
void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D,
@@ -697,7 +697,7 @@ void BackendConsumer::OptimizationFailureHandler(
EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure);
}
-/// \brief This function is invoked when the backend needs
+/// This function is invoked when the backend needs
/// to report something to the user.
void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
unsigned DiagID = diag::err_fe_inline_asm;
@@ -846,7 +846,10 @@ GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) {
std::unique_ptr<ASTConsumer>
CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
BackendAction BA = static_cast<BackendAction>(Act);
- std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, InFile, BA);
+ std::unique_ptr<raw_pwrite_stream> OS = CI.takeOutputStream();
+ if (!OS)
+ OS = GetOutputStream(CI, InFile, BA);
+
if (BA != Backend_EmitNothing && !OS)
return nullptr;
@@ -947,12 +950,21 @@ std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) {
return {};
};
- Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef);
- if (!BMOrErr)
- return DiagErrors(BMOrErr.takeError());
-
+ Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
+ if (!BMsOrErr)
+ return DiagErrors(BMsOrErr.takeError());
+ BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr);
+      // We have nothing to do if the file contains no ThinLTO module. This is
+      // possible if ThinLTO compilation was not able to split the module. The
+      // file's content has already been processed by indexing and will be
+      // passed to the linker in the merged object file.
+ if (!Bm) {
+ auto M = llvm::make_unique<llvm::Module>("empty", *VMContext);
+ M->setTargetTriple(CI.getTargetOpts().Triple);
+ return M;
+ }
Expected<std::unique_ptr<llvm::Module>> MOrErr =
- BMOrErr->parseModule(*VMContext);
+ Bm->parseModule(*VMContext);
if (!MOrErr)
return DiagErrors(MOrErr.takeError());
return std::move(*MOrErr);
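For reference, the FindThinLTOModule overload used above has roughly this shape (reconstructed from the surrounding file, which this hunk does not show): it scans the bitcode module list for the one carrying a ThinLTO summary.

    static llvm::BitcodeModule *
    FindThinLTOModule(llvm::MutableArrayRef<llvm::BitcodeModule> BMs) {
      for (llvm::BitcodeModule &BM : BMs) {
        llvm::Expected<llvm::BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
        if (LTOInfo && LTOInfo->IsThinLTO)
          return &BM;      // first module with a ThinLTO summary wins
      }
      return nullptr;      // none found: the caller emits an empty module
    }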
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 9dbd7cc3fcbf..3c582688e91e 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -65,25 +65,9 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
: CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()),
Builder(cgm, cgm.getModule().getContext(), llvm::ConstantFolder(),
CGBuilderInserterTy(this)),
- CurFn(nullptr), ReturnValue(Address::invalid()),
- CapturedStmtInfo(nullptr), SanOpts(CGM.getLangOpts().Sanitize),
- IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false),
- SawAsmBlock(false), IsOutlinedSEHHelper(false), BlockInfo(nullptr),
- BlockPointer(nullptr), LambdaThisCaptureField(nullptr),
- NormalCleanupDest(nullptr), NextCleanupDestIndex(1),
- FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr),
- EHSelectorSlot(nullptr), DebugInfo(CGM.getModuleDebugInfo()),
- DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(nullptr),
- PGO(cgm), SwitchInsn(nullptr), SwitchWeights(nullptr),
- CaseRangeBlock(nullptr), UnreachableBlock(nullptr), NumReturnExprs(0),
- NumSimpleReturnExprs(0), CXXABIThisDecl(nullptr),
- CXXABIThisValue(nullptr), CXXThisValue(nullptr),
- CXXStructorImplicitParamDecl(nullptr),
- CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr),
- CurLexicalScope(nullptr), TerminateLandingPad(nullptr),
- TerminateHandler(nullptr), TrapBB(nullptr),
- ShouldEmitLifetimeMarkers(
- shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) {
+ SanOpts(CGM.getLangOpts().Sanitize), DebugInfo(CGM.getModuleDebugInfo()),
+ PGO(cgm), ShouldEmitLifetimeMarkers(shouldEmitLifetimeMarkers(
+ CGM.getCodeGenOpts(), CGM.getLangOpts())) {
if (!suppressNewContext)
CGM.getCXXABI().getMangleContext().startNewFunction();
@@ -419,6 +403,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
EmitIfUsed(*this, TerminateHandler);
EmitIfUsed(*this, UnreachableBlock);
+ for (const auto &FuncletAndParent : TerminateFunclets)
+ EmitIfUsed(*this, FuncletAndParent.second);
+
if (CGM.getCodeGenOpts().EmitDeclMetadata)
EmitDeclMetadata();
@@ -436,11 +423,17 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// if compiled with no optimizations. We do it for coroutine as the lifetime
// of CleanupDestSlot alloca make correct coroutine frame building very
// difficult.
- if (NormalCleanupDest && isCoroutine()) {
+ if (NormalCleanupDest.isValid() && isCoroutine()) {
llvm::DominatorTree DT(*CurFn);
- llvm::PromoteMemToReg(NormalCleanupDest, DT);
- NormalCleanupDest = nullptr;
+ llvm::PromoteMemToReg(
+ cast<llvm::AllocaInst>(NormalCleanupDest.getPointer()), DT);
+ NormalCleanupDest = Address::invalid();
}
+
+  // Add the min-legal-vector-width attribute.
+ if (LargestVectorWidth != 0)
+ CurFn->addFnAttr("min-legal-vector-width",
+ llvm::utostr(LargestVectorWidth));
}
/// ShouldInstrumentFunction - Return true if the current function should be
@@ -462,9 +455,19 @@ bool CodeGenFunction::ShouldXRayInstrumentFunction() const {
}
/// AlwaysEmitXRayCustomEvents - Return true if we should emit IR for calls to
-/// the __xray_customevent(...) builin calls, when doing XRay instrumentation.
+/// the __xray_customevent(...) builtin calls, when doing XRay instrumentation.
bool CodeGenFunction::AlwaysEmitXRayCustomEvents() const {
- return CGM.getCodeGenOpts().XRayAlwaysEmitCustomEvents;
+ return CGM.getCodeGenOpts().XRayInstrumentFunctions &&
+ (CGM.getCodeGenOpts().XRayAlwaysEmitCustomEvents ||
+ CGM.getCodeGenOpts().XRayInstrumentationBundle.Mask ==
+ XRayInstrKind::Custom);
+}
+
+bool CodeGenFunction::AlwaysEmitXRayTypedEvents() const {
+ return CGM.getCodeGenOpts().XRayInstrumentFunctions &&
+ (CGM.getCodeGenOpts().XRayAlwaysEmitTypedEvents ||
+ CGM.getCodeGenOpts().XRayInstrumentationBundle.Mask ==
+ XRayInstrKind::Typed);
}
llvm::Constant *
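What these two predicates gate (illustrative): lowering of the XRay event builtins, independently of whether the surrounding function itself receives entry/exit sleds.

    void trace_point(const char *msg, int len) {
      // Lowered to an XRay custom-event sled iff AlwaysEmitXRayCustomEvents()
      // returns true; __xray_typedevent is gated the same way by
      // AlwaysEmitXRayTypedEvents().
      __xray_customevent(msg, len);
    }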
@@ -842,14 +845,24 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (D) {
// Apply the no_sanitize* attributes to SanOpts.
- for (auto Attr : D->specific_attrs<NoSanitizeAttr>())
- SanOpts.Mask &= ~Attr->getMask();
+ for (auto Attr : D->specific_attrs<NoSanitizeAttr>()) {
+ SanitizerMask mask = Attr->getMask();
+ SanOpts.Mask &= ~mask;
+ if (mask & SanitizerKind::Address)
+ SanOpts.set(SanitizerKind::KernelAddress, false);
+ if (mask & SanitizerKind::KernelAddress)
+ SanOpts.set(SanitizerKind::Address, false);
+ if (mask & SanitizerKind::HWAddress)
+ SanOpts.set(SanitizerKind::KernelHWAddress, false);
+ if (mask & SanitizerKind::KernelHWAddress)
+ SanOpts.set(SanitizerKind::HWAddress, false);
+ }
}
// Apply sanitizer attributes to the function.
if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
- if (SanOpts.hasOneOf(SanitizerKind::HWAddress))
+ if (SanOpts.hasOneOf(SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress))
Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
if (SanOpts.has(SanitizerKind::Thread))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
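The mask fix-up above exists because the user and kernel flavors each share a single IR attribute (SanitizeAddress, SanitizeHWAddress). Opting out of one must therefore drop its sibling too, or the function would still be instrumented when building with the kernel variant (illustrative):

    // Even under -fsanitize=kernel-address, this function must end up
    // without the SanitizeAddress attribute.
    __attribute__((no_sanitize("address")))
    void bypass(void) { /* not instrumented by ASan or KASan */ }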
@@ -857,6 +870,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
if (SanOpts.has(SanitizerKind::SafeStack))
Fn->addFnAttr(llvm::Attribute::SafeStack);
+ if (SanOpts.has(SanitizerKind::ShadowCallStack))
+ Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
+
+ // Apply fuzzing attribute to the function.
+ if (SanOpts.hasOneOf(SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink))
+ Fn->addFnAttr(llvm::Attribute::OptForFuzzing);
  // Ignore TSan memory accesses from within ObjC/ObjC++ dealloc, initialize,
  // .cxx_destruct, __destroy_helper_block_ and all of their callees at run time.
@@ -884,7 +903,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
}
// Apply xray attributes to the function (as a string, for now)
- if (D && ShouldXRayInstrumentFunction()) {
+ bool InstrumentXray = ShouldXRayInstrumentFunction() &&
+ CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::Function);
+ if (D && InstrumentXray) {
if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
if (XRayAttr->alwaysXRayInstrument())
Fn->addFnAttr("function-instrument", "xray-always");
@@ -921,8 +943,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) {
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) {
+ // Remove any (C++17) exception specifications, to allow calling e.g. a
+ // noexcept function through a non-noexcept pointer.
+ auto ProtoTy =
+ getContext().getFunctionTypeWithExceptionSpec(FD->getType(),
+ EST_None);
llvm::Constant *FTRTTIConst =
- CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true);
+ CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
llvm::Constant *FTRTTIConstEncoded =
EncodeAddrForUseInPrologue(Fn, FTRTTIConst);
llvm::Constant *PrologueStructElems[] = {PrologueSig,
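A call pattern the EST_None stripping keeps working under -fsanitize=function (illustrative): in C++17 the exception specification is part of the function type, yet a noexcept function may be called through a plain function pointer, so the prologue RTTI must describe the type without its exception spec or the check would falsely trip.

    void callee() noexcept {}

    void caller() {
      void (*fp)() = callee;  // dropping noexcept in the conversion is valid
      fp();                   // prologue RTTI compares equal to 'void ()'
    }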
@@ -987,7 +1014,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
ArgTypes.push_back(VD->getType());
QualType FnType = getContext().getFunctionType(
RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
- DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder);
+ DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, CurFuncIsThunk,
+ Builder);
}
if (ShouldInstrumentFunction()) {
@@ -1006,10 +1034,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// The attribute "counting-function" is set to mcount function name which is
// architecture dependent.
if (CGM.getCodeGenOpts().InstrumentForProfiling) {
- if (CGM.getCodeGenOpts().CallFEntry)
- Fn->addFnAttr("fentry-call", "true");
- else {
- if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) {
+    // Calls to fentry/mcount should not be generated if the function has
+    // the no_instrument_function attribute.
+ if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) {
+ if (CGM.getCodeGenOpts().CallFEntry)
+ Fn->addFnAttr("fentry-call", "true");
+ else {
Fn->addFnAttr("instrument-function-entry-inlined",
getTarget().getMCountName());
}
@@ -1055,6 +1085,11 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
EmitStartEHSpec(CurCodeDecl);
PrologueCleanupDepth = EHStack.stable_begin();
+
+ // Emit OpenMP specific initialization of the device functions.
+ if (getLangOpts().OpenMP && CurCodeDecl)
+ CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl);
+
EmitFunctionProlog(*CurFnInfo, CurFn, Args);
if (D && isa<CXXMethodDecl>(D) && cast<CXXMethodDecl>(D)->isInstance()) {
@@ -1108,8 +1143,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// may have a static invoker function, which may call this operator with
// a null 'this' pointer.
if (isLambdaCallOperator(MD) &&
- cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() ==
- LCD_None)
+ MD->getParent()->getLambdaCaptureDefault() == LCD_None)
SkippedChecks.set(SanitizerKind::Null, true);
EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall
@@ -1141,6 +1175,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// Emit a location at the end of the prologue.
if (CGDebugInfo *DI = getDebugInfo())
DI->EmitLocation(Builder, StartLoc);
+
+ // TODO: Do we need to handle this in two places like we do with
+ // target-features/target-cpu?
+ if (CurFuncDecl)
+ if (const auto *VecWidth = CurFuncDecl->getAttr<MinVectorWidthAttr>())
+ LargestVectorWidth = VecWidth->getVectorWidth();
}
void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
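Source form that feeds LargestVectorWidth through MinVectorWidthAttr (illustrative):

    typedef int v16si __attribute__((vector_size(64)));

    __attribute__((target("avx512f"), min_vector_width(512)))
    v16si add512(v16si a, v16si b) {
      return a + b;   // emitted with "min-legal-vector-width"="512"
    }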
@@ -1748,12 +1788,9 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
if (const VariableArrayType *vlaType =
dyn_cast_or_null<VariableArrayType>(
getContext().getAsArrayType(Ty))) {
- QualType eltType;
- llvm::Value *numElts;
- std::tie(numElts, eltType) = getVLASize(vlaType);
-
- SizeVal = numElts;
- CharUnits eltSize = getContext().getTypeSizeInChars(eltType);
+ auto VlaSize = getVLASize(vlaType);
+ SizeVal = VlaSize.NumElts;
+ CharUnits eltSize = getContext().getTypeSizeInChars(VlaSize.Type);
if (!eltSize.isOne())
SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(eltSize));
vla = vlaType;
@@ -1836,7 +1873,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
// this is the size of the VLA in bytes, not its size in elements.
llvm::Value *numVLAElements = nullptr;
if (isa<VariableArrayType>(arrayType)) {
- numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).first;
+ numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).NumElts;
// Walk into all VLAs. This doesn't require changes to addr,
// which has type T* where T is the first non-VLA element type.
@@ -1917,14 +1954,13 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
return numElements;
}
-std::pair<llvm::Value*, QualType>
-CodeGenFunction::getVLASize(QualType type) {
+CodeGenFunction::VlaSizePair CodeGenFunction::getVLASize(QualType type) {
const VariableArrayType *vla = getContext().getAsVariableArrayType(type);
assert(vla && "type was not a variable array type!");
return getVLASize(vla);
}
-std::pair<llvm::Value*, QualType>
+CodeGenFunction::VlaSizePair
CodeGenFunction::getVLASize(const VariableArrayType *type) {
// The number of elements so far; always size_t.
llvm::Value *numElements = nullptr;
@@ -1945,7 +1981,22 @@ CodeGenFunction::getVLASize(const VariableArrayType *type) {
}
} while ((type = getContext().getAsVariableArrayType(elementType)));
- return std::pair<llvm::Value*,QualType>(numElements, elementType);
+ return { numElements, elementType };
+}
+
+CodeGenFunction::VlaSizePair
+CodeGenFunction::getVLAElements1D(QualType type) {
+ const VariableArrayType *vla = getContext().getAsVariableArrayType(type);
+ assert(vla && "type was not a variable array type!");
+ return getVLAElements1D(vla);
+}
+
+CodeGenFunction::VlaSizePair
+CodeGenFunction::getVLAElements1D(const VariableArrayType *Vla) {
+ llvm::Value *VlaSize = VLASizeMap[Vla->getSizeExpr()];
+ assert(VlaSize && "no size for VLA!");
+ assert(VlaSize->getType() == SizeTy);
+ return { VlaSize, Vla->getElementType() };
}
void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
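The difference between the two queries, sketched for a two-level VLA (assumes code running inside a CodeGenFunction member, with VlaTy the type of 'int a[n][m]'):

    auto Flat = getVLASize(VlaTy);        // every VLA level folded together
    llvm::Value *Total = Flat.NumElts;    // n * m, as a size_t value
    QualType Elt = Flat.Type;             // int

    auto Outer = getVLAElements1D(VlaTy); // only the outermost level peeled
    llvm::Value *Rows = Outer.NumElts;    // just n
    QualType Row = Outer.Type;            // int[m], still variably modified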
@@ -2228,7 +2279,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures,
return std::all_of(
ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) {
SmallVector<StringRef, 1> OrFeatures;
- Feature.split(OrFeatures, "|");
+ Feature.split(OrFeatures, '|');
return std::any_of(OrFeatures.begin(), OrFeatures.end(),
[&](StringRef Feature) {
if (!CallerFeatureMap.lookup(Feature)) {
@@ -2266,17 +2317,28 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
// Return if the builtin doesn't have any required features.
if (!FeatureList || StringRef(FeatureList) == "")
return;
- StringRef(FeatureList).split(ReqFeatures, ",");
+ StringRef(FeatureList).split(ReqFeatures, ',');
if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature))
CGM.getDiags().Report(E->getLocStart(), diag::err_builtin_needs_feature)
<< TargetDecl->getDeclName()
<< CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
- } else if (TargetDecl->hasAttr<TargetAttr>()) {
+ } else if (TargetDecl->hasAttr<TargetAttr>() ||
+ TargetDecl->hasAttr<CPUSpecificAttr>()) {
// Get the required features for the callee.
+
+ const TargetAttr *TD = TargetDecl->getAttr<TargetAttr>();
+ TargetAttr::ParsedTargetAttr ParsedAttr = CGM.filterFunctionTargetAttrs(TD);
+
SmallVector<StringRef, 1> ReqFeatures;
llvm::StringMap<bool> CalleeFeatureMap;
CGM.getFunctionFeatureMap(CalleeFeatureMap, TargetDecl);
+
+ for (const auto &F : ParsedAttr.Features) {
+ if (F[0] == '+' && CalleeFeatureMap.lookup(F.substr(1)))
+ ReqFeatures.push_back(StringRef(F).substr(1));
+ }
+
for (const auto &F : CalleeFeatureMap) {
// Only positive features are "required".
if (F.getValue())
@@ -2297,6 +2359,99 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
CGM.getSanStats().create(IRB, SSK);
}
+llvm::Value *CodeGenFunction::FormResolverCondition(
+ const TargetMultiVersionResolverOption &RO) {
+ llvm::Value *TrueCondition = nullptr;
+ if (!RO.ParsedAttribute.Architecture.empty())
+ TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture);
+
+ if (!RO.ParsedAttribute.Features.empty()) {
+ SmallVector<StringRef, 8> FeatureList;
+ llvm::for_each(RO.ParsedAttribute.Features,
+ [&FeatureList](const std::string &Feature) {
+ FeatureList.push_back(StringRef{Feature}.substr(1));
+ });
+ llvm::Value *FeatureCmp = EmitX86CpuSupports(FeatureList);
+ TrueCondition = TrueCondition ? Builder.CreateAnd(TrueCondition, FeatureCmp)
+ : FeatureCmp;
+ }
+ return TrueCondition;
+}
+
+void CodeGenFunction::EmitTargetMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<TargetMultiVersionResolverOption> Options) {
+ assert((getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86 ||
+ getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86_64) &&
+ "Only implemented for x86 targets");
+
+ // Main function's basic block.
+ llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);
+ Builder.SetInsertPoint(CurBlock);
+ EmitX86CpuInit();
+
+ llvm::Function *DefaultFunc = nullptr;
+ for (const TargetMultiVersionResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+ llvm::Value *TrueCondition = FormResolverCondition(RO);
+
+ if (!TrueCondition) {
+ DefaultFunc = RO.Function;
+ } else {
+ llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver);
+ llvm::IRBuilder<> RetBuilder(RetBlock);
+ RetBuilder.CreateRet(RO.Function);
+ CurBlock = createBasicBlock("ro_else", Resolver);
+ Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
+ }
+ }
+
+ assert(DefaultFunc && "No default version?");
+ // Emit return from the 'else-ist' block.
+ Builder.SetInsertPoint(CurBlock);
+ Builder.CreateRet(DefaultFunc);
+}
+
+void CodeGenFunction::EmitCPUDispatchMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<CPUDispatchMultiVersionResolverOption> Options) {
+ assert((getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86 ||
+ getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86_64) &&
+ "Only implemented for x86 targets");
+
+ // Main function's basic block.
+ llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
+ Builder.SetInsertPoint(CurBlock);
+ EmitX86CpuInit();
+
+ for (const CPUDispatchMultiVersionResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+
+ // "generic" case should catch-all.
+ if (RO.FeatureMask == 0) {
+ Builder.CreateRet(RO.Function);
+ return;
+ }
+ llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
+ llvm::IRBuilder<> RetBuilder(RetBlock);
+ RetBuilder.CreateRet(RO.Function);
+ CurBlock = createBasicBlock("resolver_else", Resolver);
+ llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask);
+ Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
+ }
+
+ Builder.SetInsertPoint(CurBlock);
+ llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
+}
+
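Source constructs that end up in these resolvers (illustrative):

    __attribute__((target("avx2")))      int work();  // FormResolverCondition:
    __attribute__((target("arch=atom"))) int work();  // cpu_supports / cpu_is
    __attribute__((target("default")))   int work();  // becomes DefaultFunc,
                                                      // returned from the
                                                      // final else block

    int call_work() { return work(); }  // dispatches through the resolver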
llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) {
if (CGDebugInfo *DI = getDebugInfo())
return DI->SourceLocToDebugLoc(Location);
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index dd4c2e43ef64..79870ed59c96 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -34,6 +34,7 @@
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
@@ -137,6 +138,88 @@ enum SanitizerHandler {
#undef SANITIZER_CHECK
};
+/// Helper class with most of the code for saving a value for a
+/// conditional expression cleanup.
+struct DominatingLLVMValue {
+ typedef llvm::PointerIntPair<llvm::Value*, 1, bool> saved_type;
+
+ /// Answer whether the given value needs extra work to be saved.
+ static bool needsSaving(llvm::Value *value) {
+ // If it's not an instruction, we don't need to save.
+ if (!isa<llvm::Instruction>(value)) return false;
+
+ // If it's an instruction in the entry block, we don't need to save.
+ llvm::BasicBlock *block = cast<llvm::Instruction>(value)->getParent();
+ return (block != &block->getParent()->getEntryBlock());
+ }
+
+ static saved_type save(CodeGenFunction &CGF, llvm::Value *value);
+ static llvm::Value *restore(CodeGenFunction &CGF, saved_type value);
+};
+
+/// A partial specialization of DominatingValue for llvm::Values that
+/// might be llvm::Instructions.
+template <class T> struct DominatingPointer<T,true> : DominatingLLVMValue {
+ typedef T *type;
+ static type restore(CodeGenFunction &CGF, saved_type value) {
+ return static_cast<T*>(DominatingLLVMValue::restore(CGF, value));
+ }
+};
+
+/// A specialization of DominatingValue for Address.
+template <> struct DominatingValue<Address> {
+ typedef Address type;
+
+ struct saved_type {
+ DominatingLLVMValue::saved_type SavedValue;
+ CharUnits Alignment;
+ };
+
+ static bool needsSaving(type value) {
+ return DominatingLLVMValue::needsSaving(value.getPointer());
+ }
+ static saved_type save(CodeGenFunction &CGF, type value) {
+ return { DominatingLLVMValue::save(CGF, value.getPointer()),
+ value.getAlignment() };
+ }
+ static type restore(CodeGenFunction &CGF, saved_type value) {
+ return Address(DominatingLLVMValue::restore(CGF, value.SavedValue),
+ value.Alignment);
+ }
+};
+
+/// A specialization of DominatingValue for RValue.
+template <> struct DominatingValue<RValue> {
+ typedef RValue type;
+ class saved_type {
+ enum Kind { ScalarLiteral, ScalarAddress, AggregateLiteral,
+ AggregateAddress, ComplexAddress };
+
+ llvm::Value *Value;
+ unsigned K : 3;
+ unsigned Align : 29;
+ saved_type(llvm::Value *v, Kind k, unsigned a = 0)
+ : Value(v), K(k), Align(a) {}
+
+ public:
+ static bool needsSaving(RValue value);
+ static saved_type save(CodeGenFunction &CGF, RValue value);
+ RValue restore(CodeGenFunction &CGF);
+
+ // implementations in CGCleanup.cpp
+ };
+
+ static bool needsSaving(type value) {
+ return saved_type::needsSaving(value);
+ }
+ static saved_type save(CodeGenFunction &CGF, type value) {
+ return saved_type::save(CGF, value);
+ }
+ static type restore(CodeGenFunction &CGF, saved_type value) {
+ return value.restore(CGF);
+ }
+};
+
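The save/restore bodies live in CGCleanup.cpp; their contract, as a sketch: a value defined outside the entry block may not dominate a conditionally-emitted cleanup, so save() spills it to a stack slot and restore() reloads it where the cleanup is finally emitted.

    DominatingLLVMValue::saved_type
    DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *V) {
      if (!needsSaving(V))
        return saved_type(V, /*spilled=*/false);
      // Spill to an alloca so every path to the cleanup can reload it.
      Address Slot = CGF.CreateDefaultAlignTempAlloca(V->getType(),
                                                      "cond-cleanup.save");
      CGF.Builder.CreateStore(V, Slot);
      return saved_type(Slot.getPointer(), /*spilled=*/true);
    }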
/// CodeGenFunction - This class organizes the per-function state that is used
/// while generating LLVM code.
class CodeGenFunction : public CodeGenTypeCache {
@@ -200,7 +283,7 @@ public:
Address UB)>
CodeGenDispatchBoundsTy;
- /// \brief CGBuilder insert helper. This function is called after an
+ /// CGBuilder insert helper. This function is called after an
/// instruction is created using Builder.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock *BB,
@@ -213,7 +296,7 @@ public:
const Decl *CurCodeDecl;
const CGFunctionInfo *CurFnInfo;
QualType FnRetTy;
- llvm::Function *CurFn;
+ llvm::Function *CurFn = nullptr;
// Holds coroutine data if the current function is a coroutine. We use a
// wrapper to manage its lifetime, so that we don't have to define CGCoroData
@@ -241,7 +324,7 @@ public:
/// ReturnValue - The temporary alloca to hold the return
/// value. This is invalid iff the function has no return value.
- Address ReturnValue;
+ Address ReturnValue = Address::invalid();
/// Return true if a label was seen in the current scope.
bool hasLabelBeenSeenInCurrentScope() const {
@@ -254,7 +337,7 @@ public:
/// we prefer to insert allocas.
llvm::AssertingVH<llvm::Instruction> AllocaInsertPt;
- /// \brief API for captured statement code generation.
+ /// API for captured statement code generation.
class CGCapturedStmtInfo {
public:
explicit CGCapturedStmtInfo(CapturedRegionKind K = CR_Default)
@@ -282,10 +365,10 @@ public:
CapturedRegionKind getKind() const { return Kind; }
virtual void setContextValue(llvm::Value *V) { ThisValue = V; }
- // \brief Retrieve the value of the context parameter.
+ // Retrieve the value of the context parameter.
virtual llvm::Value *getContextValue() const { return ThisValue; }
- /// \brief Lookup the captured field decl for a variable.
+ /// Lookup the captured field decl for a variable.
virtual const FieldDecl *lookup(const VarDecl *VD) const {
return CaptureFields.lookup(VD->getCanonicalDecl());
}
@@ -297,32 +380,32 @@ public:
return true;
}
- /// \brief Emit the captured statement body.
+ /// Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) {
CGF.incrementProfileCounter(S);
CGF.EmitStmt(S);
}
- /// \brief Get the name of the capture helper.
+ /// Get the name of the capture helper.
virtual StringRef getHelperName() const { return "__captured_stmt"; }
private:
- /// \brief The kind of captured statement being generated.
+ /// The kind of captured statement being generated.
CapturedRegionKind Kind;
- /// \brief Keep the map between VarDecl and FieldDecl.
+ /// Keep the map between VarDecl and FieldDecl.
llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields;
- /// \brief The base address of the captured record, passed in as the first
+ /// The base address of the captured record, passed in as the first
/// argument of the parallel region function.
llvm::Value *ThisValue;
- /// \brief Captured 'this' type.
+ /// Captured 'this' type.
FieldDecl *CXXThisFieldDecl;
};
- CGCapturedStmtInfo *CapturedStmtInfo;
+ CGCapturedStmtInfo *CapturedStmtInfo = nullptr;
- /// \brief RAII for correct setting/restoring of CapturedStmtInfo.
+ /// RAII for correct setting/restoring of CapturedStmtInfo.
class CGCapturedStmtRAII {
private:
CodeGenFunction &CGF;
@@ -361,13 +444,13 @@ public:
}
};
- /// \brief Sanitizers enabled for this function.
+ /// Sanitizers enabled for this function.
SanitizerSet SanOpts;
- /// \brief True if CodeGen currently emits code implementing sanitizer checks.
- bool IsSanitizerScope;
+ /// True if CodeGen currently emits code implementing sanitizer checks.
+ bool IsSanitizerScope = false;
- /// \brief RAII object to set/unset CodeGenFunction::IsSanitizerScope.
+ /// RAII object to set/unset CodeGenFunction::IsSanitizerScope.
class SanitizerScope {
CodeGenFunction *CGF;
public:
@@ -377,28 +460,28 @@ public:
/// In C++, whether we are code generating a thunk. This controls whether we
/// should emit cleanups.
- bool CurFuncIsThunk;
+ bool CurFuncIsThunk = false;
/// In ARC, whether we should autorelease the return value.
- bool AutoreleaseResult;
+ bool AutoreleaseResult = false;
/// Whether we processed a Microsoft-style asm block during CodeGen. These can
/// potentially set the return value.
- bool SawAsmBlock;
+ bool SawAsmBlock = false;
const FunctionDecl *CurSEHParent = nullptr;
/// True if the current function is an outlined SEH helper. This can be a
/// finally block or filter expression.
- bool IsOutlinedSEHHelper;
+ bool IsOutlinedSEHHelper = false;
- const CodeGen::CGBlockInfo *BlockInfo;
- llvm::Value *BlockPointer;
+ const CodeGen::CGBlockInfo *BlockInfo = nullptr;
+ llvm::Value *BlockPointer = nullptr;
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
- FieldDecl *LambdaThisCaptureField;
+ FieldDecl *LambdaThisCaptureField = nullptr;
- /// \brief A mapping from NRVO variables to the flags used to indicate
+ /// A mapping from NRVO variables to the flags used to indicate
/// when the NRVO has been applied to this variable.
llvm::DenseMap<const VarDecl *, llvm::Value *> NRVOFlags;
@@ -426,30 +509,33 @@ public:
/// The size of the following cleanup object.
unsigned Size;
/// The kind of cleanup to push: a value from the CleanupKind enumeration.
- CleanupKind Kind;
+ unsigned Kind : 31;
+ /// Whether this is a conditional cleanup.
+ unsigned IsConditional : 1;
size_t getSize() const { return Size; }
- CleanupKind getKind() const { return Kind; }
+ CleanupKind getKind() const { return (CleanupKind)Kind; }
+ bool isConditional() const { return IsConditional; }
};
/// i32s containing the indexes of the cleanup destinations.
- llvm::AllocaInst *NormalCleanupDest;
+ Address NormalCleanupDest = Address::invalid();
- unsigned NextCleanupDestIndex;
+ unsigned NextCleanupDestIndex = 1;
/// FirstBlockInfo - The head of a singly-linked-list of block layouts.
- CGBlockInfo *FirstBlockInfo;
+ CGBlockInfo *FirstBlockInfo = nullptr;
/// EHResumeBlock - Unified block containing a call to llvm.eh.resume.
- llvm::BasicBlock *EHResumeBlock;
+ llvm::BasicBlock *EHResumeBlock = nullptr;
/// The exception slot. All landing pads write the current exception pointer
/// into this alloca.
- llvm::Value *ExceptionSlot;
+ llvm::Value *ExceptionSlot = nullptr;
/// The selector slot. Under the MandatoryCleanup model, all landing pads
/// write the current selector value into this alloca.
- llvm::AllocaInst *EHSelectorSlot;
+ llvm::AllocaInst *EHSelectorSlot = nullptr;
/// A stack of exception code slots. Entering an __except block pushes a slot
/// on the stack and leaving pops one. The __exception_code() intrinsic loads
@@ -524,28 +610,52 @@ public:
initFullExprCleanup();
}
- /// \brief Queue a cleanup to be pushed after finishing the current
+ /// Queue a cleanup to be pushed after finishing the current
/// full-expression.
template <class T, class... As>
void pushCleanupAfterFullExpr(CleanupKind Kind, As... A) {
- assert(!isInConditionalBranch() && "can't defer conditional cleanup");
+ if (!isInConditionalBranch())
+ return pushCleanupAfterFullExprImpl<T>(Kind, Address::invalid(), A...);
+
+ Address ActiveFlag = createCleanupActiveFlag();
+ assert(!DominatingValue<Address>::needsSaving(ActiveFlag) &&
+ "cleanup active flag should never need saving");
- LifetimeExtendedCleanupHeader Header = { sizeof(T), Kind };
+ typedef std::tuple<typename DominatingValue<As>::saved_type...> SavedTuple;
+ SavedTuple Saved{saveValueInCond(A)...};
+
+ typedef EHScopeStack::ConditionalCleanup<T, As...> CleanupType;
+ pushCleanupAfterFullExprImpl<CleanupType>(Kind, ActiveFlag, Saved);
+ }
+
+ template <class T, class... As>
+ void pushCleanupAfterFullExprImpl(CleanupKind Kind, Address ActiveFlag,
+ As... A) {
+ LifetimeExtendedCleanupHeader Header = {sizeof(T), Kind,
+ ActiveFlag.isValid()};
size_t OldSize = LifetimeExtendedCleanupStack.size();
LifetimeExtendedCleanupStack.resize(
- LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size);
+ LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size +
+ (Header.IsConditional ? sizeof(ActiveFlag) : 0));
static_assert(sizeof(Header) % alignof(T) == 0,
"Cleanup will be allocated on misaligned address");
char *Buffer = &LifetimeExtendedCleanupStack[OldSize];
new (Buffer) LifetimeExtendedCleanupHeader(Header);
new (Buffer + sizeof(Header)) T(A...);
+ if (Header.IsConditional)
+ new (Buffer + sizeof(Header) + sizeof(T)) Address(ActiveFlag);
}
- /// Set up the last cleaup that was pushed as a conditional
+ /// Set up the last cleanup that was pushed as a conditional
/// full-expression cleanup.
- void initFullExprCleanup();
+ void initFullExprCleanup() {
+ initFullExprCleanupWithFlag(createCleanupActiveFlag());
+ }
+
+ void initFullExprCleanupWithFlag(Address ActiveFlag);
+ Address createCleanupActiveFlag();
/// PushDestructorCleanup - Push a cleanup to call the
/// complete-object destructor of an object of the given type at the
@@ -583,10 +693,10 @@ public:
void ActivateCleanupBlock(EHScopeStack::stable_iterator Cleanup,
llvm::Instruction *DominatingIP);
- /// \brief Enters a new scope for capturing cleanups, all of which
+ /// Enters a new scope for capturing cleanups, all of which
/// will be executed once the scope is exited.
class RunCleanupsScope {
- EHScopeStack::stable_iterator CleanupStackDepth;
+ EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth;
size_t LifetimeExtendedCleanupStackSize;
bool OldDidCallStackSave;
protected:
@@ -600,7 +710,7 @@ public:
CodeGenFunction& CGF;
public:
- /// \brief Enter a new cleanup scope.
+ /// Enter a new cleanup scope.
explicit RunCleanupsScope(CodeGenFunction &CGF)
: PerformCleanup(true), CGF(CGF)
{
@@ -609,20 +719,22 @@ public:
CGF.LifetimeExtendedCleanupStack.size();
OldDidCallStackSave = CGF.DidCallStackSave;
CGF.DidCallStackSave = false;
+ OldCleanupScopeDepth = CGF.CurrentCleanupScopeDepth;
+ CGF.CurrentCleanupScopeDepth = CleanupStackDepth;
}
- /// \brief Exit this cleanup scope, emitting any accumulated cleanups.
+ /// Exit this cleanup scope, emitting any accumulated cleanups.
~RunCleanupsScope() {
if (PerformCleanup)
ForceCleanup();
}
- /// \brief Determine whether this scope requires any cleanups.
+ /// Determine whether this scope requires any cleanups.
bool requiresCleanups() const {
return CGF.EHStack.stable_begin() != CleanupStackDepth;
}
- /// \brief Force the emission of cleanups now, instead of waiting
+ /// Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
/// \param ValuesToReload - A list of values that need to be available at
/// the insertion point after cleanup emission. If cleanup emission created
@@ -634,9 +746,14 @@ public:
CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize,
ValuesToReload);
PerformCleanup = false;
+ CGF.CurrentCleanupScopeDepth = OldCleanupScopeDepth;
}
};
+ // Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
+ EHScopeStack::stable_iterator CurrentCleanupScopeDepth =
+ EHScopeStack::stable_end();
+
class LexicalScope : public RunCleanupsScope {
SourceRange Range;
SmallVector<const LabelDecl*, 4> Labels;
@@ -646,7 +763,7 @@ public:
void operator=(const LexicalScope &) = delete;
public:
- /// \brief Enter a new cleanup scope.
+ /// Enter a new cleanup scope.
explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
: RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
CGF.CurLexicalScope = this;
@@ -659,7 +776,7 @@ public:
Labels.push_back(label);
}
- /// \brief Exit this cleanup scope, emitting any accumulated
+ /// Exit this cleanup scope, emitting any accumulated
/// cleanups.
~LexicalScope() {
if (CGDebugInfo *DI = CGF.getDebugInfo())
@@ -673,7 +790,7 @@ public:
}
}
- /// \brief Force the emission of cleanups now, instead of waiting
+ /// Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
void ForceCleanup() {
CGF.CurLexicalScope = ParentScope;
@@ -692,57 +809,107 @@ public:
typedef llvm::DenseMap<const Decl *, Address> DeclMapTy;
- /// \brief The scope used to remap some variables as private in the OpenMP
- /// loop body (or other captured region emitted without outlining), and to
- /// restore old vars back on exit.
- class OMPPrivateScope : public RunCleanupsScope {
+  /// The class used to temporarily assign some variables new addresses.
+ class OMPMapVars {
DeclMapTy SavedLocals;
- DeclMapTy SavedPrivates;
-
- private:
- OMPPrivateScope(const OMPPrivateScope &) = delete;
- void operator=(const OMPPrivateScope &) = delete;
+ DeclMapTy SavedTempAddresses;
+ OMPMapVars(const OMPMapVars &) = delete;
+ void operator=(const OMPMapVars &) = delete;
public:
- /// \brief Enter a new OpenMP private scope.
- explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {}
-
- /// \brief Registers \a LocalVD variable as a private and apply \a
- /// PrivateGen function for it to generate corresponding private variable.
- /// \a PrivateGen returns an address of the generated private variable.
- /// \return true if the variable is registered as private, false if it has
- /// been privatized already.
- bool
- addPrivate(const VarDecl *LocalVD,
- llvm::function_ref<Address()> PrivateGen) {
- assert(PerformCleanup && "adding private to dead scope");
+ explicit OMPMapVars() = default;
+ ~OMPMapVars() {
+      assert(SavedLocals.empty() && "Did not restore original addresses.");
+    }
+ /// Sets the address of the variable \p LocalVD to be \p TempAddr in
+ /// function \p CGF.
+ /// \return true if the variable's address was set for the first time, false
+ /// if it had already been mapped.
+ bool setVarAddr(CodeGenFunction &CGF, const VarDecl *LocalVD,
+ Address TempAddr) {
LocalVD = LocalVD->getCanonicalDecl();
// Only save it once.
if (SavedLocals.count(LocalVD)) return false;
// Copy the existing local entry to SavedLocals.
auto it = CGF.LocalDeclMap.find(LocalVD);
- if (it != CGF.LocalDeclMap.end()) {
- SavedLocals.insert({LocalVD, it->second});
- } else {
- SavedLocals.insert({LocalVD, Address::invalid()});
- }
+ if (it != CGF.LocalDeclMap.end())
+ SavedLocals.try_emplace(LocalVD, it->second);
+ else
+ SavedLocals.try_emplace(LocalVD, Address::invalid());
// Generate the private entry.
- Address Addr = PrivateGen();
QualType VarTy = LocalVD->getType();
if (VarTy->isReferenceType()) {
Address Temp = CGF.CreateMemTemp(VarTy);
- CGF.Builder.CreateStore(Addr.getPointer(), Temp);
- Addr = Temp;
+ CGF.Builder.CreateStore(TempAddr.getPointer(), Temp);
+ TempAddr = Temp;
}
- SavedPrivates.insert({LocalVD, Addr});
+ SavedTempAddresses.try_emplace(LocalVD, TempAddr);
return true;
}
- /// \brief Privatizes local variables previously registered as private.
+ /// Applies new addresses to the list of the variables.
+ /// \return true if at least one variable is using a new address, false
+ /// otherwise.
+ bool apply(CodeGenFunction &CGF) {
+ copyInto(SavedTempAddresses, CGF.LocalDeclMap);
+ SavedTempAddresses.clear();
+ return !SavedLocals.empty();
+ }
+
+ /// Restores original addresses of the variables.
+ void restore(CodeGenFunction &CGF) {
+ if (!SavedLocals.empty()) {
+ copyInto(SavedLocals, CGF.LocalDeclMap);
+ SavedLocals.clear();
+ }
+ }
+
+ private:
+ /// Copy all the entries in the source map over the corresponding
+ /// entries in the destination, which must exist.
+ static void copyInto(const DeclMapTy &Src, DeclMapTy &Dest) {
+ for (auto &Pair : Src) {
+ if (!Pair.second.isValid()) {
+ Dest.erase(Pair.first);
+ continue;
+ }
+
+ auto I = Dest.find(Pair.first);
+ if (I != Dest.end())
+ I->second = Pair.second;
+ else
+ Dest.insert(Pair);
+ }
+ }
+ };
+
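// A sketch of the intended OMPMapVars protocol (CGF, VD and NewAddr are
// assumed to be in scope): stage a remapping, apply it around the region,
// and restore the original addresses before the object is destroyed.
CodeGenFunction::OMPMapVars Remap;
(void)Remap.setVarAddr(CGF, VD, NewAddr); // remembers the old mapping
(void)Remap.apply(CGF);                   // installs the staged addresses
// ... emit the region with VD rebound to NewAddr ...
Remap.restore(CGF);                       // required before ~OMPMapVars()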
+ /// The scope used to remap some variables as private in the OpenMP loop body
+ /// (or other captured region emitted without outlining), and to restore old
+ /// vars back on exit.
+ class OMPPrivateScope : public RunCleanupsScope {
+ OMPMapVars MappedVars;
+ OMPPrivateScope(const OMPPrivateScope &) = delete;
+ void operator=(const OMPPrivateScope &) = delete;
+
+ public:
+ /// Enter a new OpenMP private scope.
+ explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {}
+
+ /// Registers the \p LocalVD variable as private and applies the
+ /// \p PrivateGen function to generate the corresponding private variable.
+ /// \p PrivateGen returns the address of the generated private variable.
+ /// \return true if the variable is registered as private, false if it has
+ /// been privatized already.
+ bool addPrivate(const VarDecl *LocalVD,
+ const llvm::function_ref<Address()> PrivateGen) {
+ assert(PerformCleanup && "adding private to dead scope");
+ return MappedVars.setVarAddr(CGF, LocalVD, PrivateGen());
+ }
+
+ /// Privatizes local variables previously registered as private.
/// Registration is separate from the actual privatization to allow
/// initializers to use the values of the original variables, not the private
/// ones.
/// This is important, for example, if the private variable is a class
@@ -750,19 +917,14 @@ public:
/// variables. But at initialization original variables must be used, not
/// private copies.
/// \return true if at least one variable was privatized, false otherwise.
- bool Privatize() {
- copyInto(SavedPrivates, CGF.LocalDeclMap);
- SavedPrivates.clear();
- return !SavedLocals.empty();
- }
+ bool Privatize() { return MappedVars.apply(CGF); }
void ForceCleanup() {
RunCleanupsScope::ForceCleanup();
- copyInto(SavedLocals, CGF.LocalDeclMap);
- SavedLocals.clear();
+ MappedVars.restore(CGF);
}
- /// \brief Exit scope - all the mapped variables are restored.
+ /// Exit scope - all the mapped variables are restored.
~OMPPrivateScope() {
if (PerformCleanup)
ForceCleanup();
@@ -773,34 +935,15 @@ public:
VD = VD->getCanonicalDecl();
return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
}
-
- private:
- /// Copy all the entries in the source map over the corresponding
- /// entries in the destination, which must exist.
- static void copyInto(const DeclMapTy &src, DeclMapTy &dest) {
- for (auto &pair : src) {
- if (!pair.second.isValid()) {
- dest.erase(pair.first);
- continue;
- }
-
- auto it = dest.find(pair.first);
- if (it != dest.end()) {
- it->second = pair.second;
- } else {
- dest.insert(pair);
- }
- }
- }
};
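// A sketch of typical OMPPrivateScope use (CGF and VD assumed): register the
// private copies first, then Privatize(), so that initializers run against
// the original variables.
CodeGenFunction::OMPPrivateScope PrivScope(CGF);
(void)PrivScope.addPrivate(VD, [&CGF, VD]() -> Address {
  return CGF.CreateMemTemp(VD->getType(), ".omp.priv");
});
(void)PrivScope.Privatize(); // from here on, VD resolves to the private copy
// ... emit the region body; ~OMPPrivateScope restores the originals ...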
- /// \brief Takes the old cleanup stack size and emits the cleanup blocks
+ /// Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added.
void
PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
std::initializer_list<llvm::Value **> ValuesToReload = {});
- /// \brief Takes the old cleanup stack size and emits the cleanup blocks
+ /// Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added, then adds all lifetime-extended cleanups from
/// the given position to the stack.
void
@@ -843,7 +986,8 @@ public:
llvm::BasicBlock *getEHResumeBlock(bool isCleanup);
llvm::BasicBlock *getEHDispatchBlock(EHScopeStack::stable_iterator scope);
- llvm::BasicBlock *getMSVCDispatchBlock(EHScopeStack::stable_iterator scope);
+ llvm::BasicBlock *
+ getFuncletEHDispatchBlock(EHScopeStack::stable_iterator scope);
/// An object to manage conditionally-evaluated expressions.
class ConditionalEvaluation {
@@ -1052,22 +1196,27 @@ public:
private:
CGDebugInfo *DebugInfo;
- bool DisableDebugInfo;
+ bool DisableDebugInfo = false;
/// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid
/// calling llvm.stacksave for multiple VLAs in the same scope.
- bool DidCallStackSave;
+ bool DidCallStackSave = false;
/// IndirectBranch - The first time an indirect goto is seen we create a block
/// with an indirect branch. Every time we see the address of a label taken,
/// we add the label to the indirect goto. Every subsequent indirect goto is
/// codegen'd as a jump to the IndirectBranch's basic block.
- llvm::IndirectBrInst *IndirectBranch;
+ llvm::IndirectBrInst *IndirectBranch = nullptr;
/// LocalDeclMap - This keeps track of the LLVM allocas or globals for local C
/// decls.
DeclMapTy LocalDeclMap;
+ // Keep track of the cleanups for callee-destructed parameters pushed to the
+ // cleanup stack so that they can be deactivated later.
+ llvm::DenseMap<const ParmVarDecl *, EHScopeStack::stable_iterator>
+ CalleeDestructedParamCleanups;
+
/// SizeArguments - If a ParmVarDecl had the pass_object_size attribute, this
/// will contain a mapping from said ParmVarDecl to its implicit "object_size"
/// parameter.
@@ -1119,7 +1268,7 @@ private:
/// Emits the exit block with the special codegen procedure specific to the
/// related OpenMP construct, plus code for normal construct cleanup.
void emitExit(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
- const llvm::function_ref<void(CodeGenFunction &)> &CodeGen) {
+ const llvm::function_ref<void(CodeGenFunction &)> CodeGen) {
if (Stack.back().Kind == Kind && getExitBlock().isValid()) {
assert(CGF.getOMPCancelDestination(Kind).isValid());
assert(CGF.HaveInsertPoint());
@@ -1207,13 +1356,13 @@ private:
/// SwitchInsn - This is nearest current switch instruction. It is null if
/// current context is not in a switch.
- llvm::SwitchInst *SwitchInsn;
+ llvm::SwitchInst *SwitchInsn = nullptr;
/// The branch weights of SwitchInsn when doing instrumentation based PGO.
- SmallVector<uint64_t, 16> *SwitchWeights;
+ SmallVector<uint64_t, 16> *SwitchWeights = nullptr;
/// CaseRangeBlock - This block holds the condition check for the last case
/// statement range in the current switch instruction.
- llvm::BasicBlock *CaseRangeBlock;
+ llvm::BasicBlock *CaseRangeBlock = nullptr;
/// OpaqueLValues - Keeps track of the current set of opaque value
/// expressions.
@@ -1230,13 +1379,13 @@ private:
/// A block containing a single 'unreachable' instruction. Created
/// lazily by getUnreachableBlock().
- llvm::BasicBlock *UnreachableBlock;
+ llvm::BasicBlock *UnreachableBlock = nullptr;
/// Count of the number of return expressions in the function.
- unsigned NumReturnExprs;
+ unsigned NumReturnExprs = 0;
/// Count the number of simple (constant) return expressions in the function.
- unsigned NumSimpleReturnExprs;
+ unsigned NumSimpleReturnExprs = 0;
/// The last regular (non-return) debug location (breakpoint) in the function.
SourceLocation LastStopPoint;
@@ -1356,9 +1505,9 @@ public:
private:
/// CXXThisDecl - When generating code for a C++ member function,
/// this will hold the implicit 'this' declaration.
- ImplicitParamDecl *CXXABIThisDecl;
- llvm::Value *CXXABIThisValue;
- llvm::Value *CXXThisValue;
+ ImplicitParamDecl *CXXABIThisDecl = nullptr;
+ llvm::Value *CXXABIThisValue = nullptr;
+ llvm::Value *CXXThisValue = nullptr;
CharUnits CXXABIThisAlignment;
CharUnits CXXThisAlignment;
@@ -1376,16 +1525,16 @@ private:
/// CXXStructorImplicitParamDecl - When generating code for a constructor or
/// destructor, this will hold the implicit argument (e.g. VTT).
- ImplicitParamDecl *CXXStructorImplicitParamDecl;
- llvm::Value *CXXStructorImplicitParamValue;
+ ImplicitParamDecl *CXXStructorImplicitParamDecl = nullptr;
+ llvm::Value *CXXStructorImplicitParamValue = nullptr;
/// OutermostConditional - Points to the outermost active
/// conditional control. This is used so that we know if a
/// temporary should be destroyed conditionally.
- ConditionalEvaluation *OutermostConditional;
+ ConditionalEvaluation *OutermostConditional = nullptr;
/// The current lexical scope.
- LexicalScope *CurLexicalScope;
+ LexicalScope *CurLexicalScope = nullptr;
/// The current source location that should be used for exception
/// handling code.
@@ -1416,14 +1565,21 @@ private:
CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>());
}
- llvm::BasicBlock *TerminateLandingPad;
- llvm::BasicBlock *TerminateHandler;
- llvm::BasicBlock *TrapBB;
+ llvm::BasicBlock *TerminateLandingPad = nullptr;
+ llvm::BasicBlock *TerminateHandler = nullptr;
+ llvm::BasicBlock *TrapBB = nullptr;
+
+ /// Terminate funclets keyed by parent funclet pad.
+ llvm::MapVector<llvm::Value *, llvm::BasicBlock *> TerminateFunclets;
+
+ /// Largest vector width used in this function. Will be used to create a
+ /// function attribute.
+ unsigned LargestVectorWidth = 0;
/// True if we need to emit the lifetime markers.
const bool ShouldEmitLifetimeMarkers;
- /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to
+ /// Add OpenCL kernel arg metadata and the kernel attribute metadata to
/// the function metadata.
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn);
@@ -1532,6 +1688,7 @@ public:
return false;
case QualType::DK_cxx_destructor:
case QualType::DK_objc_weak_lifetime:
+ case QualType::DK_nontrivial_c_struct:
return getLangOpts().Exceptions;
case QualType::DK_objc_strong_lifetime:
return getLangOpts().Exceptions &&
@@ -1579,10 +1736,7 @@ public:
/// \return an LLVM value which is a pointer to a struct which contains
/// information about the block, including the block invoke function, the
/// captured variables, etc.
- /// \param InvokeF will contain the block invoke function if it is not
- /// nullptr.
- llvm::Value *EmitBlockLiteral(const BlockExpr *,
- llvm::Function **InvokeF = nullptr);
+ llvm::Value *EmitBlockLiteral(const BlockExpr *);
static void destroyBlockInfos(CGBlockInfo *info);
llvm::Function *GenerateBlockFunction(GlobalDecl GD,
@@ -1604,7 +1758,25 @@ public:
class AutoVarEmission;
void emitByrefStructureInit(const AutoVarEmission &emission);
- void enterByrefCleanup(const AutoVarEmission &emission);
+
+ /// Enter a cleanup to destroy a __block variable. Note that this
+ /// cleanup should be a no-op if the variable hasn't left the stack
+ /// yet; if a cleanup is required for the variable itself, that needs
+ /// to be done externally.
+ ///
+ /// \param Kind Cleanup kind.
+ ///
+ /// \param Addr When \p LoadBlockVarAddr is false, the address of the __block
+ /// structure that will be passed to _Block_object_dispose. When
+ /// \p LoadBlockVarAddr is true, the address of the field of the block
+ /// structure that holds the address of the __block structure.
+ ///
+ /// \param Flags The flag that will be passed to _Block_object_dispose.
+ ///
+ /// \param LoadBlockVarAddr Indicates whether we need to emit a load from
+ /// \p Addr to get the address of the __block structure.
+ void enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags,
+ bool LoadBlockVarAddr);
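// A sketch of a call (values assumed; BLOCK_FIELD_IS_BYREF is the flag
// normally used for __block variables): register disposal of the byref
// structure once the variable's scope is left.
CGF.enterByrefCleanup(NormalAndEHCleanup, emission.Addr,
                      BLOCK_FIELD_IS_BYREF, /*LoadBlockVarAddr=*/false);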
void setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum,
llvm::Value *ptr);
@@ -1627,7 +1799,7 @@ public:
void GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo);
- /// \brief Emit code for the start of a function.
+ /// Emit code for the start of a function.
/// \param Loc The location to be associated with the function.
/// \param StartLoc The location of the function body.
void StartFunction(GlobalDecl GD,
@@ -1653,7 +1825,7 @@ public:
void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD);
void EmitAsanPrologueOrEpilogue(bool Prologue);
- /// \brief Emit the unified return block, trying to avoid its emission when
+ /// Emit the unified return block, trying to avoid its emission when
/// possible.
/// \return The debug location of the user written return statement if the
/// return block is avoided.
@@ -1664,10 +1836,10 @@ public:
void FinishFunction(SourceLocation EndLoc=SourceLocation());
void StartThunk(llvm::Function *Fn, GlobalDecl GD,
- const CGFunctionInfo &FnInfo);
+ const CGFunctionInfo &FnInfo, bool IsUnprototyped);
- void EmitCallAndReturnForThunk(llvm::Constant *Callee,
- const ThunkInfo *Thunk);
+ void EmitCallAndReturnForThunk(llvm::Constant *Callee, const ThunkInfo *Thunk,
+ bool IsUnprototyped);
void FinishThunk();
@@ -1677,7 +1849,8 @@ public:
/// Generate a thunk for the given method.
void generateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo,
- GlobalDecl GD, const ThunkInfo &Thunk);
+ GlobalDecl GD, const ThunkInfo &Thunk,
+ bool IsUnprototyped);
llvm::Function *GenerateVarArgsThunk(llvm::Function *Fn,
const CGFunctionInfo &FnInfo,
@@ -1688,7 +1861,7 @@ public:
void EmitInitializerForField(FieldDecl *Field, LValue LHS, Expr *Init);
- /// Struct with all informations about dynamic [sub]class needed to set vptr.
+ /// Struct with all information about dynamic [sub]class needed to set vptr.
struct VPtr {
BaseSubobject Base;
const CXXRecordDecl *NearestVBase;
@@ -1723,9 +1896,11 @@ public:
CFITCK_DerivedCast,
CFITCK_UnrelatedCast,
CFITCK_ICall,
+ CFITCK_NVMFCall,
+ CFITCK_VMFCall,
};
- /// \brief Derived is the presumed address of an object of type T after a
+ /// Derived is the presumed address of an object of type T after a
/// cast. If T is a polymorphic class type, emit a check that the virtual
/// table for Derived belongs to a class derived from T.
void EmitVTablePtrCheckForCast(QualType T, llvm::Value *Derived,
@@ -1775,6 +1950,10 @@ public:
/// XRay custom event handling calls.
bool AlwaysEmitXRayCustomEvents() const;
+ /// AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit
+ /// XRay typed event handling calls.
+ bool AlwaysEmitXRayTypedEvents() const;
+
/// Encode an address into a form suitable for use in a function prologue.
llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F,
llvm::Constant *Addr);
@@ -1808,6 +1987,10 @@ public:
/// getTerminateLandingPad - Return a landing pad that just calls terminate.
llvm::BasicBlock *getTerminateLandingPad();
+ /// getTerminateFunclet - Return a cleanup funclet that just calls
+ /// terminate.
+ llvm::BasicBlock *getTerminateFunclet();
+
/// getTerminateHandler - Return a handler (not a landing pad, just
/// a catch handler) that just calls terminate. This is used when
/// a terminate scope encloses a try.
@@ -1841,11 +2024,7 @@ public:
llvm::BasicBlock *createBasicBlock(const Twine &name = "",
llvm::Function *parent = nullptr,
llvm::BasicBlock *before = nullptr) {
-#ifdef NDEBUG
- return llvm::BasicBlock::Create(getLLVMContext(), "", parent, before);
-#else
return llvm::BasicBlock::Create(getLLVMContext(), name, parent, before);
-#endif
}
/// getBasicBlockForLabel - Return the LLVM basicblock that the specified
@@ -1975,15 +2154,20 @@ public:
/// to the stack.
///
/// Because the address of a temporary is often exposed to the program in
- /// various ways, this function will perform the cast by default. The cast
- /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is
+ /// various ways, this function will perform the cast. The original alloca
+ /// instruction is returned through \p Alloca if it is not nullptr.
+ ///
+ /// The cast is not performed in CreateTempAllocaWithoutCast. This is
/// more efficient if the caller knows that the address will not be exposed.
llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr);
Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr,
- bool CastToDefaultAddrSpace = true);
+ Address *Alloca = nullptr);
+ Address CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align,
+ const Twine &Name = "tmp",
+ llvm::Value *ArraySize = nullptr);
/// CreateDefaultAlignedTempAlloca - This creates an alloca with the
/// default ABI alignment of the given LLVM type.
@@ -2018,12 +2202,18 @@ public:
Address CreateIRTemp(QualType T, const Twine &Name = "tmp");
/// CreateMemTemp - Create a temporary memory object of the given type, with
- /// appropriate alignment. Cast it to the default address space if
- /// \p CastToDefaultAddrSpace is true.
+ /// appropriate alignment and cast it to the default address space. Returns
+ /// the original alloca instruction through \p Alloca if it is not nullptr.
Address CreateMemTemp(QualType T, const Twine &Name = "tmp",
- bool CastToDefaultAddrSpace = true);
+ Address *Alloca = nullptr);
Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp",
- bool CastToDefaultAddrSpace = true);
+ Address *Alloca = nullptr);
+
+ /// CreateMemTemp - Create a temporary memory object of the given type, with
+ /// appropriate alignment without casting it to the default address space.
+ Address CreateMemTempWithoutCast(QualType T, const Twine &Name = "tmp");
+ Address CreateMemTempWithoutCast(QualType T, CharUnits Align,
+ const Twine &Name = "tmp");
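// A sketch of the new out-parameter (CGF and QualType T assumed): the
// returned Address may be an address-space cast of the alloca, while the
// raw alloca is handed back through *Alloca.
Address RawAlloca = Address::invalid();
Address Tmp = CGF.CreateMemTemp(T, "tmp", &RawAlloca);
// Tmp:       usable address in the default address space
// RawAlloca: the original alloca, e.g. for lifetime markers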
/// CreateAggTemp - Create a temporary memory object for the given
/// aggregate type.
@@ -2032,7 +2222,8 @@ public:
T.getQualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
}
/// Emit a cast to void* in the appropriate address space.
@@ -2089,31 +2280,52 @@ public:
}
return false;
}
- /// EmitAggregateCopy - Emit an aggregate assignment.
- ///
- /// The difference to EmitAggregateCopy is that tail padding is not copied.
- /// This is required for correctness when assigning non-POD structures in C++.
- void EmitAggregateAssign(Address DestPtr, Address SrcPtr,
- QualType EltTy) {
+
+ /// Determine whether a return value slot may overlap some other object.
+ AggValueSlot::Overlap_t overlapForReturnValue() {
+ // FIXME: Assuming no overlap here breaks guaranteed copy elision for base
+ // class subobjects. These cases may need to be revisited depending on the
+ // resolution of the relevant core issue.
+ return AggValueSlot::DoesNotOverlap;
+ }
+
+ /// Determine whether a field initialization may overlap some other object.
+ AggValueSlot::Overlap_t overlapForFieldInit(const FieldDecl *FD) {
+ // FIXME: These cases can result in overlap as a result of P0840R0's
+ // [[no_unique_address]] attribute. We can still infer NoOverlap in the
+ // presence of that attribute if the field is within the nvsize of its
+ // containing class, because non-virtual subobjects are initialized in
+ // address order.
+ return AggValueSlot::DoesNotOverlap;
+ }
+
+ /// Determine whether a base class initialization may overlap some other
+ /// object.
+ AggValueSlot::Overlap_t overlapForBaseInit(const CXXRecordDecl *RD,
+ const CXXRecordDecl *BaseRD,
+ bool IsVirtual);
+
+ /// Emit an aggregate assignment.
+ void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) {
bool IsVolatile = hasVolatileMember(EltTy);
- EmitAggregateCopy(DestPtr, SrcPtr, EltTy, IsVolatile, true);
+ EmitAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile);
}
- void EmitAggregateCopyCtor(Address DestPtr, Address SrcPtr,
- QualType DestTy, QualType SrcTy) {
- EmitAggregateCopy(DestPtr, SrcPtr, SrcTy, /*IsVolatile=*/false,
- /*IsAssignment=*/false);
+ void EmitAggregateCopyCtor(LValue Dest, LValue Src,
+ AggValueSlot::Overlap_t MayOverlap) {
+ EmitAggregateCopy(Dest, Src, Src.getType(), MayOverlap);
}
/// EmitAggregateCopy - Emit an aggregate copy.
///
- /// \param isVolatile - True iff either the source or the destination is
- /// volatile.
- /// \param isAssignment - If false, allow padding to be copied. This often
- /// yields more efficient.
- void EmitAggregateCopy(Address DestPtr, Address SrcPtr,
- QualType EltTy, bool isVolatile=false,
- bool isAssignment = false);
+ /// \param isVolatile \c true iff either the source or the destination is
+ /// volatile.
+ /// \param MayOverlap Whether the tail padding of the destination might be
+ /// occupied by some other object. More efficient code can often be
+ /// generated if not.
+ void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy,
+ AggValueSlot::Overlap_t MayOverlap,
+ bool isVolatile = false);
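// A sketch of how callers choose the flag (LValues Dest and Src and the
// type Ty assumed): an assignment must preserve the destination's tail
// padding, so it passes MayOverlap; initializing a complete, freshly
// created object need not.
CGF.EmitAggregateCopy(Dest, Src, Ty, AggValueSlot::MayOverlap,
                      /*isVolatile=*/false);                        // assignment
CGF.EmitAggregateCopy(Dest, Src, Ty, AggValueSlot::DoesNotOverlap); // construction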
/// GetAddrOfLocalVar - Return the address of a local variable.
Address GetAddrOfLocalVar(const VarDecl *VD) {
@@ -2123,27 +2335,13 @@ public:
return it->second;
}
- /// getOpaqueLValueMapping - Given an opaque value expression (which
- /// must be mapped to an l-value), return its mapping.
- const LValue &getOpaqueLValueMapping(const OpaqueValueExpr *e) {
- assert(OpaqueValueMapping::shouldBindAsLValue(e));
+ /// Given an opaque value expression, return its LValue mapping if it exists,
+ /// otherwise create one.
+ LValue getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e);
- llvm::DenseMap<const OpaqueValueExpr*,LValue>::iterator
- it = OpaqueLValues.find(e);
- assert(it != OpaqueLValues.end() && "no mapping for opaque value!");
- return it->second;
- }
-
- /// getOpaqueRValueMapping - Given an opaque value expression (which
- /// must be mapped to an r-value), return its mapping.
- const RValue &getOpaqueRValueMapping(const OpaqueValueExpr *e) {
- assert(!OpaqueValueMapping::shouldBindAsLValue(e));
-
- llvm::DenseMap<const OpaqueValueExpr*,RValue>::iterator
- it = OpaqueRValues.find(e);
- assert(it != OpaqueRValues.end() && "no mapping for opaque value!");
- return it->second;
- }
+ /// Given an opaque value expression, return its RValue mapping if it exists,
+ /// otherwise create one.
+ RValue getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e);
/// Get the index of the current ArrayInitLoopExpr, if any.
llvm::Value *getArrayInitIndex() { return ArrayInitIndex; }
@@ -2193,12 +2391,24 @@ public:
/// This function can be called with a null (unreachable) insert point.
void EmitVariablyModifiedType(QualType Ty);
- /// getVLASize - Returns an LLVM value that corresponds to the size,
+ struct VlaSizePair {
+ llvm::Value *NumElts;
+ QualType Type;
+
+ VlaSizePair(llvm::Value *NE, QualType T) : NumElts(NE), Type(T) {}
+ };
+
+ /// Return the number of elements for a single dimension
+ /// for the given array type.
+ VlaSizePair getVLAElements1D(const VariableArrayType *vla);
+ VlaSizePair getVLAElements1D(QualType vla);
+
+ /// Returns an LLVM value that corresponds to the size,
/// in non-variably-sized elements, of a variable length array type,
/// plus that largest non-variably-sized element type. Assumes that
/// the type has already been emitted with EmitVariablyModifiedType.
- std::pair<llvm::Value*,QualType> getVLASize(const VariableArrayType *vla);
- std::pair<llvm::Value*,QualType> getVLASize(QualType vla);
+ VlaSizePair getVLASize(const VariableArrayType *vla);
+ VlaSizePair getVLASize(QualType vla);
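// A sketch of the new interface (CGF and a VariableArrayType *VAT assumed);
// the named fields replace the old std::pair<llvm::Value *, QualType>.
CodeGenFunction::VlaSizePair Sizes = CGF.getVLASize(VAT);
llvm::Value *NumElts = Sizes.NumElts; // runtime count of fixed-size elements
QualType EltTy = Sizes.Type;          // largest non-variably-sized element type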
/// LoadCXXThis - Load the value of 'this'. This function is only valid while
/// generating code for an C++ member function.
@@ -2279,11 +2489,14 @@ public:
void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating,
- Address This, const CXXConstructExpr *E);
+ Address This, const CXXConstructExpr *E,
+ AggValueSlot::Overlap_t Overlap);
void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating,
- Address This, CallArgList &Args);
+ Address This, CallArgList &Args,
+ AggValueSlot::Overlap_t Overlap,
+ SourceLocation Loc);
/// Emit assumption load for all bases. Must be called only on the
/// most-derived class and not under construction of the object.
@@ -2333,13 +2546,13 @@ public:
CharUnits CookieSize = CharUnits());
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
- const Expr *Arg, bool IsDelete);
+ const CallExpr *TheCallExpr, bool IsDelete);
llvm::Value *EmitCXXTypeidExpr(const CXXTypeidExpr *E);
llvm::Value *EmitDynamicCast(Address V, const CXXDynamicCastExpr *DCE);
Address EmitCXXUuidofExpr(const CXXUuidofExpr *E);
- /// \brief Situations in which we might emit a check for the suitability of a
+ /// Situations in which we might emit a check for the suitability of a
/// pointer or glvalue.
enum TypeCheckKind {
/// Checking the operand of a load. Must be suitably sized and aligned.
@@ -2383,17 +2596,17 @@ public:
/// Determine whether the pointer type check \p TCK requires a vptr check.
static bool isVptrCheckRequired(TypeCheckKind TCK, QualType Ty);
- /// \brief Whether any type-checking sanitizers are enabled. If \c false,
+ /// Whether any type-checking sanitizers are enabled. If \c false,
/// calls to EmitTypeCheck can be skipped.
bool sanitizePerformTypeCheck() const;
- /// \brief Emit a check that \p V is the address of storage of the
+ /// Emit a check that \p V is the address of storage of the
/// appropriate size and alignment for an object of type \p Type.
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
QualType Type, CharUnits Alignment = CharUnits::Zero(),
SanitizerSet SkippedChecks = SanitizerSet());
- /// \brief Emit a check that \p Base points into an array object, which
+ /// Emit a check that \p Base points into an array object, which
/// we can access at index \p Index. \p Accessed should be \c false if this
/// expression is used as an lvalue, for instance in "&Arr[Idx]".
void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index,
@@ -2434,7 +2647,7 @@ public:
typedef void SpecialInitFn(CodeGenFunction &Init, const VarDecl &D,
llvm::Value *Address);
- /// \brief Determine whether the given initializer is trivial in the sense
+ /// Determine whether the given initializer is trivial in the sense
/// that it requires no code to be generated.
bool isTrivialInitializer(const Expr *Init);
@@ -2448,7 +2661,9 @@ public:
const VarDecl *Variable;
- /// The address of the alloca. Invalid if the variable was emitted
+ /// The address of the alloca for languages with explicit address space
+ /// (e.g. OpenCL) or the alloca cast to a generic pointer for address space
+ /// agnostic languages (e.g. C++). Invalid if the variable was emitted
/// as a global constant.
Address Addr;
@@ -2464,13 +2679,19 @@ public:
/// Non-null if we should use lifetime annotations.
llvm::Value *SizeForLifetimeMarkers;
+ /// Address with original alloca instruction. Invalid if the variable was
+ /// emitted as a global constant.
+ Address AllocaAddr;
+
struct Invalid {};
- AutoVarEmission(Invalid) : Variable(nullptr), Addr(Address::invalid()) {}
+ AutoVarEmission(Invalid)
+ : Variable(nullptr), Addr(Address::invalid()),
+ AllocaAddr(Address::invalid()) {}
AutoVarEmission(const VarDecl &variable)
- : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr),
- IsByRef(false), IsConstantAggregate(false),
- SizeForLifetimeMarkers(nullptr) {}
+ : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr),
+ IsByRef(false), IsConstantAggregate(false),
+ SizeForLifetimeMarkers(nullptr), AllocaAddr(Address::invalid()) {}
bool wasEmittedAsGlobal() const { return !Addr.isValid(); }
@@ -2486,11 +2707,15 @@ public:
}
/// Returns the raw, allocated address, which is not necessarily
- /// the address of the object itself.
+ /// the address of the object itself. It is cast to the default
+ /// address space for address space agnostic languages.
Address getAllocatedAddress() const {
return Addr;
}
+ /// Returns the address for the original alloca instruction.
+ Address getOriginalAllocatedAddress() const { return AllocaAddr; }
+
/// Returns the address of the object within this declaration.
/// Note that this does not chase the forwarding pointer for
/// __block decls.
@@ -2506,6 +2731,15 @@ public:
void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
QualType::DestructionKind dtorKind);
+ /// Emits the alloca and debug information for the size expressions for each
+ /// dimension of an array. It registers the association between each
+ /// (1-dimensional) QualType and its size expression's debug node, so that
+ /// CGDebugInfo can reference this node when creating the DISubrange object
+ /// to describe the array types.
+ void EmitAndRegisterVariableArrayDimensions(CGDebugInfo *DI,
+ const VarDecl &D,
+ bool EmitDebugInfo);
+
void EmitStaticVarDecl(const VarDecl &D,
llvm::GlobalValue::LinkageTypes Linkage);
@@ -2655,6 +2889,9 @@ public:
llvm::Value *EmitSEHExceptionInfo();
llvm::Value *EmitSEHAbnormalTermination();
+ /// Emit simple code for OpenMP directives in Simd-only mode.
+ void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D);
+
/// Scan the outlined statement for captures from the parent function. For
/// each capture, mark the capture as escaped and emit a call to
/// llvm.localrecover. Insert the localrecover result into the LocalDeclMap.
@@ -2697,7 +2934,7 @@ public:
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
SourceLocation Loc);
- /// \brief Perform element by element copying of arrays with type \a
+ /// Perform element by element copying of arrays with type \a
/// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
/// generated by \a CopyGen.
///
@@ -2708,8 +2945,8 @@ public:
/// to another single array element.
void EmitOMPAggregateAssign(
Address DestAddr, Address SrcAddr, QualType OriginalType,
- const llvm::function_ref<void(Address, Address)> &CopyGen);
- /// \brief Emit proper copying of data from one variable to another.
+ const llvm::function_ref<void(Address, Address)> CopyGen);
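// A sketch (Dest, Src and the array's QualType Ty assumed): CopyGen is
// invoked once per element with the destination and source element
// addresses; here it emits a plain scalar load/store.
CGF.EmitOMPAggregateAssign(Dest, Src, Ty,
                           [&CGF](Address DestEl, Address SrcEl) {
                             llvm::Value *V = CGF.Builder.CreateLoad(SrcEl);
                             CGF.Builder.CreateStore(V, DestEl);
                           });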
+ /// Emit proper copying of data from one variable to another.
///
/// \param OriginalType Original type of the copied variables.
/// \param DestAddr Destination address.
@@ -2724,7 +2961,7 @@ public:
Address DestAddr, Address SrcAddr,
const VarDecl *DestVD, const VarDecl *SrcVD,
const Expr *Copy);
- /// \brief Emit atomic update code for constructs: \a X = \a X \a BO \a E or
+ /// Emit atomic update code for constructs: \a X = \a X \a BO \a E or
/// \a X = \a E \a BO \a E.
///
/// \param X Value to be updated.
@@ -2740,7 +2977,7 @@ public:
std::pair<bool, RValue> EmitOMPAtomicSimpleUpdateExpr(
LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
llvm::AtomicOrdering AO, SourceLocation Loc,
- const llvm::function_ref<RValue(RValue)> &CommonGen);
+ const llvm::function_ref<RValue(RValue)> CommonGen);
bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
void EmitOMPPrivateClause(const OMPExecutableDirective &D,
@@ -2748,7 +2985,7 @@ public:
void EmitOMPUseDevicePtrClause(
const OMPClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
- /// \brief Emit code for copyin clause in \a D directive. The next code is
+ /// Emit code for copyin clause in \a D directive. The next code is
/// generated at the start of outlined functions for directives:
/// \code
/// threadprivate_var1 = master_threadprivate_var1;
@@ -2760,7 +2997,7 @@ public:
/// \param D OpenMP directive possibly with 'copyin' clause(s).
/// \returns true if at least one copyin variable is found, false otherwise.
bool EmitOMPCopyinClause(const OMPExecutableDirective &D);
- /// \brief Emit initial code for lastprivate variables. If some variable is
+ /// Emit initial code for lastprivate variables. If some variable is
/// not also firstprivate, then the default initialization is used. Otherwise
/// initialization of this variable is performed by EmitOMPFirstprivateClause
/// method.
@@ -2773,7 +3010,7 @@ public:
/// otherwise.
bool EmitOMPLastprivateClauseInit(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
- /// \brief Emit final copying of lastprivate values to original variables at
+ /// Emit final copying of lastprivate values to original variables at
/// the end of the worksharing or simd directive.
///
/// \param D Directive that has at least one 'lastprivate' directives.
@@ -2791,8 +3028,8 @@ public:
/// linear clause.
void EmitOMPLinearClauseFinal(
const OMPLoopDirective &D,
- const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
- /// \brief Emit initial code for reduction variables. Creates reduction copies
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen);
+ /// Emit initial code for reduction variables. Creates reduction copies
/// and initializes them with the values according to OpenMP standard.
///
/// \param D Directive (possibly) with the 'reduction' clause.
@@ -2801,14 +3038,14 @@ public:
///
void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
- /// \brief Emit final update of reduction values to original variables at
+ /// Emit final update of reduction values to original variables at
/// the end of the directive.
///
/// \param D Directive that has at least one 'reduction' directives.
/// \param ReductionKind The kind of reduction to perform.
void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D,
const OpenMPDirectiveKind ReductionKind);
- /// \brief Emit initial code for linear variables. Creates private copies
+ /// Emit initial code for linear variables. Creates private copies
/// and initializes them with the values according to OpenMP standard.
///
/// \param D Directive (possibly) with the 'linear' clause.
@@ -2821,6 +3058,7 @@ public:
const OMPTaskDataTy & /*Data*/)>
TaskGenTy;
void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+ const OpenMPDirectiveKind CapturedRegion,
const RegionCodeGenTy &BodyGen,
const TaskGenTy &TaskGen, OMPTaskDataTy &Data);
struct OMPTargetDataInfo {
@@ -2930,7 +3168,16 @@ public:
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM,
StringRef ParentName,
const OMPTargetSimdDirective &S);
- /// \brief Emit inner loop of the worksharing/simd construct.
+ /// Emit device code for the target teams distribute parallel for simd
+ /// directive.
+ static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S);
+
+ static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDistributeParallelForDirective &S);
+ /// Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
/// \param RequiresCleanup true, if directive has some associated private
@@ -2943,8 +3190,8 @@ public:
void EmitOMPInnerLoop(
const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
const Expr *IncExpr,
- const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
- const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen);
+ const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
+ const llvm::function_ref<void(CodeGenFunction &)> PostIncGen);
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind);
/// Emit initial code for loop counters of loop-based directives.
@@ -2954,7 +3201,7 @@ public:
/// Helper for the OpenMP loop directives.
void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
- /// \brief Emit code for the worksharing loop-based directive.
+ /// Emit code for the worksharing loop-based directive.
/// \return true, if this construct has any lastprivate clause, false -
/// otherwise.
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB,
@@ -2969,17 +3216,14 @@ public:
void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
void EmitOMPSimdFinal(
const OMPLoopDirective &D,
- const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen);
/// Emits the lvalue for the expression with possibly captured variable.
LValue EmitOMPSharedLValue(const Expr *E);
private:
- /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
- /// nullptr. It should be called without \p InvokeF if the caller does not
- /// need invoke function to be returned.
- llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
- llvm::Function **InvokeF = nullptr);
+ /// Helpers for blocks.
+ llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
/// struct with the values to be passed to the OpenMP loop-related functions
struct OMPLoopArguments {
@@ -3030,7 +3274,7 @@ private:
OMPPrivateScope &LoopScope,
const OMPLoopArguments &LoopArgs,
const CodeGenLoopTy &CodeGenLoopContent);
- /// \brief Emit code for sections directive.
+ /// Emit code for sections directive.
void EmitSections(const OMPExecutableDirective &S);
public:
@@ -3071,7 +3315,7 @@ public:
///
LValue EmitLValue(const Expr *E);
- /// \brief Same as EmitLValue but additionally we generate checking code to
+ /// Same as EmitLValue but additionally we generate checking code to
/// guard against undefined behavior. This is only suitable when we know
/// that the address will be used to access the object.
LValue EmitCheckedLValue(const Expr *E, TypeCheckKind TCK);
@@ -3332,6 +3576,9 @@ public:
ArrayRef<llvm::Value*> args,
const Twine &name = "");
+ SmallVector<llvm::OperandBundleDef, 1>
+ getBundlesForFunclet(llvm::Value *Callee);
+
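// A sketch (CalleeV and Args assumed): calls emitted while inside an EH
// funclet must carry a "funclet" operand bundle naming the enclosing pad,
// which this helper computes from the current insertion point.
SmallVector<llvm::OperandBundleDef, 1> Bundles =
    CGF.getBundlesForFunclet(CalleeV);
llvm::CallInst *Call = CGF.Builder.CreateCall(CalleeV, Args, Bundles);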
llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee,
ArrayRef<llvm::Value *> Args,
const Twine &Name = "");
@@ -3351,6 +3598,16 @@ public:
CXXDtorType Type,
const CXXRecordDecl *RD);
+ // These functions emit calls to the special functions of non-trivial C
+ // structs.
+ void defaultInitNonTrivialCStructVar(LValue Dst);
+ void callCStructDefaultConstructor(LValue Dst);
+ void callCStructDestructor(LValue Dst);
+ void callCStructCopyConstructor(LValue Dst, LValue Src);
+ void callCStructMoveConstructor(LValue Dst, LValue Src);
+ void callCStructCopyAssignmentOperator(LValue Dst, LValue Src);
+ void callCStructMoveAssignmentOperator(LValue Dst, LValue Src);
+
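// A sketch (LValues Dst and Src of a non-trivial C struct type assumed,
// e.g. a struct with a __strong pointer member under ARC):
CGF.callCStructCopyConstructor(Dst, Src); // retains __strong members
CGF.callCStructDestructor(Dst);           // releases them again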
RValue
EmitCXXMemberOrOperatorCall(const CXXMethodDecl *Method,
const CGCallee &Callee,
@@ -3424,6 +3681,10 @@ public:
SmallVectorImpl<llvm::Value *> &Ops,
Address PtrOp0, Address PtrOp1,
llvm::Triple::ArchType Arch);
+
+ llvm::Value *EmitISOVolatileLoad(const CallExpr *E);
+ llvm::Value *EmitISOVolatileStore(const CallExpr *E);
+
llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
unsigned Modifier, llvm::Type *ArgTy,
const CallExpr *E);
@@ -3482,6 +3743,8 @@ public:
llvm::Value *EmitARCLoadWeak(Address addr);
llvm::Value *EmitARCLoadWeakRetained(Address addr);
llvm::Value *EmitARCStoreWeak(Address addr, llvm::Value *value, bool ignored);
+ void emitARCCopyAssignWeak(QualType Ty, Address DstAddr, Address SrcAddr);
+ void emitARCMoveAssignWeak(QualType Ty, Address DstAddr, Address SrcAddr);
void EmitARCCopyWeak(Address dst, Address src);
void EmitARCMoveWeak(Address dst, Address src);
llvm::Value *EmitARCRetainAutorelease(QualType type, llvm::Value *value);
@@ -3525,6 +3788,7 @@ public:
static Destroyer destroyARCStrongPrecise;
static Destroyer destroyARCWeak;
static Destroyer emitARCIntrinsicUse;
+ static Destroyer destroyNonTrivialCStruct;
void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr);
llvm::Value *EmitObjCAutoreleasePoolPush();
@@ -3532,7 +3796,7 @@ public:
void EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr);
void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr);
- /// \brief Emits a reference binding to the passed in expression.
+ /// Emits a reference binding to the passed in expression.
RValue EmitReferenceBindingToExpr(const Expr *E);
//===--------------------------------------------------------------------===//
@@ -3610,6 +3874,9 @@ public:
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::Constant *fn,
llvm::Constant *addr);
+ /// Call atexit() with function dtorStub.
+ void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
+
/// Emit code in this function to perform a guarded variable
/// initialization. Guarded initializations are used when it's not
/// possible to prove that an initialization will be done exactly
@@ -3746,26 +4013,26 @@ public:
/// enabled, a runtime check specified by \p Kind is also emitted.
llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind);
- /// \brief Emit a description of a type in a format suitable for passing to
+ /// Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.
llvm::Constant *EmitCheckTypeDescriptor(QualType T);
- /// \brief Convert a value into a format suitable for passing to a runtime
+ /// Convert a value into a format suitable for passing to a runtime
/// sanitizer handler.
llvm::Value *EmitCheckValue(llvm::Value *V);
- /// \brief Emit a description of a source location in a format suitable for
+ /// Emit a description of a source location in a format suitable for
/// passing to a runtime sanitizer handler.
llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc);
- /// \brief Create a basic block that will call a handler function in a
+ /// Create a basic block that will call a handler function in a
/// sanitizer runtime with the provided arguments, and create a conditional
/// branch to it.
void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked,
SanitizerHandler Check, ArrayRef<llvm::Constant *> StaticArgs,
ArrayRef<llvm::Value *> DynamicArgs);
- /// \brief Emit a slow path cross-DSO CFI check which calls __cfi_slowpath
+ /// Emit a slow path cross-DSO CFI check which calls __cfi_slowpath
/// if Cond is false.
void EmitCfiSlowPathCheck(SanitizerMask Kind, llvm::Value *Cond,
llvm::ConstantInt *TypeId, llvm::Value *Ptr,
@@ -3775,21 +4042,21 @@ public:
/// checking is enabled. Otherwise, just emit an unreachable instruction.
void EmitUnreachable(SourceLocation Loc);
- /// \brief Create a basic block that will call the trap intrinsic, and emit a
+ /// Create a basic block that will call the trap intrinsic, and emit a
/// conditional branch to it, for the -ftrapv checks.
void EmitTrapCheck(llvm::Value *Checked);
- /// \brief Emit a call to trap or debugtrap and attach function attribute
+ /// Emit a call to trap or debugtrap and attach function attribute
/// "trap-func-name" if specified.
llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID);
- /// \brief Emit a stub for the cross-DSO CFI check function.
+ /// Emit a stub for the cross-DSO CFI check function.
void EmitCfiCheckStub();
- /// \brief Emit a cross-DSO CFI failure handling function.
+ /// Emit a cross-DSO CFI failure handling function.
void EmitCfiCheckFail();
- /// \brief Create a check for a function parameter that may potentially be
+ /// Create a check for a function parameter that may potentially be
/// declared as non-null.
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
AbstractCallee AC, unsigned ParmNum);
@@ -3829,10 +4096,10 @@ private:
void ExpandTypeFromArgs(QualType Ty, LValue Dst,
SmallVectorImpl<llvm::Value *>::iterator &AI);
- /// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg
+ /// ExpandTypeToArgs - Expand a CallArg \arg Arg, with the LLVM type for \arg
/// Ty, into individual arguments on the provided vector \arg IRCallArgs,
/// starting at index \arg IRCallArgPos. See ABIArgInfo::Expand.
- void ExpandTypeToArgs(QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy,
+ void ExpandTypeToArgs(QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
SmallVectorImpl<llvm::Value *> &IRCallArgs,
unsigned &IRCallArgPos);
@@ -3844,7 +4111,7 @@ private:
std::string &ConstraintStr,
SourceLocation Loc);
- /// \brief Attempts to statically evaluate the object size of E. If that
+ /// Attempts to statically evaluate the object size of E. If that
/// fails, emits code to figure the size of E out for us. This is
/// pass_object_size aware.
///
@@ -3853,7 +4120,7 @@ private:
llvm::IntegerType *ResType,
llvm::Value *EmittedE);
- /// \brief Emits the size of E, as required by __builtin_object_size. This
+ /// Emits the size of E, as required by __builtin_object_size. This
/// function is aware of pass_object_size parameters, and will act accordingly
/// if E is a parameter with the pass_object_size attribute.
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type,
@@ -3973,6 +4240,48 @@ public:
void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
+ struct TargetMultiVersionResolverOption {
+ llvm::Function *Function;
+ TargetAttr::ParsedTargetAttr ParsedAttribute;
+ unsigned Priority;
+ TargetMultiVersionResolverOption(
+ const TargetInfo &TargInfo, llvm::Function *F,
+ const clang::TargetAttr::ParsedTargetAttr &PT)
+ : Function(F), ParsedAttribute(PT), Priority(0u) {
+ for (StringRef Feat : PT.Features)
+ Priority = std::max(Priority,
+ TargInfo.multiVersionSortPriority(Feat.substr(1)));
+
+ if (!PT.Architecture.empty())
+ Priority = std::max(Priority,
+ TargInfo.multiVersionSortPriority(PT.Architecture));
+ }
+
+ bool operator>(const TargetMultiVersionResolverOption &Other) const {
+ return Priority > Other.Priority;
+ }
+ };
+ void EmitTargetMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<TargetMultiVersionResolverOption> Options);
+
+ struct CPUDispatchMultiVersionResolverOption {
+ llvm::Function *Function;
+ // Note: EmitX86CPUSupports only has 32 bits available, so we store the mask
+ // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports,
+ // this can be extended to 64 bits.
+ uint32_t FeatureMask;
+ CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask)
+ : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {}
+ bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const {
+ return FeatureMask > Other.FeatureMask;
+ }
+ };
+ void EmitCPUDispatchMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<CPUDispatchMultiVersionResolverOption> Options);
+ static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
+
private:
QualType getVarArgType(const Expr *Arg);
@@ -3988,110 +4297,35 @@ private:
llvm::Value *EmitX86CpuIs(StringRef CPUStr);
llvm::Value *EmitX86CpuSupports(const CallExpr *E);
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
+ llvm::Value *EmitX86CpuSupports(uint32_t Mask);
llvm::Value *EmitX86CpuInit();
+ llvm::Value *
+ FormResolverCondition(const TargetMultiVersionResolverOption &RO);
};
-/// Helper class with most of the code for saving a value for a
-/// conditional expression cleanup.
-struct DominatingLLVMValue {
- typedef llvm::PointerIntPair<llvm::Value*, 1, bool> saved_type;
-
- /// Answer whether the given value needs extra work to be saved.
- static bool needsSaving(llvm::Value *value) {
- // If it's not an instruction, we don't need to save.
- if (!isa<llvm::Instruction>(value)) return false;
-
- // If it's an instruction in the entry block, we don't need to save.
- llvm::BasicBlock *block = cast<llvm::Instruction>(value)->getParent();
- return (block != &block->getParent()->getEntryBlock());
- }
-
- /// Try to save the given value.
- static saved_type save(CodeGenFunction &CGF, llvm::Value *value) {
- if (!needsSaving(value)) return saved_type(value, false);
-
- // Otherwise, we need an alloca.
- auto align = CharUnits::fromQuantity(
- CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType()));
- Address alloca =
- CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save");
- CGF.Builder.CreateStore(value, alloca);
-
- return saved_type(alloca.getPointer(), true);
- }
-
- static llvm::Value *restore(CodeGenFunction &CGF, saved_type value) {
- // If the value says it wasn't saved, trust that it's still dominating.
- if (!value.getInt()) return value.getPointer();
-
- // Otherwise, it should be an alloca instruction, as set up in save().
- auto alloca = cast<llvm::AllocaInst>(value.getPointer());
- return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment());
- }
-};
-
-/// A partial specialization of DominatingValue for llvm::Values that
-/// might be llvm::Instructions.
-template <class T> struct DominatingPointer<T,true> : DominatingLLVMValue {
- typedef T *type;
- static type restore(CodeGenFunction &CGF, saved_type value) {
- return static_cast<T*>(DominatingLLVMValue::restore(CGF, value));
- }
-};
-
-/// A specialization of DominatingValue for Address.
-template <> struct DominatingValue<Address> {
- typedef Address type;
-
- struct saved_type {
- DominatingLLVMValue::saved_type SavedValue;
- CharUnits Alignment;
- };
-
- static bool needsSaving(type value) {
- return DominatingLLVMValue::needsSaving(value.getPointer());
- }
- static saved_type save(CodeGenFunction &CGF, type value) {
- return { DominatingLLVMValue::save(CGF, value.getPointer()),
- value.getAlignment() };
- }
- static type restore(CodeGenFunction &CGF, saved_type value) {
- return Address(DominatingLLVMValue::restore(CGF, value.SavedValue),
- value.Alignment);
- }
-};
-
-/// A specialization of DominatingValue for RValue.
-template <> struct DominatingValue<RValue> {
- typedef RValue type;
- class saved_type {
- enum Kind { ScalarLiteral, ScalarAddress, AggregateLiteral,
- AggregateAddress, ComplexAddress };
+inline DominatingLLVMValue::saved_type
+DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *value) {
+ if (!needsSaving(value)) return saved_type(value, false);
- llvm::Value *Value;
- unsigned K : 3;
- unsigned Align : 29;
- saved_type(llvm::Value *v, Kind k, unsigned a = 0)
- : Value(v), K(k), Align(a) {}
+ // Otherwise, we need an alloca.
+ auto align = CharUnits::fromQuantity(
+ CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType()));
+ Address alloca =
+ CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save");
+ CGF.Builder.CreateStore(value, alloca);
- public:
- static bool needsSaving(RValue value);
- static saved_type save(CodeGenFunction &CGF, RValue value);
- RValue restore(CodeGenFunction &CGF);
+ return saved_type(alloca.getPointer(), true);
+}
- // implementations in CGCleanup.cpp
- };
+inline llvm::Value *DominatingLLVMValue::restore(CodeGenFunction &CGF,
+ saved_type value) {
+ // If the value says it wasn't saved, trust that it's still dominating.
+ if (!value.getInt()) return value.getPointer();
- static bool needsSaving(type value) {
- return saved_type::needsSaving(value);
- }
- static saved_type save(CodeGenFunction &CGF, type value) {
- return saved_type::save(CGF, value);
- }
- static type restore(CodeGenFunction &CGF, saved_type value) {
- return value.restore(CGF);
- }
-};
+ // Otherwise, it should be an alloca instruction, as set up in save().
+ auto alloca = cast<llvm::AllocaInst>(value.getPointer());
+ return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment());
+}
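// A sketch of the protocol these inline definitions implement (CGF and an
// llvm::Value *V that may not dominate the cleanup's emission point are
// assumed):
DominatingLLVMValue::saved_type Saved = DominatingLLVMValue::save(CGF, V);
// ... later, when the conditionally-entered cleanup is actually emitted ...
llvm::Value *Reloaded = DominatingLLVMValue::restore(CGF, Saved);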
} // end namespace CodeGen
} // end namespace clang
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 5bdf81aaf66e..ecdf78d4b347 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -123,7 +123,6 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace();
RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
- BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC();
if (LangOpts.ObjC1)
createObjCRuntime();
@@ -208,7 +207,10 @@ void CodeGenModule::createOpenMPRuntime() {
OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
break;
default:
- OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
+ if (LangOpts.OpenMPSimd)
+ OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this));
+ else
+ OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
break;
}
}
@@ -392,26 +394,29 @@ void CodeGenModule::Release() {
applyGlobalValReplacements();
applyReplacements();
checkAliases();
+ emitMultiVersionFunctions();
EmitCXXGlobalInitFunc();
EmitCXXGlobalDtorFunc();
+ registerGlobalDtorsWithAtExit();
EmitCXXThreadLocalInitFunc();
if (ObjCRuntime)
if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction())
AddGlobalCtor(ObjCInitFunction);
if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice &&
CUDARuntime) {
- if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction())
+ if (llvm::Function *CudaCtorFunction =
+ CUDARuntime->makeModuleCtorFunction())
AddGlobalCtor(CudaCtorFunction);
- if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction())
- AddGlobalDtor(CudaDtorFunction);
}
- if (OpenMPRuntime)
+ if (OpenMPRuntime) {
if (llvm::Function *OpenMPRegistrationFunction =
OpenMPRuntime->emitRegistrationFunction()) {
auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ?
OpenMPRegistrationFunction : nullptr;
AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey);
}
+ OpenMPRuntime->clear();
+ }
if (PGOReader) {
getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext));
if (PGOStats.hasDiagnostics())
@@ -453,6 +458,10 @@ void CodeGenModule::Release() {
// Indicate that we want CodeView in the metadata.
getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1);
}
+ if (CodeGenOpts.ControlFlowGuard) {
+ // We want function ID tables for Control Flow Guard.
+ getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1);
+ }
if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) {
// We don't support LTO with 2 with different StrictVTablePointers
// FIXME: we could support it by stripping all the information introduced
@@ -498,12 +507,26 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
}
+ if (CodeGenOpts.CFProtectionReturn &&
+ Target.checkCFProtectionReturnSupported(getDiags())) {
+ // Indicate that we want to instrument return control flow protection.
+ getModule().addModuleFlag(llvm::Module::Override, "cf-protection-return",
+ 1);
+ }
+
+ if (CodeGenOpts.CFProtectionBranch &&
+ Target.checkCFProtectionBranchSupported(getDiags())) {
+ // Indicate that we want to instrument branch control flow protection.
+ getModule().addModuleFlag(llvm::Module::Override, "cf-protection-branch",
+ 1);
+ }
+
if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
// Indicate whether __nvvm_reflect should be configured to flush denormal
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
// property.)
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
- LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
+ CodeGenOpts.FlushDenorm ? 1 : 0);
}
// Emit OpenCL specific module metadata: OpenCL/SPIR version.
@@ -533,6 +556,9 @@ void CodeGenModule::Release() {
getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel));
}
+ if (CodeGenOpts.NoPLT)
+ getModule().setRtLibUseGOT();
+
SimplifyPersonality();
if (getCodeGenOpts().EmitDeclMetadata)
@@ -544,7 +570,8 @@ void CodeGenModule::Release() {
if (DebugInfo)
DebugInfo->finalize();
- EmitVersionIdentMetadata();
+ if (getCodeGenOpts().EmitVersionIdentMetadata)
+ EmitVersionIdentMetadata();
EmitTargetMetadata();
}
@@ -580,13 +607,9 @@ llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) {
}
TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {
- // Pointee values may have incomplete types, but they shall never be
- // dereferenced.
- if (AccessType->isIncompleteType())
- return TBAAAccessInfo::getIncompleteInfo();
-
- uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
- return TBAAAccessInfo(getTBAATypeInfo(AccessType), Size);
+ if (!TBAA)
+ return TBAAAccessInfo();
+ return TBAA->getAccessInfo(AccessType);
}
TBAAAccessInfo
@@ -629,6 +652,14 @@ CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB);
}
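+// Compute the TBAA access info for a memory transfer (memcpy/memmove-style
+// copy) given the access infos of the destination and the source.
+// Illustrative use (a sketch, not part of this hunk): an aggregate-copy
+// emitter would pass the result to DecorateInstructionWithTBAA on the
+// emitted intrinsic call.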
+TBAAAccessInfo
+CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+ TBAAAccessInfo SrcInfo) {
+ if (!TBAA)
+ return TBAAAccessInfo();
+  return TBAA->mergeTBAAInfoForMemoryTransfer(DestInfo, SrcInfo);
+}
+
void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst,
TBAAAccessInfo TBAAInfo) {
if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo))
@@ -670,21 +701,129 @@ llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) {
}
void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
- const NamedDecl *D,
- ForDefinition_t IsForDefinition) const {
+ const NamedDecl *D) const {
+ if (GV->hasDLLImportStorageClass())
+ return;
// Internal definitions always have default visibility.
if (GV->hasLocalLinkage()) {
GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
return;
}
-
+ if (!D)
+ return;
// Set visibility for definitions.
LinkageInfo LV = D->getLinkageAndVisibility();
- if (LV.isVisibilityExplicit() ||
- (IsForDefinition && !GV->hasAvailableExternallyLinkage()))
+ if (LV.isVisibilityExplicit() || !GV->isDeclarationForLinker())
GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
}
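+// Decide whether GV can be assumed local to this linkage unit. Worked
+// example (illustrative): with -fpie on ELF, a defined external global
+// such as `int x = 0;` reaches the "definition cannot be preempted" case
+// below and is marked dso_local, permitting direct, GOT-free access.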
+static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
+ llvm::GlobalValue *GV) {
+ if (GV->hasLocalLinkage())
+ return true;
+
+ if (!GV->hasDefaultVisibility() && !GV->hasExternalWeakLinkage())
+ return true;
+
+ // DLLImport explicitly marks the GV as external.
+ if (GV->hasDLLImportStorageClass())
+ return false;
+
+ const llvm::Triple &TT = CGM.getTriple();
+ // Every other GV is local on COFF.
+  // Make an exception for Windows OS in the triple: some firmware builds use
+  // *-win32-macho triples. This (accidentally?) produced Windows relocations
+  // without GOT tables in older clang versions; keep this behaviour.
+  // FIXME: even thread local variables?
+ if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
+ return true;
+
+ // Only handle COFF and ELF for now.
+ if (!TT.isOSBinFormatELF())
+ return false;
+
+ // If this is not an executable, don't assume anything is local.
+ const auto &CGOpts = CGM.getCodeGenOpts();
+ llvm::Reloc::Model RM = CGOpts.RelocationModel;
+ const auto &LOpts = CGM.getLangOpts();
+ if (RM != llvm::Reloc::Static && !LOpts.PIE)
+ return false;
+
+ // A definition cannot be preempted from an executable.
+ if (!GV->isDeclarationForLinker())
+ return true;
+
+  // Most PIC code sequences that assume that a symbol is local cannot produce
+  // a 0 if it turns out the symbol is undefined. While this is ABI and
+  // relocation dependent, it seems worth it to handle it here.
+ if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage())
+ return false;
+
+ // PPC has no copy relocations and cannot use a plt entry as a symbol address.
+ llvm::Triple::ArchType Arch = TT.getArch();
+ if (Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 ||
+ Arch == llvm::Triple::ppc64le)
+ return false;
+
+ // If we can use copy relocations we can assume it is local.
+ if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV))
+ if (!Var->isThreadLocal() &&
+ (RM == llvm::Reloc::Static || CGOpts.PIECopyRelocations))
+ return true;
+
+ // If we can use a plt entry as the symbol address we can assume it
+ // is local.
+ // FIXME: This should work for PIE, but the gold linker doesn't support it.
+ if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static)
+ return true;
+
+  // Otherwise don't assume it is local.
+ return false;
+}
+
+void CodeGenModule::setDSOLocal(llvm::GlobalValue *GV) const {
+ GV->setDSOLocal(shouldAssumeDSOLocal(*this, GV));
+}
+
+void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV,
+ GlobalDecl GD) const {
+ const auto *D = dyn_cast<NamedDecl>(GD.getDecl());
+ // C++ destructors have a few C++ ABI specific special cases.
+ if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(D)) {
+ getCXXABI().setCXXDestructorDLLStorage(GV, Dtor, GD.getDtorType());
+ return;
+ }
+ setDLLImportDLLExport(GV, D);
+}
+
+void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV,
+ const NamedDecl *D) const {
+ if (D && D->isExternallyVisible()) {
+ if (D->hasAttr<DLLImportAttr>())
+ GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
+ else if (D->hasAttr<DLLExportAttr>() && !GV->isDeclarationForLinker())
+ GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
+ }
+}
+
+void CodeGenModule::setGVProperties(llvm::GlobalValue *GV,
+ GlobalDecl GD) const {
+ setDLLImportDLLExport(GV, GD);
+ setGlobalVisibilityAndLocal(GV, dyn_cast<NamedDecl>(GD.getDecl()));
+}
+
+void CodeGenModule::setGVProperties(llvm::GlobalValue *GV,
+ const NamedDecl *D) const {
+ setDLLImportDLLExport(GV, D);
+ setGlobalVisibilityAndLocal(GV, D);
+}
+
+void CodeGenModule::setGlobalVisibilityAndLocal(llvm::GlobalValue *GV,
+ const NamedDecl *D) const {
+ setGlobalVisibility(GV, D);
+ setDSOLocal(GV);
+}
+
static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) {
return llvm::StringSwitch<llvm::GlobalVariable::ThreadLocalMode>(S)
.Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel)
@@ -722,36 +861,68 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const {
GV->setThreadLocalMode(TLM);
}
-StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
- GlobalDecl CanonicalGD = GD.getCanonicalDecl();
+static std::string getCPUSpecificMangling(const CodeGenModule &CGM,
+ StringRef Name) {
+ const TargetInfo &Target = CGM.getTarget();
+ return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str();
+}
- // Some ABIs don't have constructor variants. Make sure that base and
- // complete constructors get mangled the same.
- if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) {
- if (!getTarget().getCXXABI().hasConstructorVariants()) {
- CXXCtorType OrigCtorType = GD.getCtorType();
- assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete);
- if (OrigCtorType == Ctor_Base)
- CanonicalGD = GlobalDecl(CD, Ctor_Complete);
- }
+static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
+ const CPUSpecificAttr *Attr,
+ raw_ostream &Out) {
+ // cpu_specific gets the current name, dispatch gets the resolver.
+ if (Attr)
+ Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName());
+ else
+ Out << ".resolver";
+}
+
+static void AppendTargetMangling(const CodeGenModule &CGM,
+ const TargetAttr *Attr, raw_ostream &Out) {
+ if (Attr->isDefaultVersion())
+ return;
+
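+  // E.g. (illustrative): target("arch=atom") mangles to ".arch_atom", and
+  // target("avx2,sse4.2") to roughly ".avx2_sse4.2" - features sorted by
+  // descending multiVersionSortPriority with their '+' prefix stripped.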
+ Out << '.';
+ const TargetInfo &Target = CGM.getTarget();
+ TargetAttr::ParsedTargetAttr Info =
+ Attr->parse([&Target](StringRef LHS, StringRef RHS) {
+ // Multiversioning doesn't allow "no-${feature}", so we can
+ // only have "+" prefixes here.
+ assert(LHS.startswith("+") && RHS.startswith("+") &&
+ "Features should always have a prefix.");
+ return Target.multiVersionSortPriority(LHS.substr(1)) >
+ Target.multiVersionSortPriority(RHS.substr(1));
+ });
+
+ bool IsFirst = true;
+
+ if (!Info.Architecture.empty()) {
+ IsFirst = false;
+ Out << "arch_" << Info.Architecture;
}
- auto FoundName = MangledDeclNames.find(CanonicalGD);
- if (FoundName != MangledDeclNames.end())
- return FoundName->second;
+ for (StringRef Feat : Info.Features) {
+ if (!IsFirst)
+ Out << '_';
+ IsFirst = false;
+ Out << Feat.substr(1);
+ }
+}
- const auto *ND = cast<NamedDecl>(GD.getDecl());
+static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
+ const NamedDecl *ND,
+ bool OmitMultiVersionMangling = false) {
SmallString<256> Buffer;
- StringRef Str;
- if (getCXXABI().getMangleContext().shouldMangleDeclName(ND)) {
+ llvm::raw_svector_ostream Out(Buffer);
+ MangleContext &MC = CGM.getCXXABI().getMangleContext();
+ if (MC.shouldMangleDeclName(ND)) {
llvm::raw_svector_ostream Out(Buffer);
if (const auto *D = dyn_cast<CXXConstructorDecl>(ND))
- getCXXABI().getMangleContext().mangleCXXCtor(D, GD.getCtorType(), Out);
+ MC.mangleCXXCtor(D, GD.getCtorType(), Out);
else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND))
- getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out);
+ MC.mangleCXXDtor(D, GD.getDtorType(), Out);
else
- getCXXABI().getMangleContext().mangleName(ND, Out);
- Str = Out.str();
+ MC.mangleName(ND, Out);
} else {
IdentifierInfo *II = ND->getIdentifier();
assert(II && "Attempt to mangle unnamed decl.");
@@ -761,14 +932,103 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
llvm::raw_svector_ostream Out(Buffer);
Out << "__regcall3__" << II->getName();
- Str = Out.str();
} else {
- Str = II->getName();
+ Out << II->getName();
+ }
+ }
+
+ if (const auto *FD = dyn_cast<FunctionDecl>(ND))
+ if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
+ if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())
+ AppendCPUSpecificCPUDispatchMangling(
+ CGM, FD->getAttr<CPUSpecificAttr>(), Out);
+ else
+ AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
+ }
+
+ return Out.str();
+}
+
+void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
+ const FunctionDecl *FD) {
+ if (!FD->isMultiVersion())
+ return;
+
+ // Get the name of what this would be without the 'target' attribute. This
+  // allows us to look up the version that was emitted when this wasn't a
+ // multiversion function.
+ std::string NonTargetName =
+ getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ GlobalDecl OtherGD;
+ if (lookupRepresentativeDecl(NonTargetName, OtherGD)) {
+ assert(OtherGD.getCanonicalDecl()
+ .getDecl()
+ ->getAsFunction()
+ ->isMultiVersion() &&
+ "Other GD should now be a multiversioned function");
+ // OtherFD is the version of this function that was mangled BEFORE
+ // becoming a MultiVersion function. It potentially needs to be updated.
+ const FunctionDecl *OtherFD =
+ OtherGD.getCanonicalDecl().getDecl()->getAsFunction();
+ std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD);
+ // This is so that if the initial version was already the 'default'
+ // version, we don't try to update it.
+ if (OtherName != NonTargetName) {
+ // Remove instead of erase, since others may have stored the StringRef
+ // to this.
+ const auto ExistingRecord = Manglings.find(NonTargetName);
+ if (ExistingRecord != std::end(Manglings))
+ Manglings.remove(&(*ExistingRecord));
+ auto Result = Manglings.insert(std::make_pair(OtherName, OtherGD));
+ MangledDeclNames[OtherGD.getCanonicalDecl()] = Result.first->first();
+ if (llvm::GlobalValue *Entry = GetGlobalValue(NonTargetName))
+ Entry->setName(OtherName);
+ }
+ }
+}
+
+StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
+ GlobalDecl CanonicalGD = GD.getCanonicalDecl();
+
+ // Some ABIs don't have constructor variants. Make sure that base and
+ // complete constructors get mangled the same.
+ if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) {
+ if (!getTarget().getCXXABI().hasConstructorVariants()) {
+ CXXCtorType OrigCtorType = GD.getCtorType();
+ assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete);
+ if (OrigCtorType == Ctor_Base)
+ CanonicalGD = GlobalDecl(CD, Ctor_Complete);
}
}
+ const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
+ // Since CPUSpecific can require multiple emits per decl, store the manglings
+ // separately.
+ if (FD &&
+ (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) {
+ const auto *SD = FD->getAttr<CPUSpecificAttr>();
+
+ std::pair<GlobalDecl, unsigned> SpecCanonicalGD{
+ CanonicalGD,
+ SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()};
+
+ auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD);
+ if (FoundName != CPUSpecificMangledDeclNames.end())
+ return FoundName->second;
+
+ auto Result = CPUSpecificManglings.insert(
+ std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD));
+ return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first();
+ }
+
+ auto FoundName = MangledDeclNames.find(CanonicalGD);
+ if (FoundName != MangledDeclNames.end())
+ return FoundName->second;
+
// Keep the first result in the case of a mangling collision.
- auto Result = Manglings.insert(std::make_pair(Str, GD));
+ const auto *ND = cast<NamedDecl>(GD.getDecl());
+ auto Result =
+ Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD));
return MangledDeclNames[CanonicalGD] = Result.first->first();
}
@@ -808,6 +1068,11 @@ void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority,
/// AddGlobalDtor - Add a function to the list that will be called
/// when the module is unloaded.
void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) {
+ if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) {
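+    // With -fregister-global-dtors-with-atexit, collect destructors here and
+    // register them via atexit() from a synthesized constructor instead of
+    // emitting @llvm.global_dtors entries (illustrative summary of the flag).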
+ DtorsUsingAtExit[Priority].push_back(Dtor);
+ return;
+ }
+
// FIXME: Type coercion of void()* types.
GlobalDtors.push_back(Structor(Priority, Dtor, nullptr));
}
@@ -855,14 +1120,8 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
GVALinkage Linkage = getContext().GetGVALinkageForFunction(D);
- if (isa<CXXDestructorDecl>(D) &&
- getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
- GD.getDtorType())) {
- // Destructor variants in the Microsoft C++ ABI are always internal or
- // linkonce_odr thunks emitted on an as-needed basis.
- return Linkage == GVA_Internal ? llvm::GlobalValue::InternalLinkage
- : llvm::GlobalValue::LinkOnceODRLinkage;
- }
+ if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(D))
+ return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType());
if (isa<CXXConstructorDecl>(D) &&
cast<CXXConstructorDecl>(D)->isInheritingConstructor() &&
@@ -876,25 +1135,6 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false);
}
-void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F) {
- const auto *FD = cast<FunctionDecl>(GD.getDecl());
-
- if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) {
- if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) {
- // Don't dllexport/import destructor thunks.
- F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
- return;
- }
- }
-
- if (FD->hasAttr<DLLImportAttr>())
- F->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
- else if (FD->hasAttr<DLLExportAttr>())
- F->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
- else
- F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
-}
-
llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
llvm::MDString *MDS = dyn_cast<llvm::MDString>(MD);
if (!MDS) return nullptr;
@@ -902,11 +1142,6 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString()));
}
-void CodeGenModule::setFunctionDefinitionAttributes(const FunctionDecl *D,
- llvm::Function *F) {
- setNonAliasAttributes(D, F);
-}
-
void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
const CGFunctionInfo &Info,
llvm::Function *F) {
@@ -937,6 +1172,34 @@ static bool hasUnwindExceptions(const LangOptions &LangOpts) {
return true;
}
+static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM,
+ const CXXMethodDecl *MD) {
+ // Check that the type metadata can ever actually be used by a call.
+ if (!CGM.getCodeGenOpts().LTOUnit ||
+ !CGM.HasHiddenLTOVisibility(MD->getParent()))
+ return false;
+
+ // Only functions whose address can be taken with a member function pointer
+ // need this sort of type metadata.
+ return !MD->isStatic() && !MD->isVirtual() && !isa<CXXConstructorDecl>(MD) &&
+ !isa<CXXDestructorDecl>(MD);
+}
+
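+// Return the "most base" classes of RD: those bases, reachable through any
+// path, that themselves have no bases. Worked example (illustrative): for
+//   struct A {}; struct B {}; struct C : A, B {}; struct D : C {};
+// the result for D is {A, B}.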
+std::vector<const CXXRecordDecl *>
+CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) {
+ llvm::SetVector<const CXXRecordDecl *> MostBases;
+
+ std::function<void (const CXXRecordDecl *)> CollectMostBases;
+ CollectMostBases = [&](const CXXRecordDecl *RD) {
+ if (RD->getNumBases() == 0)
+ MostBases.insert(RD);
+ for (const CXXBaseSpecifier &B : RD->bases())
+ CollectMostBases(B.getType()->getAsCXXRecordDecl());
+ };
+ CollectMostBases(RD);
+ return MostBases.takeVector();
+}
+
void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
llvm::Function *F) {
llvm::AttrBuilder B;
@@ -947,12 +1210,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if (!hasUnwindExceptions(LangOpts))
B.addAttribute(llvm::Attribute::NoUnwind);
- if (LangOpts.getStackProtector() == LangOptions::SSPOn)
- B.addAttribute(llvm::Attribute::StackProtect);
- else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
- B.addAttribute(llvm::Attribute::StackProtectStrong);
- else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
- B.addAttribute(llvm::Attribute::StackProtectReq);
+ if (!D || !D->hasAttr<NoStackProtectorAttr>()) {
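+    // For example (illustrative), a function declared with
+    // __attribute__((no_stack_protector)) is skipped here even under
+    // -fstack-protector-strong.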
+ if (LangOpts.getStackProtector() == LangOptions::SSPOn)
+ B.addAttribute(llvm::Attribute::StackProtect);
+ else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
+ B.addAttribute(llvm::Attribute::StackProtectStrong);
+ else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
+ B.addAttribute(llvm::Attribute::StackProtectReq);
+ }
if (!D) {
// If we don't have a declaration to control inlining, the function isn't
@@ -1044,6 +1309,10 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if (alignment)
F->setAlignment(alignment);
+ if (!D->hasAttr<AlignedAttr>())
+ if (LangOpts.FunctionAlignment)
+ F->setAlignment(1 << LangOpts.FunctionAlignment);
+
// Some C++ ABIs require 2-byte alignment for member functions, in order to
// reserve a bit for differentiating between virtual and non-virtual member
// functions. If the current target's C++ ABI requires this and this is a
@@ -1056,13 +1325,26 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// In the cross-dso CFI mode, we want !type attributes on definitions only.
if (CodeGenOpts.SanitizeCfiCrossDso)
if (auto *FD = dyn_cast<FunctionDecl>(D))
- CreateFunctionTypeMetadata(FD, F);
+ CreateFunctionTypeMetadataForIcall(FD, F);
+
+ // Emit type metadata on member functions for member function pointer checks.
+ // These are only ever necessary on definitions; we're guaranteed that the
+ // definition will be present in the LTO unit as a result of LTO visibility.
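+  // E.g. (illustrative): taking `&S::f` into a `void (S::*)()` member
+  // pointer produces call sites that can be checked against this type id.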
+ auto *MD = dyn_cast<CXXMethodDecl>(D);
+ if (MD && requiresMemberFunctionPointerTypeMetadata(*this, MD)) {
+ for (const CXXRecordDecl *Base : getMostBaseClasses(MD->getParent())) {
+ llvm::Metadata *Id =
+ CreateMetadataIdentifierForType(Context.getMemberPointerType(
+ MD->getType(), Context.getRecordType(Base).getTypePtr()));
+ F->addTypeMetadata(0, Id);
+ }
+ }
}
-void CodeGenModule::SetCommonAttributes(const Decl *D,
- llvm::GlobalValue *GV) {
- if (const auto *ND = dyn_cast_or_null<NamedDecl>(D))
- setGlobalVisibility(GV, ND, ForDefinition);
+void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
+ const Decl *D = GD.getDecl();
+ if (dyn_cast_or_null<NamedDecl>(D))
+ setGVProperties(GV, GD);
else
GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
@@ -1070,19 +1352,59 @@ void CodeGenModule::SetCommonAttributes(const Decl *D,
addUsedGlobal(GV);
}
-void CodeGenModule::setAliasAttributes(const Decl *D,
- llvm::GlobalValue *GV) {
- SetCommonAttributes(D, GV);
+bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D,
+ llvm::AttrBuilder &Attrs) {
+ // Add target-cpu and target-features attributes to functions. If
+ // we have a decl for the function and it has a target attribute then
+ // parse that and add it to the feature set.
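+  // E.g. (illustrative): target("avx2") on an x86-64 function results in
+  // roughly "target-cpu"="x86-64" and a canonical, sorted
+  // "target-features"="...,+avx,+avx2,..." attribute pair.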
+ StringRef TargetCPU = getTarget().getTargetOpts().CPU;
+ std::vector<std::string> Features;
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ FD = FD ? FD->getMostRecentDecl() : FD;
+ const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
+ const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
+ bool AddedAttr = false;
+ if (TD || SD) {
+ llvm::StringMap<bool> FeatureMap;
+ getFunctionFeatureMap(FeatureMap, FD);
+
+ // Produce the canonical string for this set of features.
+ for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
+ Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str());
+
+ // Now add the target-cpu and target-features to the function.
+ // While we populated the feature map above, we still need to
+ // get and parse the target attribute so we can get the cpu for
+ // the function.
+ if (TD) {
+ TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
+ if (ParsedAttr.Architecture != "" &&
+ getTarget().isValidCPUName(ParsedAttr.Architecture))
+ TargetCPU = ParsedAttr.Architecture;
+ }
+ } else {
+ // Otherwise just add the existing target cpu and target features to the
+ // function.
+ Features = getTarget().getTargetOpts().Features;
+ }
+
+ if (TargetCPU != "") {
+ Attrs.addAttribute("target-cpu", TargetCPU);
+ AddedAttr = true;
+ }
+ if (!Features.empty()) {
+ llvm::sort(Features.begin(), Features.end());
+ Attrs.addAttribute("target-features", llvm::join(Features, ","));
+ AddedAttr = true;
+ }
- // Process the dllexport attribute based on whether the original definition
- // (not necessarily the aliasee) was exported.
- if (D->hasAttr<DLLExportAttr>())
- GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ return AddedAttr;
}
-void CodeGenModule::setNonAliasAttributes(const Decl *D,
+void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
llvm::GlobalObject *GO) {
- SetCommonAttributes(D, GO);
+ const Decl *D = GD.getDecl();
+ SetCommonAttributes(GD, GO);
if (D) {
if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) {
@@ -1096,55 +1418,60 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D,
if (auto *F = dyn_cast<llvm::Function>(GO)) {
if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>())
- if (!D->getAttr<SectionAttr>())
- F->addFnAttr("implicit-section-name", SA->getName());
+ if (!D->getAttr<SectionAttr>())
+ F->addFnAttr("implicit-section-name", SA->getName());
+
+ llvm::AttrBuilder Attrs;
+ if (GetCPUAndFeaturesAttributes(D, Attrs)) {
+ // We know that GetCPUAndFeaturesAttributes will always have the
+ // newest set, since it has the newest possible FunctionDecl, so the
+ // new ones should replace the old.
+ F->removeFnAttr("target-cpu");
+ F->removeFnAttr("target-features");
+ F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs);
+ }
}
-
- if (const SectionAttr *SA = D->getAttr<SectionAttr>())
+
+ if (const auto *CSA = D->getAttr<CodeSegAttr>())
+ GO->setSection(CSA->getName());
+ else if (const auto *SA = D->getAttr<SectionAttr>())
GO->setSection(SA->getName());
}
- getTargetCodeGenInfo().setTargetAttributes(D, GO, *this, ForDefinition);
+ getTargetCodeGenInfo().setTargetAttributes(D, GO, *this);
}
-void CodeGenModule::SetInternalFunctionAttributes(const Decl *D,
+void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD,
llvm::Function *F,
const CGFunctionInfo &FI) {
+ const Decl *D = GD.getDecl();
SetLLVMFunctionAttributes(D, FI, F);
SetLLVMFunctionAttributesForDefinition(D, F);
F->setLinkage(llvm::Function::InternalLinkage);
- setNonAliasAttributes(D, F);
+ setNonAliasAttributes(GD, F);
}
-static void setLinkageForGV(llvm::GlobalValue *GV,
- const NamedDecl *ND) {
+static void setLinkageForGV(llvm::GlobalValue *GV, const NamedDecl *ND) {
// Set linkage and visibility in case we never see a definition.
LinkageInfo LV = ND->getLinkageAndVisibility();
- if (!isExternallyVisible(LV.getLinkage())) {
- // Don't set internal linkage on declarations.
- } else {
- if (ND->hasAttr<DLLImportAttr>()) {
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- } else if (ND->hasAttr<DLLExportAttr>()) {
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) {
- // "extern_weak" is overloaded in LLVM; we probably should have
- // separate linkage types for this.
- GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
- }
- }
+ // Don't set internal linkage on declarations.
+ // "extern_weak" is overloaded in LLVM; we probably should have
+ // separate linkage types for this.
+ if (isExternallyVisible(LV.getLinkage()) &&
+ (ND->hasAttr<WeakAttr>() || ND->isWeakImported()))
+ GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
}
-void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD,
- llvm::Function *F) {
+void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+ llvm::Function *F) {
// Only if we are checking indirect calls.
if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall))
return;
- // Non-static class methods are handled via vtable pointer checks elsewhere.
+ // Non-static class methods are handled via vtable or member function pointer
+ // checks elsewhere.
if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic())
return;
@@ -1168,8 +1495,7 @@ void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD,
void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
bool IsIncompleteFunction,
- bool IsThunk,
- ForDefinition_t IsForDefinition) {
+ bool IsThunk) {
if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) {
// If this is an intrinsic function, set the function's attributes
@@ -1183,9 +1509,8 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
if (!IsIncompleteFunction) {
SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
// Setup target-specific attributes.
- if (!IsForDefinition)
- getTargetCodeGenInfo().setTargetAttributes(FD, F, *this,
- NotForDefinition);
+ if (F->isDeclaration())
+ getTargetCodeGenInfo().setTargetAttributes(FD, F, *this);
}
// Add the Returned attribute for "this", except for iOS 5 and earlier
@@ -1204,14 +1529,12 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
// overridden by a definition.
setLinkageForGV(F, FD);
- setGlobalVisibility(F, FD, NotForDefinition);
-
- if (FD->getAttr<PragmaClangTextSectionAttr>()) {
- F->addFnAttr("implicit-section-name");
- }
+ setGVProperties(F, FD);
- if (const SectionAttr *SA = FD->getAttr<SectionAttr>())
- F->setSection(SA->getName());
+ if (const auto *CSA = FD->getAttr<CodeSegAttr>())
+ F->setSection(CSA->getName());
+ else if (const auto *SA = FD->getAttr<SectionAttr>())
+ F->setSection(SA->getName());
if (FD->isReplaceableGlobalAllocationFunction()) {
// A replaceable global allocation function does not act like a builtin by
@@ -1238,7 +1561,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
// Don't emit entries for function declarations in the cross-DSO mode. This
// is handled with better precision by the receiving DSO.
if (!CodeGenOpts.SanitizeCfiCrossDso)
- CreateFunctionTypeMetadata(FD, F);
+ CreateFunctionTypeMetadataForIcall(FD, F);
if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
getOpenMPRuntime().emitDeclareSimdFunction(FD, F);
@@ -1299,6 +1622,12 @@ void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) {
LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
}
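+// Record an ELF "lib" linker-options entry. Illustrative trigger (sketch):
+//   #pragma comment(lib, "m")
+// on an ELF target, which now yields !{!"lib", !"m"} metadata instead of a
+// target-specific -l option spelling.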
+void CodeGenModule::AddELFLibDirective(StringRef Lib) {
+ auto &C = getLLVMContext();
+ LinkerOptionsMetadata.push_back(llvm::MDNode::get(
+ C, {llvm::MDString::get(C, "lib"), llvm::MDString::get(C, Lib)}));
+}
+
void CodeGenModule::AddDependentLib(StringRef Lib) {
llvm::SmallString<24> Opt;
getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt);
@@ -1306,7 +1635,7 @@ void CodeGenModule::AddDependentLib(StringRef Lib) {
LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
}
-/// \brief Add link options implied by the given module, including modules
+/// Add link options implied by the given module, including modules
/// it depends on, using a postorder walk.
static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
SmallVectorImpl<llvm::MDNode *> &Metadata,
@@ -1325,6 +1654,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
// Add linker options to link against the libraries/frameworks
// described by this module.
llvm::LLVMContext &Context = CGM.getLLVMContext();
+
+ // For modules that use export_as for linking, use that module
+ // name instead.
+ if (Mod->UseExportAsModuleLinkName)
+ return;
+
for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) {
// Link against a framework. Frameworks are currently Darwin only, so we
// don't need to ask TargetCodeGenInfo for the spelling of the linker option.
@@ -1586,7 +1921,8 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
StringRef Category) const {
// For now globals can be blacklisted only in ASan and KASan.
const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask &
- (SanitizerKind::Address | SanitizerKind::KernelAddress | SanitizerKind::HWAddress);
+ (SanitizerKind::Address | SanitizerKind::KernelAddress |
+ SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress);
if (!EnabledAsanMask)
return false;
const auto &SanitizerBL = getContext().getSanitizerBlacklist();
@@ -1615,9 +1951,10 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
StringRef Category) const {
if (!LangOpts.XRayInstrument)
return false;
+
const auto &XRayFilter = getContext().getXRayFilter();
using ImbueAttr = XRayFunctionFilter::ImbueAttribute;
- auto Attr = XRayFunctionFilter::ImbueAttribute::NONE;
+ auto Attr = ImbueAttr::NONE;
if (Loc.isValid())
Attr = XRayFilter.shouldImbueLocation(Loc, Category);
if (Attr == ImbueAttr::NONE)
@@ -1662,7 +1999,8 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// If OpenMP is enabled and threadprivates must be generated like TLS, delay
// codegen for global variables, because they may be marked as threadprivate.
if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS &&
- getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global))
+ getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) &&
+ !isTypeConstant(Global->getType(), false))
return false;
return true;
@@ -1691,6 +2029,7 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor(
/*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name);
if (supportsCOMDAT())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
+ setDSOLocal(GV);
return ConstantAddress(GV, Alignment);
}
@@ -1742,6 +2081,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
if (Global->hasAttr<IFuncAttr>())
return emitIFuncDefinition(GD);
+ // If this is a cpu_dispatch multiversion function, emit the resolver.
+ if (Global->hasAttr<CPUDispatchAttr>())
+ return emitCPUDispatchDefinition(GD);
+
// If this is CUDA, be selective about which declarations we emit.
if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
@@ -2058,6 +2401,124 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
llvm::Function *NewFn);
+void CodeGenModule::emitMultiVersionFunctions() {
+ for (GlobalDecl GD : MultiVersionFuncs) {
+ SmallVector<CodeGenFunction::TargetMultiVersionResolverOption, 10> Options;
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+ getContext().forEachMultiversionedFunctionVersion(
+ FD, [this, &GD, &Options](const FunctionDecl *CurFD) {
+ GlobalDecl CurGD{
+ (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
+ StringRef MangledName = getMangledName(CurGD);
+ llvm::Constant *Func = GetGlobalValue(MangledName);
+ if (!Func) {
+ if (CurFD->isDefined()) {
+ EmitGlobalFunctionDefinition(CurGD, nullptr);
+ Func = GetGlobalValue(MangledName);
+ } else {
+ const CGFunctionInfo &FI =
+ getTypes().arrangeGlobalDeclaration(GD);
+ llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+ Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
+ /*DontDefer=*/false, ForDefinition);
+ }
+ assert(Func && "This should have just been created");
+ }
+ Options.emplace_back(getTarget(), cast<llvm::Function>(Func),
+ CurFD->getAttr<TargetAttr>()->parse());
+ });
+
+ llvm::Function *ResolverFunc = cast<llvm::Function>(
+ GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+ if (supportsCOMDAT())
+ ResolverFunc->setComdat(
+ getModule().getOrInsertComdat(ResolverFunc->getName()));
+ std::stable_sort(
+ Options.begin(), Options.end(),
+ std::greater<CodeGenFunction::TargetMultiVersionResolverOption>());
+ CodeGenFunction CGF(*this);
+ CGF.EmitTargetMultiVersionResolver(ResolverFunc, Options);
+ }
+}
+
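+// Emit the resolver for a cpu_dispatch function. Illustrative source
+// (sketch) that reaches this path:
+//   __attribute__((cpu_specific(ivybridge))) void f(void);
+//   __attribute__((cpu_specific(atom)))      void f(void);
+//   __attribute__((cpu_dispatch(ivybridge, atom))) void f(void);
+// Each cpu_specific version is found or created under its CPU-specific
+// mangling; the cpu_dispatch declaration itself becomes the resolver.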
+void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+ assert(FD && "Not a FunctionDecl?");
+ const auto *DD = FD->getAttr<CPUDispatchAttr>();
+ assert(DD && "Not a cpu_dispatch Function?");
+ llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType());
+
+ StringRef ResolverName = getMangledName(GD);
+ llvm::Type *ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(DeclTy,
+ Context.getTargetAddressSpace(FD->getType())),
+ false);
+ auto *ResolverFunc = cast<llvm::Function>(
+ GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
+ /*ForVTable=*/false));
+
+ SmallVector<CodeGenFunction::CPUDispatchMultiVersionResolverOption, 10>
+ Options;
+ const TargetInfo &Target = getTarget();
+ for (const IdentifierInfo *II : DD->cpus()) {
+ // Get the name of the target function so we can look it up/create it.
+ std::string MangledName = getMangledNameImpl(*this, GD, FD, true) +
+ getCPUSpecificMangling(*this, II->getName());
+ llvm::Constant *Func = GetOrCreateLLVMFunction(
+ MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false,
+ /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+ llvm::SmallVector<StringRef, 32> Features;
+ Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
+ llvm::transform(Features, Features.begin(),
+ [](StringRef Str) { return Str.substr(1); });
+ Features.erase(std::remove_if(
+ Features.begin(), Features.end(), [&Target](StringRef Feat) {
+ return !Target.validateCpuSupports(Feat);
+ }), Features.end());
+ Options.emplace_back(cast<llvm::Function>(Func),
+ CodeGenFunction::GetX86CpuSupportsMask(Features));
+ }
+
+ llvm::sort(
+ Options.begin(), Options.end(),
+ std::greater<CodeGenFunction::CPUDispatchMultiVersionResolverOption>());
+ CodeGenFunction CGF(*this);
+ CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options);
+}
+
+/// If an ifunc for the specified mangled name is not in the module, create
+/// and return an llvm::GlobalIFunc with the specified type.
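+/// Illustrative result (sketch): callers of a multiversioned f() reference
+/// "f.ifunc", a GlobalIFunc whose resolver "f.resolver" selects the
+/// concrete version when the program is loaded.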
+llvm::Constant *
+CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
+ const FunctionDecl *FD) {
+ std::string MangledName =
+ getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ std::string IFuncName = MangledName + ".ifunc";
+ if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName))
+ return IFuncGV;
+
+ // Since this is the first time we've created this IFunc, make sure
+ // that we put this multiversioned function into the list to be
+ // replaced later if necessary (target multiversioning only).
+ if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
+ MultiVersionFuncs.push_back(GD);
+
+ std::string ResolverName = MangledName + ".resolver";
+ llvm::Type *ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(DeclTy,
+ Context.getTargetAddressSpace(FD->getType())),
+ false);
+ llvm::Constant *Resolver =
+ GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
+ /*ForVTable=*/false);
+ llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
+ DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+ GIF->setName(IFuncName);
+ SetCommonAttributes(FD, GIF);
+
+ return GIF;
+}
+
/// GetOrCreateLLVMFunction - If the specified mangled name is not in the
/// module, create and return an llvm Function with the specified type. If there
/// is something in the module with the specified name, return it potentially
@@ -2071,6 +2532,33 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
ForDefinition_t IsForDefinition) {
const Decl *D = GD.getDecl();
+ // Any attempts to use a MultiVersion function should result in retrieving
+  // the IFunc instead. Name mangling will handle the rest of the changes.
+ if (const FunctionDecl *FD = cast_or_null<FunctionDecl>(D)) {
+ // For the device mark the function as one that should be emitted.
+ if (getLangOpts().OpenMPIsDevice && OpenMPRuntime &&
+ !OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() &&
+ !DontDefer && !IsForDefinition) {
+ const FunctionDecl *FDDef = FD->getDefinition();
+ GlobalDecl GDDef;
+ if (const auto *CD = dyn_cast<CXXConstructorDecl>(FDDef))
+ GDDef = GlobalDecl(CD, GD.getCtorType());
+ else if (const auto *DD = dyn_cast<CXXDestructorDecl>(FDDef))
+ GDDef = GlobalDecl(DD, GD.getDtorType());
+ else
+ GDDef = GlobalDecl(FDDef);
+ addDeferredDeclToEmit(GDDef);
+ }
+
+ if (FD->isMultiVersion()) {
+ const auto *TA = FD->getAttr<TargetAttr>();
+ if (TA && TA->isDefaultVersion())
+ UpdateMultiVersionNames(GD, FD);
+ if (!IsForDefinition)
+ return GetOrCreateMultiVersionIFunc(GD, Ty, FD);
+ }
+ }
+
// Lookup the entry, lazily creating it if necessary.
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
if (Entry) {
@@ -2081,8 +2569,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
}
// Handle dropped DLL attributes.
- if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>())
+ if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>()) {
Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ setDSOLocal(Entry);
+ }
// If there are two attempts to define the same mangled name, issue an
// error.
@@ -2094,8 +2584,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
(GD.getCanonicalDecl().getDecl() !=
OtherGD.getCanonicalDecl().getDecl()) &&
DiagnosedConflictingDefinitions.insert(GD).second) {
- getDiags().Report(D->getLocation(),
- diag::err_duplicate_mangled_name);
+ getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name)
+ << MangledName;
getDiags().Report(OtherGD.getDecl()->getLocation(),
diag::note_previous_definition);
}
@@ -2157,8 +2647,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
assert(F->getName() == MangledName && "name was uniqued!");
if (D)
- SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk,
- IsForDefinition);
+ SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) {
llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex);
F->addAttributes(llvm::AttributeList::FunctionIndex, B);
@@ -2234,6 +2723,16 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
Ty = getTypes().ConvertFunctionType(CanonTy, FD);
}
+ // Devirtualized destructor calls may come through here instead of via
+ // getAddrOfCXXStructor. Make sure we use the MS ABI base destructor instead
+ // of the complete destructor when necessary.
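+  // E.g. (illustrative): a devirtualized `p->~S()` names Dtor_Complete, but
+  // for an MS-ABI class with no virtual bases the two variants are the same
+  // and only the base destructor symbol is emitted.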
+ if (const auto *DD = dyn_cast<CXXDestructorDecl>(GD.getDecl())) {
+ if (getTarget().getCXXABI().isMicrosoft() &&
+ GD.getDtorType() == Dtor_Complete &&
+ DD->getParent()->getNumVBases() == 0)
+ GD = GlobalDecl(DD, Dtor_Base);
+ }
+
StringRef MangledName = getMangledName(GD);
return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
/*IsThunk=*/false, llvm::AttributeList(),
@@ -2255,7 +2754,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) {
// Demangle the premangled name from getTerminateFn()
IdentifierInfo &CXXII =
- (Name == "_ZSt9terminatev" || Name == "\01?terminate@@YAXXZ")
+ (Name == "_ZSt9terminatev" || Name == "?terminate@@YAXXZ")
? C.Idents.get("terminate")
: C.Idents.get(Name);
@@ -2302,6 +2801,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
F->setLinkage(llvm::GlobalValue::ExternalLinkage);
}
}
+ setDSOLocal(F);
}
}
@@ -2313,13 +2813,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
llvm::Constant *
CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name,
llvm::AttributeList ExtraAttrs) {
- llvm::Constant *C =
- GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
- /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs);
- if (auto *F = dyn_cast<llvm::Function>(C))
- if (F->empty())
- F->setCallingConv(getBuiltinCC());
- return C;
+ return CreateRuntimeFunction(FTy, Name, ExtraAttrs, true);
}
/// isTypeConstant - Determine whether an object of this type can be emitted
@@ -2350,7 +2844,7 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) {
/// If D is non-null, it specifies a decl that corresponds to this. This is used
/// to set the attributes on the global when it is first created.
///
-/// If IsForDefinition is true, it is guranteed that an actual global with
+/// If IsForDefinition is true, it is guaranteed that an actual global with
/// type Ty will be returned, not conversion of a variable with the same
/// mangled name but some other type.
llvm::Constant *
@@ -2370,6 +2864,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>())
Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D)
+ getOpenMPRuntime().registerTargetGlobalVariable(D, Entry);
+
if (Entry->getType() == Ty)
return Entry;
@@ -2386,8 +2883,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
(OtherD = dyn_cast<VarDecl>(OtherGD.getDecl())) &&
OtherD->hasInit() &&
DiagnosedConflictingDefinitions.insert(D).second) {
- getDiags().Report(D->getLocation(),
- diag::err_duplicate_mangled_name);
+ getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name)
+ << MangledName;
getDiags().Report(OtherGD.getDecl()->getLocation(),
diag::note_previous_definition);
}
@@ -2438,6 +2935,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
// Handle things which are present even on external declarations.
if (D) {
+ if (LangOpts.OpenMP && !LangOpts.OpenMPSimd)
+ getOpenMPRuntime().registerTargetGlobalVariable(D, GV);
+
// FIXME: This code is overly simple and should be merged with other global
// handling.
GV->setConstant(isTypeConstant(D->getType(), false));
@@ -2445,7 +2945,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
setLinkageForGV(GV, D);
- setGlobalVisibility(GV, D, NotForDefinition);
if (D->getTLSKind()) {
if (D->getTLSKind() == VarDecl::TLS_Dynamic)
@@ -2453,6 +2952,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
setTLSMode(GV, *D);
}
+ setGVProperties(GV, D);
+
// If required by the ABI, treat declarations of static data members with
// inline initializers as definitions.
if (getContext().isMSStaticDataMemberInlineDefinition(D)) {
@@ -2501,7 +3002,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
GetAddrOfGlobalVar(D, InitType, IsForDefinition));
// Erase the old global, since it is no longer used.
- cast<llvm::GlobalValue>(GV)->eraseFromParent();
+ GV->eraseFromParent();
GV = NewGV;
} else {
GV->setInitializer(Init);
@@ -2602,7 +3103,7 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
/// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the
/// given global variable. If Ty is non-null and if the global doesn't exist,
/// then it will be created with the specified type instead of whatever the
-/// normal requested type would be. If IsForDefinition is true, it is guranteed
+/// normal requested type would be. If IsForDefinition is true, it is guaranteed
/// that an actual global with type Ty will be returned, not conversion of a
/// variable with the same mangled name but some other type.
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
@@ -2625,7 +3126,10 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
llvm::Constant *
CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty,
StringRef Name) {
- return GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr);
+ auto *Ret =
+ GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr);
+ setDSOLocal(cast<llvm::GlobalValue>(Ret->stripPointerCasts()));
+ return Ret;
}
void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) {
@@ -2679,6 +3183,39 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
}
+LangAS CodeGenModule::getStringLiteralAddressSpace() const {
+ // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
+ if (LangOpts.OpenCL)
+ return LangAS::opencl_constant;
+ if (auto AS = getTarget().getConstantAddressSpace())
+ return AS.getValue();
+ return LangAS::Default;
+}
+
+// In address space agnostic languages, string literals are in the default
+// address space in the AST. However, certain targets (e.g. amdgcn) request
+// that they be emitted in the constant address space in LLVM IR. To be
+// consistent with the rest of the AST, string literal global variables in the
+// constant address space need to be cast to the default address space before
+// being put into the address map and referenced by other parts of CodeGen.
+// In OpenCL, string literals are in the constant address space in the AST, so
+// they should not be cast to the default address space.
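+// Illustrative effect (sketch): on such a target the literal "str" becomes
+// a global in the constant address space, and the value handed back to the
+// rest of CodeGen is an addrspacecast of it to the default address space.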
+static llvm::Constant *
+castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM,
+ llvm::GlobalVariable *GV) {
+ llvm::Constant *Cast = GV;
+ if (!CGM.getLangOpts().OpenCL) {
+ if (auto AS = CGM.getTarget().getConstantAddressSpace()) {
+ if (AS != LangAS::Default)
+ Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast(
+ CGM, GV, AS.getValue(), LangAS::Default,
+ GV->getValueType()->getPointerTo(
+ CGM.getContext().getTargetAddressSpace(LangAS::Default)));
+ }
+ }
+ return Cast;
+}
+
template<typename SomeDecl>
void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D,
llvm::GlobalValue *GV) {
@@ -2753,6 +3290,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
if (getLangOpts().OpenCL && ASTTy->isSamplerT())
return;
+ // If this is OpenMP device, check if it is legal to emit this global
+ // normally.
+ if (LangOpts.OpenMPIsDevice && OpenMPRuntime &&
+ OpenMPRuntime->emitTargetGlobalVariable(D))
+ return;
+
llvm::Constant *Init = nullptr;
CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
bool NeedsGlobalCtor = false;
@@ -2989,7 +3532,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
return true;
// A variable cannot be both common and exist in a section.
- // We dont try to determine which is the right section in the front-end.
+ // We don't try to determine which is the right section in the front-end.
// If no specialized section name is applicable, it will resort to default.
if (D->hasAttr<PragmaClangBSSSectionAttr>() ||
D->hasAttr<PragmaClangDataSectionAttr>() ||
@@ -3261,18 +3804,18 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
// declarations).
auto *Fn = cast<llvm::Function>(GV);
setFunctionLinkage(GD, Fn);
- setFunctionDLLStorageClass(GD, Fn);
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
- setGlobalVisibility(Fn, D, ForDefinition);
+ setGVProperties(Fn, GD);
MaybeHandleStaticInExternC(D, Fn);
+
maybeSetTrivialComdat(*D, *Fn);
CodeGenFunction(*this).GenerateCode(D, Fn, FI);
- setFunctionDefinitionAttributes(D, Fn);
+ setNonAliasAttributes(GD, Fn);
SetLLVMFunctionAttributesForDefinition(D, Fn);
if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())
@@ -3281,6 +3824,15 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
+
+ if (D->isCPUSpecificMultiVersion()) {
+ auto *Spec = D->getAttr<CPUSpecificAttr>();
+ // If there is another specific version we need to emit, do so here.
+ if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) {
+ ++Spec->ActiveArgIndex;
+ EmitGlobalFunctionDefinition(GD, nullptr);
+ }
+ }
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -3356,7 +3908,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
if (VD->getTLSKind())
setTLSMode(GA, *VD);
- setAliasAttributes(D, GA);
+ SetCommonAttributes(GD, GA);
}
void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
@@ -3377,7 +3929,8 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
GlobalDecl OtherGD;
if (lookupRepresentativeDecl(MangledName, OtherGD) &&
DiagnosedConflictingDefinitions.insert(GD).second) {
- Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name);
+ Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name)
+ << MangledName;
Diags.Report(OtherGD.getDecl()->getLocation(),
diag::note_previous_definition);
}
@@ -3415,7 +3968,7 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
} else
GIF->setName(MangledName);
- SetCommonAttributes(D, GIF);
+ SetCommonAttributes(GD, GIF);
}
llvm::Function *CodeGenModule::getIntrinsic(unsigned IID,
@@ -3477,14 +4030,13 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
if (!CFConstantStringClassRef) {
llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy);
Ty = llvm::ArrayType::get(Ty, 0);
- llvm::Constant *GV =
- CreateRuntimeVariable(Ty, "__CFConstantStringClassReference");
+ llvm::GlobalValue *GV = cast<llvm::GlobalValue>(
+ CreateRuntimeVariable(Ty, "__CFConstantStringClassReference"));
if (getTriple().isOSBinFormatCOFF()) {
IdentifierInfo &II = getContext().Idents.get(GV->getName());
TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
- llvm::GlobalValue *CGV = cast<llvm::GlobalValue>(GV);
const VarDecl *VD = nullptr;
for (const auto &Result : DC->lookup(&II))
@@ -3492,13 +4044,14 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
break;
if (!VD || !VD->hasAttr<DLLExportAttr>()) {
- CGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- CGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
} else {
- CGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- CGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
}
}
+ setDSOLocal(GV);
// Decay array -> ptr
CFConstantStringClassRef =
@@ -3666,10 +4219,8 @@ static llvm::GlobalVariable *
GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT,
CodeGenModule &CGM, StringRef GlobalName,
CharUnits Alignment) {
- // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
- unsigned AddrSpace = 0;
- if (CGM.getLangOpts().OpenCL)
- AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
+ unsigned AddrSpace = CGM.getContext().getTargetAddressSpace(
+ CGM.getStringLiteralAddressSpace());
llvm::Module &M = CGM.getModule();
// Create a global variable for this string
@@ -3682,6 +4233,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT,
assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals");
GV->setComdat(M.getOrInsertComdat(GV->getName()));
}
+ CGM.setDSOLocal(GV);
return GV;
}
@@ -3730,7 +4282,9 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S,
SanitizerMD->reportGlobalToASan(GV, S->getStrTokenLoc(0), "<string literal>",
QualType());
- return ConstantAddress(GV, Alignment);
+
+ return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
}
/// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant
@@ -3774,7 +4328,9 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString(
GlobalName, Alignment);
if (Entry)
*Entry = GV;
- return ConstantAddress(GV, Alignment);
+
+ return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
}
ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
@@ -3847,7 +4403,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) &&
isa<CXXRecordDecl>(InitVD->getLexicalDeclContext())) {
// Temporaries defined inside a class get linkonce_odr linkage because the
- // class can be defined in multipe translation units.
+ // class can be defined in multiple translation units.
Linkage = llvm::GlobalVariable::LinkOnceODRLinkage;
} else {
// There is no need for this temporary to have external linkage if the
@@ -3860,7 +4416,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(),
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
if (emitter) emitter->finalize(GV);
- setGlobalVisibility(GV, VD, ForDefinition);
+ setGVProperties(GV, VD);
GV->setAlignment(Align.getQuantity());
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -3997,18 +4553,13 @@ void CodeGenModule::EmitDeclContext(const DeclContext *DC) {
/// EmitTopLevelDecl - Emit code for a single top level declaration.
void CodeGenModule::EmitTopLevelDecl(Decl *D) {
// Ignore dependent declarations.
- if (D->getDeclContext() && D->getDeclContext()->isDependentContext())
+ if (D->isTemplated())
return;
switch (D->getKind()) {
case Decl::CXXConversion:
case Decl::CXXMethod:
case Decl::Function:
- // Skip function templates
- if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() ||
- cast<FunctionDecl>(D)->isLateTemplateParsed())
- return;
-
EmitGlobal(cast<FunctionDecl>(D));
// Always provide some coverage mapping
// even for the functions that aren't emitted.
@@ -4021,10 +4572,6 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
case Decl::Var:
case Decl::Decomposition:
- // Skip variable templates
- if (cast<VarDecl>(D)->getDescribedVarTemplate())
- return;
- LLVM_FALLTHROUGH;
case Decl::VarTemplateSpecialization:
EmitGlobal(cast<VarDecl>(D));
if (auto *DD = dyn_cast<DecompositionDecl>(D))
@@ -4083,16 +4630,9 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
DI->EmitUsingDirective(cast<UsingDirectiveDecl>(*D));
return;
case Decl::CXXConstructor:
- // Skip function templates
- if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() ||
- cast<FunctionDecl>(D)->isLateTemplateParsed())
- return;
-
getCXXABI().EmitCXXConstructors(cast<CXXConstructorDecl>(D));
break;
case Decl::CXXDestructor:
- if (cast<FunctionDecl>(D)->isLateTemplateParsed())
- return;
getCXXABI().EmitCXXDestructors(cast<CXXDestructorDecl>(D));
break;
@@ -4152,7 +4692,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
AppendLinkerOptions(PCD->getArg());
break;
case PCK_Lib:
- AddDependentLib(PCD->getArg());
+ if (getTarget().getTriple().isOSBinFormatELF() &&
+ !getTarget().getTriple().isPS4())
+ AddELFLibDirective(PCD->getArg());
+ else
+ AddDependentLib(PCD->getArg());
break;
case PCK_Compiler:
case PCK_ExeStr:
@@ -4358,9 +4902,7 @@ static void EmitGlobalDeclMetadata(CodeGenModule &CGM,
/// to such functions with an unmangled name from inline assembly within the
/// same translation unit.
void CodeGenModule::EmitStaticExternCAliases() {
- // Don't do anything if we're generating CUDA device code -- the NVPTX
- // assembly target doesn't support aliases.
- if (Context.getTargetInfo().getTriple().isNVPTX())
+ if (!getTargetCodeGenInfo().shouldEmitStaticExternCAliases())
return;
for (auto &I : StaticExternCValues) {
IdentifierInfo *Name = I.first;
@@ -4504,7 +5046,7 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
// Return a bogus pointer if RTTI is disabled, unless it's for EH.
// FIXME: should we even be calling this method if RTTI is disabled
// and it's not for EH?
- if (!ForEH && !getLangOpts().RTTI)
+ if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice)
return llvm::Constant::getNullValue(Int8PtrTy);
if (ForEH && Ty->isObjCObjectPointerType() &&
@@ -4515,6 +5057,9 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
}
void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
+ // Do not emit threadprivates in simd-only mode.
+ if (LangOpts.OpenMP && LangOpts.OpenMPSimd)
+ return;
for (auto RefExpr : D->varlists()) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(RefExpr)->getDecl());
bool PerformInit =
@@ -4529,8 +5074,10 @@ void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
}
}
-llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) {
- llvm::Metadata *&InternalId = MetadataIdMap[T.getCanonicalType()];
+llvm::Metadata *
+CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map,
+ StringRef Suffix) {
+ llvm::Metadata *&InternalId = Map[T.getCanonicalType()];
if (InternalId)
return InternalId;
@@ -4538,6 +5085,7 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) {
std::string OutName;
llvm::raw_string_ostream Out(OutName);
getCXXABI().getMangleContext().mangleTypeName(T, Out);
+ Out << Suffix;
InternalId = llvm::MDString::get(getLLVMContext(), Out.str());
} else {
@@ -4548,6 +5096,15 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) {
return InternalId;
}
+llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) {
+ return CreateMetadataIdentifierImpl(T, MetadataIdMap, "");
+}
+
+llvm::Metadata *
+CodeGenModule::CreateMetadataIdentifierForVirtualMemPtrType(QualType T) {
+ return CreateMetadataIdentifierImpl(T, VirtualMetadataIdMap, ".virtual");
+}
+
// Generalize pointer types to a void pointer with the qualifiers of the
// originally pointed-to type, e.g. 'const char *' and 'char * const *'
// generalize to 'const void *' while 'char *' and 'const char **' generalize to
@@ -4581,25 +5138,8 @@ static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) {
}
llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) {
- T = GeneralizeFunctionType(getContext(), T);
-
- llvm::Metadata *&InternalId = GeneralizedMetadataIdMap[T.getCanonicalType()];
- if (InternalId)
- return InternalId;
-
- if (isExternallyVisible(T->getLinkage())) {
- std::string OutName;
- llvm::raw_string_ostream Out(OutName);
- getCXXABI().getMangleContext().mangleTypeName(T, Out);
- Out << ".generalized";
-
- InternalId = llvm::MDString::get(getLLVMContext(), Out.str());
- } else {
- InternalId = llvm::MDNode::getDistinct(getLLVMContext(),
- llvm::ArrayRef<llvm::Metadata *>());
- }
-
- return InternalId;
+ return CreateMetadataIdentifierImpl(GeneralizeFunctionType(getContext(), T),
+ GeneralizedMetadataIdMap, ".generalized");
}
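All three identifier kinds now share CreateMetadataIdentifierImpl and
differ only in their cache map and suffix. Illustratively, for an
externally visible class type whose Itanium RTTI-name mangling is
"_ZTS1T" (a hypothetical spelling), the cached MDStrings would be:

    "_ZTS1T"              // CreateMetadataIdentifierForType
    "_ZTS1T.virtual"      // CreateMetadataIdentifierForVirtualMemPtrType
    "_ZTS1T.generalized"  // CreateMetadataIdentifierGeneralized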
/// Returns whether this module needs the "all-vtables" type identifier.
@@ -4634,22 +5174,28 @@ void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable,
}
}
+TargetAttr::ParsedTargetAttr
+CodeGenModule::filterFunctionTargetAttrs(const TargetAttr *TD) {
+ assert(TD != nullptr);
+ TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
+
+ ParsedAttr.Features.erase(
+ llvm::remove_if(ParsedAttr.Features,
+ [&](const std::string &Feat) {
+ return !Target.isValidFeatureName(
+ StringRef{Feat}.substr(1));
+ }),
+ ParsedAttr.Features.end());
+ return ParsedAttr;
+}
+
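A hedged example of what the filter drops: parsed target-attribute
features carry a '+'/'-' prefix, which the substr(1) strips before
asking the target whether the name is valid.

    // __attribute__((target("avx2,no-sse4.2,bogus")))
    // parses to Features = {"+avx2", "-sse4.2", "+bogus"}; "bogus" fails
    // Target.isValidFeatureName() and is erased, so it never reaches the
    // backend feature map.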
// Fills in the supplied string map with the set of target features for the
// passed in function.
void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
const FunctionDecl *FD) {
StringRef TargetCPU = Target.getTargetOpts().CPU;
if (const auto *TD = FD->getAttr<TargetAttr>()) {
- // If we have a TargetAttr build up the feature map based on that.
- TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
-
- ParsedAttr.Features.erase(
- llvm::remove_if(ParsedAttr.Features,
- [&](const std::string &Feat) {
- return !Target.isValidFeatureName(
- StringRef{Feat}.substr(1));
- }),
- ParsedAttr.Features.end());
+ TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD);
// Make a copy of the features as passed on the command line into the
// beginning of the additional features from the function to override.
@@ -4667,6 +5213,12 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
// the attribute.
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
ParsedAttr.Features);
+ } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
+ llvm::SmallVector<StringRef, 32> FeaturesTmp;
+ Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(),
+ FeaturesTmp);
+ std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
+ Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
} else {
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
Target.getTargetOpts().Features);
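A hedged sketch of the source form that reaches the new CPUSpecificAttr
branch:

    // __attribute__((cpu_specific(ivybridge))) void f(void);
    // For this variant, getCPUSpecificCPUDispatchFeatures("ivybridge", ...)
    // supplies the feature list, which replaces the command-line feature
    // set rather than being appended to it.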
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 22c4463b2c81..ee64ed4f2ae2 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -324,6 +324,10 @@ private:
/// is defined once we get to the end of the translation unit.
std::vector<GlobalDecl> Aliases;
+ /// List of multiversion functions that have to be emitted. Used to make sure
+ /// we properly emit the iFunc.
+ std::vector<GlobalDecl> MultiVersionFuncs;
+
typedef llvm::StringMap<llvm::TrackingVH<llvm::Constant> > ReplacementsTy;
ReplacementsTy Replacements;
@@ -362,6 +366,13 @@ private:
llvm::MapVector<GlobalDecl, StringRef> MangledDeclNames;
llvm::StringMap<GlobalDecl, llvm::BumpPtrAllocator> Manglings;
+ // An ordered map of canonical GlobalDecls paired with the cpu-index for
+ // cpu-specific name manglings.
+ llvm::MapVector<std::pair<GlobalDecl, unsigned>, StringRef>
+ CPUSpecificMangledDeclNames;
+ llvm::StringMap<std::pair<GlobalDecl, unsigned>, llvm::BumpPtrAllocator>
+ CPUSpecificManglings;
+
/// Global annotations.
std::vector<llvm::Constant*> Annotations;
@@ -387,10 +398,10 @@ private:
llvm::GlobalValue *> StaticExternCMap;
StaticExternCMap StaticExternCValues;
- /// \brief thread_local variables defined or used in this TU.
+ /// thread_local variables defined or used in this TU.
std::vector<const VarDecl *> CXXThreadLocals;
- /// \brief thread_local variables with initializers that need to run
+ /// thread_local variables with initializers that need to run
/// before any thread_local variable in this TU is odr-used.
std::vector<llvm::Function *> CXXThreadLocalInits;
std::vector<const VarDecl *> CXXThreadLocalInitVars;
@@ -421,14 +432,14 @@ private:
/// Global destructor functions and arguments that need to run on termination.
std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors;
- /// \brief The complete set of modules that has been imported.
+ /// The complete set of modules that has been imported.
llvm::SetVector<clang::Module *> ImportedModules;
- /// \brief The set of modules for which the module initializers
+ /// The set of modules for which the module initializers
/// have been emitted.
llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers;
- /// \brief A vector of metadata strings.
+ /// A vector of metadata strings.
SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata;
/// @name Cache for Objective-C runtime types
@@ -438,7 +449,7 @@ private:
/// int * but is actually an Obj-C class pointer.
llvm::WeakTrackingVH CFConstantStringClassRef;
- /// \brief The type used to describe the state of a fast enumeration in
+ /// The type used to describe the state of a fast enumeration in
/// Objective-C's for..in loop.
QualType ObjCFastEnumerationStateType;
@@ -499,6 +510,7 @@ private:
/// MDNodes.
typedef llvm::DenseMap<QualType, llvm::Metadata *> MetadataTypeMap;
MetadataTypeMap MetadataIdMap;
+ MetadataTypeMap VirtualMetadataIdMap;
MetadataTypeMap GeneralizedMetadataIdMap;
public:
@@ -685,6 +697,11 @@ public:
TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
TBAAAccessInfo InfoB);
+ /// mergeTBAAInfoForMemoryTransfer - Get merged TBAA information for the
+ /// purposes of memory transfer calls.
+ TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+ TBAAAccessInfo SrcInfo);
+
/// getTBAAInfoForSubobject - Get TBAA information for an access with a given
/// base lvalue.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) {
@@ -710,8 +727,19 @@ public:
llvm::ConstantInt *getSize(CharUnits numChars);
/// Set the visibility for the given LLVM GlobalValue.
- void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D,
- ForDefinition_t IsForDefinition) const;
+ void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const;
+
+ void setGlobalVisibilityAndLocal(llvm::GlobalValue *GV,
+ const NamedDecl *D) const;
+
+ void setDSOLocal(llvm::GlobalValue *GV) const;
+
+ void setDLLImportDLLExport(llvm::GlobalValue *GV, GlobalDecl D) const;
+ void setDLLImportDLLExport(llvm::GlobalValue *GV, const NamedDecl *D) const;
+ /// Set visibility, dllimport/dllexport and dso_local.
+ /// This must be called after dllimport/dllexport is set.
+ void setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const;
+ void setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const;
/// Set the TLS mode for the given LLVM GlobalValue for the thread-local
/// variable declaration D.
@@ -757,7 +785,7 @@ public:
/// Return the llvm::Constant for the address of the given global variable.
/// If Ty is non-null and if the global doesn't exist, then it will be created
/// with the specified type instead of whatever the normal requested type
- /// would be. If IsForDefinition is true, it is guranteed that an actual
+ /// would be. If IsForDefinition is true, it is guaranteed that an actual
/// global with type Ty will be returned, not conversion of a variable with
/// the same mangled name but some other type.
llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D,
@@ -765,6 +793,13 @@ public:
ForDefinition_t IsForDefinition
= NotForDefinition);
+ /// Return the AST address space of a string literal, which is used to emit
+ /// the string literal as a global variable in LLVM IR.
+ /// Note: this is not necessarily the address space of the string literal
+ /// in the AST. For an address-space-agnostic language, e.g. C++, string
+ /// literals in the AST are always in the default address space.
+ LangAS getStringLiteralAddressSpace() const;
+
/// Return the address of the given function. If Ty is non-null, then this
/// function will use the specified type if it has to create it.
llvm::Constant *GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty = nullptr,
@@ -780,7 +815,8 @@ public:
ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
/// Get the address of the thunk for the given global decl.
- llvm::Constant *GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk);
+ llvm::Constant *GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
+ GlobalDecl GD);
/// Get a reference to the target of VD.
ConstantAddress GetWeakRefReference(const ValueDecl *VD);
@@ -879,12 +915,12 @@ public:
void setAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *CLE,
llvm::GlobalVariable *GV);
- /// \brief Returns a pointer to a global variable representing a temporary
+ /// Returns a pointer to a global variable representing a temporary
/// with static or thread storage duration.
ConstantAddress GetAddrOfGlobalTemporary(const MaterializeTemporaryExpr *E,
const Expr *Inner);
- /// \brief Retrieve the record type that describes the state of an
+ /// Retrieve the record type that describes the state of an
/// Objective-C fast enumeration loop (for..in).
QualType getObjCFastEnumerationStateType();
@@ -912,22 +948,22 @@ public:
/// Emit code for a single top level declaration.
void EmitTopLevelDecl(Decl *D);
- /// \brief Stored a deferred empty coverage mapping for an unused
+ /// Store a deferred empty coverage mapping for an unused
/// and thus uninstrumented top level declaration.
void AddDeferredUnusedCoverageMapping(Decl *D);
- /// \brief Remove the deferred empty coverage mapping as this
+ /// Remove the deferred empty coverage mapping as this
/// declaration is actually instrumented.
void ClearUnusedCoverageMapping(const Decl *D);
- /// \brief Emit all the deferred coverage mappings
+ /// Emit all the deferred coverage mappings
/// for the uninstrumented functions.
void EmitDeferredUnusedCoverageMappings();
/// Tell the consumer that this variable has been instantiated.
void HandleCXXStaticMemberVarInstantiation(VarDecl *VD);
- /// \brief If the declaration has internal linkage but is inside an
+ /// If the declaration has internal linkage but is inside an
/// extern "C" linkage specification, prepare to emit an alias for it
/// to the expected name.
template<typename SomeDecl>
@@ -976,7 +1012,7 @@ public:
llvm::Constant *getMemberPointerConstant(const UnaryOperator *e);
- /// \brief Emit type info if type of an expression is a variably modified
+ /// Emit type info if type of an expression is a variably modified
/// type. Also emit proper debug info for cast types.
void EmitExplicitCastExprType(const ExplicitCastExpr *E,
CodeGenFunction *CGF = nullptr);
@@ -1002,7 +1038,7 @@ public:
/// Set the attributes on the LLVM function for the given decl and function
/// info. This applies attributes necessary for handling the ABI as well as
/// user specified attributes like section.
- void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F,
+ void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F,
const CGFunctionInfo &FI);
/// Set the LLVM function attributes (sext, zext, etc).
@@ -1061,6 +1097,10 @@ public:
/// It's up to you to ensure that this is safe.
void AddDefaultFnAttrs(llvm::Function &F);
+ /// Parses the target attributes passed in, and returns the parsed attribute
+ /// with any invalid feature names filtered out.
+ TargetAttr::ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD);
+
// Fills in the supplied string map with the set of target features for the
// passed in function.
void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
@@ -1075,25 +1115,24 @@ public:
void RefreshTypeCacheForClass(const CXXRecordDecl *Class);
- /// \brief Appends Opts to the "llvm.linker.options" metadata value.
+ /// Appends Opts to the "llvm.linker.options" metadata value.
void AppendLinkerOptions(StringRef Opts);
- /// \brief Appends a detect mismatch command to the linker options.
+ /// Appends a detect mismatch command to the linker options.
void AddDetectMismatch(StringRef Name, StringRef Value);
- /// \brief Appends a dependent lib to the "llvm.linker.options" metadata
+ /// Appends a dependent lib to the "llvm.linker.options" metadata
/// value.
void AddDependentLib(StringRef Lib);
+ void AddELFLibDirective(StringRef Lib);
+
llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD);
void setFunctionLinkage(GlobalDecl GD, llvm::Function *F) {
F->setLinkage(getFunctionLinkage(GD));
}
- /// Set the DLL storage class on F.
- void setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F);
-
/// Return the appropriate linkage for the vtable, VTT, and type information
/// of the given class.
llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD);
@@ -1158,40 +1197,29 @@ public:
DeferredVTables.push_back(RD);
}
- /// Emit code for a singal global function or var decl. Forward declarations
+ /// Emit code for a single global function or var decl. Forward declarations
/// are emitted lazily.
void EmitGlobal(GlobalDecl D);
- bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target);
bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D);
- /// Set attributes for a global definition.
- void setFunctionDefinitionAttributes(const FunctionDecl *D,
- llvm::Function *F);
-
llvm::GlobalValue *GetGlobalValue(StringRef Ref);
/// Set attributes which are common to any form of a global definition (alias,
/// Objective-C method, function, global variable).
///
/// NOTE: This should only be called for definitions.
- void SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV);
-
- /// Set attributes which must be preserved by an alias. This includes common
- /// attributes (i.e. it includes a call to SetCommonAttributes).
- ///
- /// NOTE: This should only be called for definitions.
- void setAliasAttributes(const Decl *D, llvm::GlobalValue *GV);
+ void SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV);
void addReplacement(StringRef Name, llvm::Constant *C);
void addGlobalValReplacement(llvm::GlobalValue *GV, llvm::Constant *C);
- /// \brief Emit a code for threadprivate directive.
+ /// Emit code for a threadprivate directive.
/// \param D Threadprivate declaration.
void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D);
- /// \brief Emit a code for declare reduction construct.
+ /// Emit code for a declare reduction construct.
void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
CodeGenFunction *CGF = nullptr);
@@ -1212,13 +1240,18 @@ public:
/// internal identifiers).
llvm::Metadata *CreateMetadataIdentifierForType(QualType T);
+ /// Create a metadata identifier that is intended to be used to check virtual
+ /// calls via a member function pointer.
+ llvm::Metadata *CreateMetadataIdentifierForVirtualMemPtrType(QualType T);
+
/// Create a metadata identifier for the generalization of the given type.
/// This may either be an MDString (for external identifiers) or a distinct
/// unnamed MDNode (for internal identifiers).
llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T);
/// Create and attach type metadata to the given function.
- void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F);
+ void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+ llvm::Function *F);
/// Returns whether this module needs the "all-vtables" type identifier.
bool NeedAllVtablesTypeId() const;
@@ -1227,7 +1260,15 @@ public:
void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset,
const CXXRecordDecl *RD);
- /// \brief Get the declaration of std::terminate for the platform.
+ /// Return a vector of most-base classes for RD. This is used to implement
+ /// control flow integrity checks for member function pointers.
+ ///
+ /// A most-base class of a class C is defined as a recursive base class of C,
+ /// including C itself, that does not have any bases.
+ std::vector<const CXXRecordDecl *>
+ getMostBaseClasses(const CXXRecordDecl *RD);
+
+ /// Get the declaration of std::terminate for the platform.
llvm::Constant *getTerminateFn();
llvm::SanitizerStatReport &getSanStats();
@@ -1247,18 +1288,24 @@ private:
llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
ForDefinition_t IsForDefinition = NotForDefinition);
+ llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD,
+ llvm::Type *DeclTy,
+ const FunctionDecl *FD);
+ void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD);
+
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::PointerType *PTy,
const VarDecl *D,
ForDefinition_t IsForDefinition
= NotForDefinition);
- void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO);
+ bool GetCPUAndFeaturesAttributes(const Decl *D,
+ llvm::AttrBuilder &AttrBuilder);
+ void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO);
/// Set function attributes for a function declaration.
void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
- bool IsIncompleteFunction, bool IsThunk,
- ForDefinition_t IsForDefinition);
+ bool IsIncompleteFunction, bool IsThunk);
void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
@@ -1266,6 +1313,7 @@ private:
void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false);
void EmitAliasDefinition(GlobalDecl GD);
void emitIFuncDefinition(GlobalDecl GD);
+ void emitCPUDispatchDefinition(GlobalDecl GD);
void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
void EmitObjCIvarInitializations(ObjCImplementationDecl *D);
@@ -1274,7 +1322,7 @@ private:
void EmitDeclContext(const DeclContext *DC);
void EmitLinkageSpec(const LinkageSpecDecl *D);
- /// \brief Emit the function that initializes C++ thread_local variables.
+ /// Emit the function that initializes C++ thread_local variables.
void EmitCXXThreadLocalInitFunc();
/// Emit the function that initializes C++ globals.
@@ -1319,6 +1367,14 @@ private:
void checkAliases();
+ std::map<int, llvm::TinyPtrVector<llvm::Function *>> DtorsUsingAtExit;
+
+ /// Register functions annotated with __attribute__((destructor)) using
+ /// __cxa_atexit, if it is available, or atexit otherwise.
+ void registerGlobalDtorsWithAtExit();
+
+ void emitMultiVersionFunctions();
+
/// Emit any vtables which we deferred and still have a use for.
void EmitDeferredVTables();
@@ -1329,16 +1385,16 @@ private:
/// Emit the llvm.used and llvm.compiler.used metadata.
void emitLLVMUsed();
- /// \brief Emit the link options introduced by imported modules.
+ /// Emit the link options introduced by imported modules.
void EmitModuleLinkOptions();
- /// \brief Emit aliases for internal-linkage declarations inside "C" language
+ /// Emit aliases for internal-linkage declarations inside "C" language
/// linkage specifications, giving them the "expected" name where possible.
void EmitStaticExternCAliases();
void EmitDeclMetadata();
- /// \brief Emit the Clang version as llvm.ident metadata.
+ /// Emit the Clang version as llvm.ident metadata.
void EmitVersionIdentMetadata();
/// Emits target specific Metadata for global declarations.
@@ -1373,6 +1429,9 @@ private:
void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
bool AttrOnCallSite,
llvm::AttrBuilder &FuncAttrs);
+
+ llvm::Metadata *CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map,
+ StringRef Suffix);
};
} // end namespace CodeGen
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index 295893c64fbc..c8c2a1b956b8 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -58,7 +58,7 @@ enum PGOHashVersion : unsigned {
};
namespace {
-/// \brief Stable hasher for PGO region counters.
+/// Stable hasher for PGO region counters.
///
/// PGOHash produces a stable hash of a given function's control flow.
///
@@ -79,7 +79,7 @@ class PGOHash {
static const unsigned TooBig = 1u << NumBitsPerType;
public:
- /// \brief Hash values for AST nodes.
+ /// Hash values for AST nodes.
///
/// Distinct values for AST nodes that have region counters attached.
///
@@ -978,7 +978,7 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
RegionCounts = ProfRecord->Counts;
}
-/// \brief Calculate what to divide by to scale weights.
+/// Calculate what to divide by to scale weights.
///
/// Given the maximum weight, calculate a divisor that will scale all the
/// weights to strictly less than UINT32_MAX.
@@ -986,7 +986,7 @@ static uint64_t calculateWeightScale(uint64_t MaxWeight) {
return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}
-/// \brief Scale an individual branch weight (and add 1).
+/// Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp
index ad473032db17..ec48231e5247 100644
--- a/lib/CodeGen/CodeGenTBAA.cpp
+++ b/lib/CodeGen/CodeGenTBAA.cpp
@@ -215,6 +215,19 @@ llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
return MetadataCache[Ty] = TypeNode;
}
+TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) {
+ // Pointee values may have incomplete types, but they shall never be
+ // dereferenced.
+ if (AccessType->isIncompleteType())
+ return TBAAAccessInfo::getIncompleteInfo();
+
+ if (TypeHasMayAlias(AccessType))
+ return TBAAAccessInfo::getMayAliasInfo();
+
+ uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
+ return TBAAAccessInfo(getTypeInfo(AccessType), Size);
+}
+
TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
llvm::DataLayout DL(&Module);
unsigned Size = DL.getPointerTypeSize(VTablePtrType);
@@ -391,3 +404,21 @@ CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
// access type regardless of their base types.
return TBAAAccessInfo::getMayAliasInfo();
}
+
+TBAAAccessInfo
+CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+ TBAAAccessInfo SrcInfo) {
+ if (DestInfo == SrcInfo)
+ return DestInfo;
+
+ if (!DestInfo || !SrcInfo)
+ return TBAAAccessInfo();
+
+ if (DestInfo.isMayAlias() || SrcInfo.isMayAlias())
+ return TBAAAccessInfo::getMayAliasInfo();
+
+ // TODO: Implement the rest of the logic here. For example, two accesses
+ // with the same final access type result in an access to an object of that
+ // final access type regardless of their base types.
+ return TBAAAccessInfo::getMayAliasInfo();
+}
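A minimal usage sketch, assuming the caller sits on an aggregate-copy
path and that DecorateInstructionWithTBAA is the existing CodeGenModule
helper for attaching TBAA to instructions:

    // Merge the TBAA views of both sides of a memcpy-like transfer, then
    // tag the generated call so the optimizer assumes no more than is
    // safe for either operand.
    TBAAAccessInfo CopyInfo =
        CGM.mergeTBAAInfoForMemoryTransfer(DestInfo, SrcInfo);
    CGM.DecorateInstructionWithTBAA(MemCpyCall, CopyInfo);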
diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h
index a5b1f66bcd1a..86ba407c05c6 100644
--- a/lib/CodeGen/CodeGenTBAA.h
+++ b/lib/CodeGen/CodeGenTBAA.h
@@ -177,6 +177,10 @@ public:
/// given type.
llvm::MDNode *getTypeInfo(QualType QTy);
+ /// getAccessInfo - Get TBAA information that describes an access to
+ /// an object of the given type.
+ TBAAAccessInfo getAccessInfo(QualType AccessType);
+
/// getVTablePtrAccessInfo - Get the TBAA information that describes an
/// access to a virtual table pointer.
TBAAAccessInfo getVTablePtrAccessInfo(llvm::Type *VTablePtrType);
@@ -201,6 +205,11 @@ public:
/// purpose of conditional operator.
TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
TBAAAccessInfo InfoB);
+
+ /// mergeTBAAInfoForMemoryTransfer - Get merged TBAA information for the
+ /// purpose of memory transfer calls.
+ TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+ TBAAAccessInfo SrcInfo);
};
} // end namespace CodeGen
diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h
index fb096ac89987..901aed6c00b2 100644
--- a/lib/CodeGen/CodeGenTypeCache.h
+++ b/lib/CodeGen/CodeGenTypeCache.h
@@ -112,8 +112,6 @@ struct CodeGenTypeCache {
llvm::CallingConv::ID RuntimeCC;
llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; }
- llvm::CallingConv::ID BuiltinCC;
- llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; }
LangAS getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; }
};
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 529a13b7adc8..16ec1dd301aa 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -437,8 +437,33 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::ULongLong:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
+ case BuiltinType::ShortAccum:
+ case BuiltinType::Accum:
+ case BuiltinType::LongAccum:
+ case BuiltinType::UShortAccum:
+ case BuiltinType::UAccum:
+ case BuiltinType::ULongAccum:
+ case BuiltinType::ShortFract:
+ case BuiltinType::Fract:
+ case BuiltinType::LongFract:
+ case BuiltinType::UShortFract:
+ case BuiltinType::UFract:
+ case BuiltinType::ULongFract:
+ case BuiltinType::SatShortAccum:
+ case BuiltinType::SatAccum:
+ case BuiltinType::SatLongAccum:
+ case BuiltinType::SatUShortAccum:
+ case BuiltinType::SatUAccum:
+ case BuiltinType::SatULongAccum:
+ case BuiltinType::SatShortFract:
+ case BuiltinType::SatFract:
+ case BuiltinType::SatLongFract:
+ case BuiltinType::SatUShortFract:
+ case BuiltinType::SatUFract:
+ case BuiltinType::SatULongFract:
ResultType = llvm::IntegerType::get(getLLVMContext(),
static_cast<unsigned>(Context.getTypeSize(T)));
break;
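The new Embedded-C fixed-point cases lower to plain integers of the
type's bit width; scale and saturation semantics are applied by the
operations, not by the storage type. With Clang's default widths (an
assumption; targets may override them):

    // _Accum           -> i32
    // short _Fract     -> i8
    // _Sat long _Accum -> i64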
@@ -767,7 +792,7 @@ bool CodeGenTypes::isZeroInitializable(QualType T) {
// Records are non-zero-initializable if they contain any
// non-zero-initializable subobjects.
if (const RecordType *RT = T->getAs<RecordType>()) {
- auto RD = cast<RecordDecl>(RT->getDecl());
+ const RecordDecl *RD = RT->getDecl();
return isZeroInitializable(RD);
}
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index d082342bf592..fb8d31684290 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -184,7 +184,7 @@ public:
/// ConvertType - Convert type T into a llvm::Type.
llvm::Type *ConvertType(QualType T);
- /// \brief Converts the GlobalDecl into an llvm::Type. This should be used
+ /// Converts the GlobalDecl into an llvm::Type. This should be used
/// when we know the target of the function we want to convert. This is
/// because some functions (explicitly, those with pass_object_size
/// parameters) may not have the same signature as their type portrays, and
@@ -225,7 +225,7 @@ public:
/// replace the 'opaque' type we previously made for it if applicable.
void UpdateCompletedType(const TagDecl *TD);
- /// \brief Remove stale types from the type cache when an inheritance model
+ /// Remove stale types from the type cache when an inheritance model
/// gets assigned to a class.
void RefreshTypeCacheForClass(const CXXRecordDecl *RD);
@@ -313,7 +313,8 @@ public:
const FunctionProtoType *type,
RequiredArgs required,
unsigned numPrefixArgs);
- const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD);
+ const CGFunctionInfo &
+ arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD);
const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT);
const CGFunctionInfo &arrangeCXXMethodType(const CXXRecordDecl *RD,
@@ -334,7 +335,7 @@ public:
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs args);
- /// \brief Compute a new LLVM record layout object for the given record.
+ /// Compute a new LLVM record layout object for the given record.
CGRecordLayout *ComputeRecordLayout(const RecordDecl *D,
llvm::StructType *Ty);
diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h
index 90c9fcd8cf81..b4d1b65743c7 100644
--- a/lib/CodeGen/ConstantEmitter.h
+++ b/lib/CodeGen/ConstantEmitter.h
@@ -50,7 +50,7 @@ public:
: CGM(CGM), CGF(CGF) {}
/// Initialize this emission in the context of the given function.
- /// Use this if the expression might contain contextaul references like
+ /// Use this if the expression might contain contextual references like
/// block addresses or PredefinedExprs.
ConstantEmitter(CodeGenFunction &CGF)
: CGM(CGF.CGM), CGF(&CGF) {}
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 89a30dc7040c..2d8446463594 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -35,14 +35,14 @@ void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range, SourceLocation) {
namespace {
-/// \brief A region of source code that can be mapped to a counter.
+/// A region of source code that can be mapped to a counter.
class SourceMappingRegion {
Counter Count;
- /// \brief The region's starting location.
+ /// The region's starting location.
Optional<SourceLocation> LocStart;
- /// \brief The region's ending location.
+ /// The region's ending location.
Optional<SourceLocation> LocEnd;
/// Whether this region should be emitted after its parent is emitted.
@@ -74,7 +74,10 @@ public:
bool hasEndLoc() const { return LocEnd.hasValue(); }
- void setEndLoc(SourceLocation Loc) { LocEnd = Loc; }
+ void setEndLoc(SourceLocation Loc) {
+ assert(Loc.isValid() && "Setting an invalid end location");
+ LocEnd = Loc;
+ }
SourceLocation getEndLoc() const {
assert(LocEnd && "Region has no end location");
@@ -123,7 +126,7 @@ struct SpellingRegion {
}
};
-/// \brief Provides the common functionality for the different
+/// Provides the common functionality for the different
/// coverage mapping region builders.
class CoverageMappingBuilder {
public:
@@ -132,17 +135,17 @@ public:
const LangOptions &LangOpts;
private:
- /// \brief Map of clang's FileIDs to IDs used for coverage mapping.
+ /// Map of clang's FileIDs to IDs used for coverage mapping.
llvm::SmallDenseMap<FileID, std::pair<unsigned, SourceLocation>, 8>
FileIDMapping;
public:
- /// \brief The coverage mapping regions for this function
+ /// The coverage mapping regions for this function
llvm::SmallVector<CounterMappingRegion, 32> MappingRegions;
- /// \brief The source mapping regions for this function.
+ /// The source mapping regions for this function.
std::vector<SourceMappingRegion> SourceRegions;
- /// \brief A set of regions which can be used as a filter.
+ /// A set of regions which can be used as a filter.
///
/// It is produced by emitExpansionRegions() and is used in
/// emitSourceRegions() to suppress producing code regions if
@@ -154,7 +157,7 @@ public:
const LangOptions &LangOpts)
: CVM(CVM), SM(SM), LangOpts(LangOpts) {}
- /// \brief Return the precise end location for the given token.
+ /// Return the precise end location for the given token.
SourceLocation getPreciseTokenLocEnd(SourceLocation Loc) {
// We avoid getLocForEndOfToken here, because it doesn't do what we want for
// macro locations, which we just treat as expanded files.
@@ -163,14 +166,14 @@ public:
return Loc.getLocWithOffset(TokLen);
}
- /// \brief Return the start location of an included file or expanded macro.
+ /// Return the start location of an included file or expanded macro.
SourceLocation getStartOfFileOrMacro(SourceLocation Loc) {
if (Loc.isMacroID())
return Loc.getLocWithOffset(-SM.getFileOffset(Loc));
return SM.getLocForStartOfFile(SM.getFileID(Loc));
}
- /// \brief Return the end location of an included file or expanded macro.
+ /// Return the end location of an included file or expanded macro.
SourceLocation getEndOfFileOrMacro(SourceLocation Loc) {
if (Loc.isMacroID())
return Loc.getLocWithOffset(SM.getFileIDSize(SM.getFileID(Loc)) -
@@ -178,18 +181,18 @@ public:
return SM.getLocForEndOfFile(SM.getFileID(Loc));
}
- /// \brief Find out where the current file is included or macro is expanded.
+ /// Find out where the current file is included or macro is expanded.
SourceLocation getIncludeOrExpansionLoc(SourceLocation Loc) {
- return Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).first
+ return Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin()
: SM.getIncludeLoc(SM.getFileID(Loc));
}
- /// \brief Return true if \c Loc is a location in a built-in macro.
+ /// Return true if \c Loc is a location in a built-in macro.
bool isInBuiltin(SourceLocation Loc) {
return SM.getBufferName(SM.getSpellingLoc(Loc)) == "<built-in>";
}
- /// \brief Check whether \c Loc is included or expanded from \c Parent.
+ /// Check whether \c Loc is included or expanded from \c Parent.
bool isNestedIn(SourceLocation Loc, FileID Parent) {
do {
Loc = getIncludeOrExpansionLoc(Loc);
@@ -199,23 +202,23 @@ public:
return true;
}
- /// \brief Get the start of \c S ignoring macro arguments and builtin macros.
+ /// Get the start of \c S ignoring macro arguments and builtin macros.
SourceLocation getStart(const Stmt *S) {
SourceLocation Loc = S->getLocStart();
while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
- Loc = SM.getImmediateExpansionRange(Loc).first;
+ Loc = SM.getImmediateExpansionRange(Loc).getBegin();
return Loc;
}
- /// \brief Get the end of \c S ignoring macro arguments and builtin macros.
+ /// Get the end of \c S ignoring macro arguments and builtin macros.
SourceLocation getEnd(const Stmt *S) {
SourceLocation Loc = S->getLocEnd();
while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
- Loc = SM.getImmediateExpansionRange(Loc).first;
+ Loc = SM.getImmediateExpansionRange(Loc).getBegin();
return getPreciseTokenLocEnd(Loc);
}
- /// \brief Find the set of files we have regions for and assign IDs
+ /// Find the set of files we have regions for and assign IDs
///
/// Fills \c Mapping with the virtual file mapping needed to write out
/// coverage and collects the necessary file information to emit source and
@@ -255,7 +258,7 @@ public:
}
}
- /// \brief Get the coverage mapping file ID for \c Loc.
+ /// Get the coverage mapping file ID for \c Loc.
///
/// If such file id doesn't exist, return None.
Optional<unsigned> getCoverageFileID(SourceLocation Loc) {
@@ -265,7 +268,7 @@ public:
return None;
}
- /// \brief Gather all the regions that were skipped by the preprocessor
+ /// Gather all the regions that were skipped by the preprocessor
/// using the constructs like #if.
void gatherSkippedRegions() {
/// An array of the minimum lineStarts and the maximum lineEnds
@@ -295,14 +298,14 @@ public:
auto Region = CounterMappingRegion::makeSkipped(
*CovFileID, SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd);
// Make sure that we only collect the regions that are inside
- // the souce code of this function.
+ // the source code of this function.
if (Region.LineStart >= FileLineRanges[*CovFileID].first &&
Region.LineEnd <= FileLineRanges[*CovFileID].second)
MappingRegions.push_back(Region);
}
}
- /// \brief Generate the coverage counter mapping regions from collected
+ /// Generate the coverage counter mapping regions from collected
/// source regions.
void emitSourceRegions(const SourceRegionFilter &Filter) {
for (const auto &Region : SourceRegions) {
@@ -347,7 +350,7 @@ public:
}
}
- /// \brief Generate expansion regions for each virtual file we've seen.
+ /// Generate expansion regions for each virtual file we've seen.
SourceRegionFilter emitExpansionRegions() {
SourceRegionFilter Filter;
for (const auto &FM : FileIDMapping) {
@@ -377,7 +380,7 @@ public:
}
};
-/// \brief Creates unreachable coverage regions for the functions that
+/// Creates unreachable coverage regions for the functions that
/// are not emitted.
struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
EmptyCoverageMappingBuilder(CoverageMappingModuleGen &CVM, SourceManager &SM,
@@ -411,7 +414,7 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
SourceRegions.emplace_back(Counter(), Start, End);
}
- /// \brief Write the mapping data to the output stream
+ /// Write the mapping data to the output stream
void write(llvm::raw_ostream &OS) {
SmallVector<unsigned, 16> FileIDMapping;
gatherFileIDs(FileIDMapping);
@@ -425,15 +428,15 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
}
};
-/// \brief A StmtVisitor that creates coverage mapping regions which map
+/// A StmtVisitor that creates coverage mapping regions which map
/// from the source code locations to the PGO counters.
struct CounterCoverageMappingBuilder
: public CoverageMappingBuilder,
public ConstStmtVisitor<CounterCoverageMappingBuilder> {
- /// \brief The map of statements to count values.
+ /// The map of statements to count values.
llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
- /// \brief A stack of currently live regions.
+ /// A stack of currently live regions.
std::vector<SourceMappingRegion> RegionStack;
/// The currently deferred region: its end location and count can be set once
@@ -442,7 +445,7 @@ struct CounterCoverageMappingBuilder
CounterExpressionBuilder Builder;
- /// \brief A location in the most recently visited file or macro.
+ /// A location in the most recently visited file or macro.
///
/// This is used to adjust the active source regions appropriately when
/// expressions cross file or macro boundaries.
@@ -451,12 +454,12 @@ struct CounterCoverageMappingBuilder
/// Location of the last terminated region.
Optional<std::pair<SourceLocation, size_t>> LastTerminatedRegion;
- /// \brief Return a counter for the subtraction of \c RHS from \c LHS
+ /// Return a counter for the subtraction of \c RHS from \c LHS
Counter subtractCounters(Counter LHS, Counter RHS) {
return Builder.subtract(LHS, RHS);
}
- /// \brief Return a counter for the sum of \c LHS and \c RHS.
+ /// Return a counter for the sum of \c LHS and \c RHS.
Counter addCounters(Counter LHS, Counter RHS) {
return Builder.add(LHS, RHS);
}
@@ -465,14 +468,14 @@ struct CounterCoverageMappingBuilder
return addCounters(addCounters(C1, C2), C3);
}
- /// \brief Return the region counter for the given statement.
+ /// Return the region counter for the given statement.
///
/// This should only be called on statements that have a dedicated counter.
Counter getRegionCounter(const Stmt *S) {
return Counter::getCounter(CounterMap[S]);
}
- /// \brief Push a region onto the stack.
+ /// Push a region onto the stack.
///
/// Returns the index on the stack where the region was pushed. This can be
/// used with popRegions to exit a "scope", ending the region that was pushed.
@@ -549,7 +552,7 @@ struct CounterCoverageMappingBuilder
completeDeferred(Count, DeferredEndLoc);
}
- /// \brief Pop regions from the stack into the function's list of regions.
+ /// Pop regions from the stack into the function's list of regions.
///
/// Adds all regions from \c ParentIndex to the top of the stack to the
/// function's \c SourceRegions.
@@ -616,13 +619,13 @@ struct CounterCoverageMappingBuilder
assert(!ParentOfDeferredRegion && "Deferred region with no parent");
}
- /// \brief Return the currently active region.
+ /// Return the currently active region.
SourceMappingRegion &getRegion() {
assert(!RegionStack.empty() && "statement has no region");
return RegionStack.back();
}
- /// \brief Propagate counts through the children of \c S.
+ /// Propagate counts through the children of \c S.
Counter propagateCounts(Counter TopCount, const Stmt *S) {
SourceLocation StartLoc = getStart(S);
SourceLocation EndLoc = getEnd(S);
@@ -639,7 +642,7 @@ struct CounterCoverageMappingBuilder
return ExitCount;
}
- /// \brief Check whether a region with bounds \c StartLoc and \c EndLoc
+ /// Check whether a region with bounds \c StartLoc and \c EndLoc
/// is already added to \c SourceRegions.
bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc) {
return SourceRegions.rend() !=
@@ -650,7 +653,7 @@ struct CounterCoverageMappingBuilder
});
}
- /// \brief Adjust the most recently visited location to \c EndLoc.
+ /// Adjust the most recently visited location to \c EndLoc.
///
/// This should be used after visiting any statements in non-source order.
void adjustForOutOfOrderTraversal(SourceLocation EndLoc) {
@@ -667,7 +670,7 @@ struct CounterCoverageMappingBuilder
MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation);
}
- /// \brief Adjust regions and state when \c NewLoc exits a file.
+ /// Adjust regions and state when \c NewLoc exits a file.
///
/// If moving from our most recently tracked location to \c NewLoc exits any
/// files, this adjusts our current region stack and creates the file regions
@@ -734,7 +737,7 @@ struct CounterCoverageMappingBuilder
MostRecentLocation = NewLoc;
}
- /// \brief Ensure that \c S is included in the current region.
+ /// Ensure that \c S is included in the current region.
void extendRegion(const Stmt *S) {
SourceMappingRegion &Region = getRegion();
SourceLocation StartLoc = getStart(S);
@@ -746,7 +749,7 @@ struct CounterCoverageMappingBuilder
completeDeferred(Region.getCounter(), StartLoc);
}
- /// \brief Mark \c S as a terminator, starting a zero region.
+ /// Mark \c S as a terminator, starting a zero region.
void terminateRegion(const Stmt *S) {
extendRegion(S);
SourceMappingRegion &Region = getRegion();
@@ -791,7 +794,7 @@ struct CounterCoverageMappingBuilder
popRegions(Index);
}
- /// \brief Keep counts of breaks and continues inside loops.
+ /// Keep counts of breaks and continues inside loops.
struct BreakContinue {
Counter BreakCount;
Counter ContinueCount;
@@ -805,7 +808,7 @@ struct CounterCoverageMappingBuilder
: CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap),
DeferredRegion(None) {}
- /// \brief Write the mapping data to the output stream
+ /// Write the mapping data to the output stream
void write(llvm::raw_ostream &OS) {
llvm::SmallVector<unsigned, 8> VirtualFileMapping;
gatherFileIDs(VirtualFileMapping);
@@ -831,22 +834,6 @@ struct CounterCoverageMappingBuilder
handleFileExit(getEnd(S));
}
- /// Determine whether the final deferred region emitted in \p Body should be
- /// discarded.
- static bool discardFinalDeferredRegionInDecl(Stmt *Body) {
- if (auto *CS = dyn_cast<CompoundStmt>(Body)) {
- Stmt *LastStmt = CS->body_back();
- if (auto *IfElse = dyn_cast<IfStmt>(LastStmt)) {
- if (auto *Else = dyn_cast_or_null<CompoundStmt>(IfElse->getElse()))
- LastStmt = Else->body_back();
- else
- LastStmt = IfElse->getElse();
- }
- return dyn_cast_or_null<ReturnStmt>(LastStmt);
- }
- return false;
- }
-
void VisitDecl(const Decl *D) {
assert(!DeferredRegion && "Deferred region never completed");
@@ -856,17 +843,13 @@ struct CounterCoverageMappingBuilder
if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body))))
return;
- Counter ExitCount = propagateCounts(getRegionCounter(Body), Body);
+ propagateCounts(getRegionCounter(Body), Body);
assert(RegionStack.empty() && "Regions entered but never exited");
- if (DeferredRegion) {
- // Complete (or discard) any deferred regions introduced by the last
- // statement.
- if (discardFinalDeferredRegionInDecl(Body))
- DeferredRegion = None;
- else
- popRegions(completeDeferred(ExitCount, getEnd(Body)));
- }
+ // Discard the last uncompleted deferred region in a decl, if one exists.
+ // This prevents lines at the end of a function containing only whitespace
+ // or closing braces from being marked as uncovered.
+ DeferredRegion = None;
}
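An illustrative case of what the simplification changes:

    // int f(bool b) {
    //   if (b)
    //     return 1;
    //   return 0;
    // }  // <- the closing-brace line after the final return no longer
    //       inherits a deferred zero-count region; it is simply dropped.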
void VisitReturnStmt(const ReturnStmt *S) {
@@ -889,6 +872,7 @@ struct CounterCoverageMappingBuilder
Counter LabelCount = getRegionCounter(S);
SourceLocation Start = getStart(S);
completeTopLevelDeferredRegion(LabelCount, Start);
+ completeDeferred(LabelCount, Start);
// We can't extendRegion here or we risk overlapping with our new region.
handleFileExit(Start);
pushRegion(LabelCount, Start);
@@ -979,20 +963,28 @@ struct CounterCoverageMappingBuilder
Counter ParentCount = getRegion().getCounter();
Counter BodyCount = getRegionCounter(S);
+ // The loop increment may contain a break or continue.
+ if (S->getInc())
+ BreakContinueStack.emplace_back();
+
// Handle the body first so that we can get the backedge count.
- BreakContinueStack.push_back(BreakContinue());
+ BreakContinueStack.emplace_back();
extendRegion(S->getBody());
Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
- BreakContinue BC = BreakContinueStack.pop_back_val();
+ BreakContinue BodyBC = BreakContinueStack.pop_back_val();
// The increment is essentially part of the body but it needs to include
// the count for all the continue statements.
- if (const Stmt *Inc = S->getInc())
- propagateCounts(addCounters(BackedgeCount, BC.ContinueCount), Inc);
+ BreakContinue IncrementBC;
+ if (const Stmt *Inc = S->getInc()) {
+ propagateCounts(addCounters(BackedgeCount, BodyBC.ContinueCount), Inc);
+ IncrementBC = BreakContinueStack.pop_back_val();
+ }
// Go back to handle the condition.
- Counter CondCount =
- addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
+ Counter CondCount = addCounters(
+ addCounters(ParentCount, BackedgeCount, BodyBC.ContinueCount),
+ IncrementBC.ContinueCount);
if (const Expr *Cond = S->getCond()) {
propagateCounts(CondCount, Cond);
adjustForOutOfOrderTraversal(getEnd(S));
@@ -1004,8 +996,8 @@ struct CounterCoverageMappingBuilder
if (Gap)
fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
- Counter OutCount =
- addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
+ Counter OutCount = addCounters(BodyBC.BreakCount, IncrementBC.BreakCount,
+ subtractCounters(CondCount, BodyCount));
if (OutCount != ParentCount)
pushRegion(OutCount);
}
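A hedged example of the construct the extra frame handles: with GNU
statement expressions the increment itself can branch.

    // for (int i = 0; i < n; ({ if (skip(i)) continue; ++i; }))
    //   body(i);
    // The continue belongs to the increment's own BreakContinue frame,
    // so its count feeds CondCount without being folded into the body's
    // continue count.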
@@ -1361,8 +1353,7 @@ void CoverageMappingModuleGen::emit() {
// and coverage mappings is a multiple of 8.
if (size_t Rem = OS.str().size() % 8) {
CoverageMappingSize += 8 - Rem;
- for (size_t I = 0, S = 8 - Rem; I < S; ++I)
- OS << '\0';
+ OS.write_zeros(8 - Rem);
}
auto *FilenamesAndMappingsVal =
llvm::ConstantDataArray::getString(Ctx, OS.str(), false);
diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h
index d07ed5ebcf2b..b08ad896d7a5 100644
--- a/lib/CodeGen/CoverageMappingGen.h
+++ b/lib/CodeGen/CoverageMappingGen.h
@@ -31,7 +31,7 @@ class Preprocessor;
class Decl;
class Stmt;
-/// \brief Stores additional source code information like skipped ranges which
+/// Stores additional source code information like skipped ranges which
/// is required by the coverage mapping generator and is obtained from
/// the preprocessor.
class CoverageSourceInfo : public PPCallbacks {
@@ -46,7 +46,7 @@ namespace CodeGen {
class CodeGenModule;
-/// \brief Organizes the cross-function state that is used while generating
+/// Organizes the cross-function state that is used while generating
/// code coverage mapping data.
class CoverageMappingModuleGen {
CodeGenModule &CGM;
@@ -65,7 +65,7 @@ public:
return SourceInfo;
}
- /// \brief Add a function's coverage mapping record to the collection of the
+ /// Add a function's coverage mapping record to the collection of the
/// function mapping records.
void addFunctionMappingRecord(llvm::GlobalVariable *FunctionName,
StringRef FunctionNameValue,
@@ -73,15 +73,15 @@ public:
const std::string &CoverageMapping,
bool IsUsed = true);
- /// \brief Emit the coverage mapping data for a translation unit.
+ /// Emit the coverage mapping data for a translation unit.
void emit();
- /// \brief Return the coverage mapping translation unit file id
+ /// Return the coverage mapping translation unit file id
/// for the given file.
unsigned getFileID(const FileEntry *File);
};
-/// \brief Organizes the per-function state that is used while generating
+/// Organizes the per-function state that is used while generating
/// code coverage mapping data.
class CoverageMappingGen {
CoverageMappingModuleGen &CVM;
@@ -99,12 +99,12 @@ public:
llvm::DenseMap<const Stmt *, unsigned> *CounterMap)
: CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(CounterMap) {}
- /// \brief Emit the coverage mapping data which maps the regions of
+ /// Emit the coverage mapping data which maps the regions of
/// code to counters that will be used to find the execution
/// counts for those regions.
void emitCounterMapping(const Decl *D, llvm::raw_ostream &OS);
- /// \brief Emit the coverage mapping data for an unused function.
+ /// Emit the coverage mapping data for an unused function.
/// It creates mapping regions with the counter of zero.
void emitEmptyMapping(const Decl *D, llvm::raw_ostream &OS);
};
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index c375b82ea936..16fdd1c16a1d 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -31,9 +31,11 @@
#include "clang/AST/StmtCXX.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/ScopedPrinter.h"
using namespace clang;
using namespace CodeGen;
@@ -63,13 +65,6 @@ public:
bool classifyReturnType(CGFunctionInfo &FI) const override;
bool passClassIndirect(const CXXRecordDecl *RD) const {
- // Clang <= 4 used the pre-C++11 rule, which ignores move operations.
- // The PS4 platform ABI follows the behavior of Clang 3.2.
- if (CGM.getCodeGenOpts().getClangABICompat() <=
- CodeGenOptions::ClangABI::Ver4 ||
- CGM.getTriple().getOS() == llvm::Triple::PS4)
- return RD->hasNonTrivialDestructor() ||
- RD->hasNonTrivialCopyConstructor();
return !canCopyArgument(RD);
}
@@ -187,8 +182,7 @@ public:
emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) override;
- void EmitFundamentalRTTIDescriptor(QualType Type, bool DLLExport);
- void EmitFundamentalRTTIDescriptors(bool DLLExport);
+ void EmitFundamentalRTTIDescriptors(const CXXRecordDecl *RD);
llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
CatchTypeInfo
getAddrOfCXXCatchHandlerType(QualType Ty,
@@ -300,16 +294,11 @@ public:
// linkage together with vtables when needed.
if (ForVTable && !Thunk->hasLocalLinkage())
Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
-
- // Propagate dllexport storage, to enable the linker to generate import
- // thunks as necessary (e.g. when a parent class has a key function and a
- // child class doesn't, and the construction vtable for the parent in the
- // child needs to reference the parent's thunks).
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- if (MD->hasAttr<DLLExportAttr>())
- Thunk->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ CGM.setGVProperties(Thunk, GD);
}
+ bool exportThunk() override { return true; }
+
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
const ThisAdjustment &TA) override;
@@ -480,6 +469,7 @@ public:
explicit WebAssemblyCXXABI(CodeGen::CodeGenModule &CGM)
: ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true,
/*UseARMGuardVarABI=*/true) {}
+ void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
private:
bool HasThisReturn(GlobalDecl GD) const override {
@@ -632,13 +622,53 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty);
VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy);
}
- VTable = Builder.CreateGEP(VTable, VTableOffset);
+ // Compute the address of the virtual function pointer.
+ llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
+
+ // Check the address of the function pointer if CFI on member function
+ // pointers is enabled.
+ llvm::Constant *CheckSourceLocation;
+ llvm::Constant *CheckTypeDesc;
+ bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) &&
+ CGM.HasHiddenLTOVisibility(RD);
+ if (ShouldEmitCFICheck) {
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+ CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getLocStart());
+ CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
+ llvm::Constant *StaticData[] = {
+ llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
+ CheckSourceLocation,
+ CheckTypeDesc,
+ };
+
+ llvm::Metadata *MD =
+ CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
+ llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+
+ llvm::Value *TypeTest = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId});
+
+ if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
+ CGF.EmitTrapCheck(TypeTest);
+ } else {
+ llvm::Value *AllVtables = llvm::MetadataAsValue::get(
+ CGM.getLLVMContext(),
+ llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
+ llvm::Value *ValidVtable = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
+ CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall),
+ SanitizerHandler::CFICheckFail, StaticData,
+ {VTable, ValidVtable});
+ }
+
+ FnVirtual = Builder.GetInsertBlock();
+ }
// Load the virtual function to call.
- VTable = Builder.CreateBitCast(VTable, FTy->getPointerTo()->getPointerTo());
- llvm::Value *VirtualFn =
- Builder.CreateAlignedLoad(VTable, CGF.getPointerAlign(),
- "memptr.virtualfn");
+ VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
+ llvm::Value *VirtualFn = Builder.CreateAlignedLoad(
+ VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn");
CGF.EmitBranch(FnEnd);
// In the non-virtual path, the function pointer is actually a
@@ -647,6 +677,43 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
llvm::Value *NonVirtualFn =
Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn");
+ // Check the function pointer if CFI on member function pointers is enabled.
+ if (ShouldEmitCFICheck) {
+ CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl();
+ if (RD->hasDefinition()) {
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+ llvm::Constant *StaticData[] = {
+ llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_NVMFCall),
+ CheckSourceLocation,
+ CheckTypeDesc,
+ };
+
+ llvm::Value *Bit = Builder.getFalse();
+ llvm::Value *CastedNonVirtualFn =
+ Builder.CreateBitCast(NonVirtualFn, CGF.Int8PtrTy);
+ for (const CXXRecordDecl *Base : CGM.getMostBaseClasses(RD)) {
+ llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(
+ getContext().getMemberPointerType(
+ MPT->getPointeeType(),
+ getContext().getRecordType(Base).getTypePtr()));
+ llvm::Value *TypeId =
+ llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+
+ llvm::Value *TypeTest =
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test),
+ {CastedNonVirtualFn, TypeId});
+ Bit = Builder.CreateOr(Bit, TypeTest);
+ }
+
+ CGF.EmitCheck(std::make_pair(Bit, SanitizerKind::CFIMFCall),
+ SanitizerHandler::CFICheckFail, StaticData,
+ {CastedNonVirtualFn, llvm::UndefValue::get(CGF.IntPtrTy)});
+
+ FnNonVirtual = Builder.GetInsertBlock();
+ }
+ }
+
// We're done.
CGF.EmitBlock(FnEnd);
llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2);
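A hedged source-level example of what the two new checks guard, assuming
-fsanitize=cfi-mfcall and hidden LTO visibility for the class:

    // struct S { void f(); virtual void g(); };
    // void call(S *s, void (S::*mp)()) { (s->*mp)(); }
    // Virtual branch: llvm.type.test on the vtable slot address against
    // the ".virtual" identifier for the member pointer type.
    // Non-virtual branch: OR of llvm.type.test results over every
    // most-base class of S, checked against the plain member-pointer
    // identifier.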
@@ -836,7 +903,6 @@ ItaniumCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD,
CharUnits ThisAdjustment) {
assert(MD->isInstance() && "Member function must not be static!");
- MD = MD->getCanonicalDecl();
CodeGenTypes &Types = CGM.getTypes();
@@ -1182,7 +1248,7 @@ static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) {
return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast");
}
-/// \brief Compute the src2dst_offset hint as described in the
+/// Compute the src2dst_offset hint as described in the
/// Itanium C++ ABI [2.9.7]
static CharUnits computeOffsetHint(ASTContext &Context,
const CXXRecordDecl *Src,
@@ -1448,7 +1514,7 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
return;
/// Initialize the 'this' slot. In the Itanium C++ ABI, no prologue
- /// adjustments are required, becuase they are all handled by thunks.
+ /// adjustments are required, because they are all handled by thunks.
setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF));
/// Initialize the 'vtt' slot if needed.
@@ -1479,8 +1545,7 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
llvm::Value *VTT =
CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
- Args.insert(Args.begin() + 1,
- CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
+ Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy));
return AddedStructorArgs::prefix(1); // Added one arg.
}
@@ -1531,7 +1596,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName()));
// Set the right visibility.
- CGM.setGlobalVisibility(VTable, RD, ForDefinition);
+ CGM.setGVProperties(VTable, RD);
// Use pointer alignment for the vtable. Otherwise we would align them based
// on the size of the initializer which doesn't make sense as only single
@@ -1548,7 +1613,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
isa<NamespaceDecl>(DC) && cast<NamespaceDecl>(DC)->getIdentifier() &&
cast<NamespaceDecl>(DC)->getIdentifier()->isStr("__cxxabiv1") &&
DC->getParent()->isTranslationUnit())
- EmitFundamentalRTTIDescriptors(RD->hasAttr<DLLExportAttr>());
+ EmitFundamentalRTTIDescriptors(RD);
if (!VTable->isDeclarationForLinker())
CGM.EmitVTableTypeMetadata(VTable, VTLayout);
@@ -1641,12 +1706,8 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, VTableType, llvm::GlobalValue::ExternalLinkage);
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- CGM.setGlobalVisibility(VTable, RD, NotForDefinition);
- if (RD->hasAttr<DLLImportAttr>())
- VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- else if (RD->hasAttr<DLLExportAttr>())
- VTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ CGM.setGVProperties(VTable, RD);
return VTable;
}
@@ -1656,7 +1717,6 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
- GD = GD.getCanonicalDecl();
Ty = Ty->getPointerTo()->getPointerTo();
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent());
@@ -1690,7 +1750,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
VFunc = VFuncLoad;
}
- CGCallee Callee(MethodDecl, VFunc);
+ CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc);
return Callee;
}
@@ -1702,10 +1762,9 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, getFromDtorType(DtorType));
- llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
+ llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
CGCallee Callee =
- getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty,
- CE ? CE->getLocStart() : SourceLocation());
+ CGCallee::forVirtual(CE, GlobalDecl(Dtor, DtorType), This, Ty);
CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(),
This.getPointer(), /*ImplicitParam=*/nullptr,
@@ -1725,11 +1784,19 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
if (CGM.getLangOpts().AppleKext)
return false;
- // If we don't have any not emitted inline virtual function, and if vtable is
- // not hidden, then we are safe to emit available_externally copy of vtable.
+ // If the vtable is hidden, then it is not safe to emit an
+ // available_externally copy of the vtable.
+ if (isVTableHidden(RD))
+ return false;
+
+ if (CGM.getCodeGenOpts().ForceEmitVTables)
+ return true;
+
+ // If there are no inline virtual functions left unemitted, then we are safe
+ // to emit an available_externally copy of the vtable.
// FIXME: we can still emit a copy of the vtable if we
// can emit definitions of the inline functions.
- return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD);
+ return !hasAnyUnusedVirtualInlineFunction(RD);
}
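As a rough illustration (class name hypothetical), the kind of class whose vtable now qualifies for speculative emission, or is forced out by -fforce-emit-vtables:

    // Hypothetical example: no hidden visibility and every virtual function
    // is inline, so an available_externally copy of the vtable may be
    // emitted in any TU that uses Widget.
    struct Widget {
      virtual int id() { return 1; }
      virtual ~Widget() {}
    };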
static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
Address InitialPtr,
@@ -1848,7 +1915,8 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
// Handle the array cookie specially in ASan.
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 &&
- expr->getOperatorNew()->isReplaceableGlobalAllocationFunction()) {
+ (expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() ||
+ CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) {
// The store to the CookiePtr does not need to be instrumented.
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI);
llvm::FunctionType *FTy =
@@ -2052,6 +2120,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
false, var->getLinkage(),
llvm::ConstantInt::get(guardTy, 0),
guardName.str());
+ guard->setDSOLocal(var->isDSOLocal());
guard->setVisibility(var->getVisibility());
// If the variable is thread-local, so is its guard variable.
guard->setThreadLocalMode(var->getThreadLocalMode());
@@ -2211,6 +2280,13 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ if (!addr)
+ // addr is null when we are trying to register a dtor annotated with
+ // __attribute__((destructor)) in a constructor function. Using null here is
+ // okay because this argument is just passed back to the destructor
+ // function.
+ addr = llvm::Constant::getNullValue(CGF.Int8PtrTy);
+
llvm::Value *args[] = {
llvm::ConstantExpr::getBitCast(dtor, dtorTy),
llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy),
@@ -2219,6 +2295,48 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
CGF.EmitNounwindRuntimeCall(atexit, args);
}
+void CodeGenModule::registerGlobalDtorsWithAtExit() {
+ for (const auto I : DtorsUsingAtExit) {
+ int Priority = I.first;
+ const llvm::TinyPtrVector<llvm::Function *> &Dtors = I.second;
+
+ // Create a function that registers destructors that have the same priority.
+ //
+ // Since constructor functions are run in non-descending priority order,
+ // the destructors are registered in non-descending priority order as well.
+ // Registered destructors then run in reverse order of registration, so
+ // overall they run in non-ascending priority order.
+ CodeGenFunction CGF(*this);
+ std::string GlobalInitFnName =
+ std::string("__GLOBAL_init_") + llvm::to_string(Priority);
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
+ llvm::Function *GlobalInitFn = CreateGlobalInitOrDestructFunction(
+ FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(),
+ SourceLocation());
+ ASTContext &Ctx = getContext();
+ FunctionDecl *FD = FunctionDecl::Create(
+ Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
+ &Ctx.Idents.get(GlobalInitFnName), Ctx.VoidTy, nullptr, SC_Static,
+ false, false);
+ CGF.StartFunction(GlobalDecl(FD), getContext().VoidTy, GlobalInitFn,
+ getTypes().arrangeNullaryFunction(), FunctionArgList(),
+ SourceLocation(), SourceLocation());
+
+ for (auto *Dtor : Dtors) {
+ // Register the destructor function with __cxa_atexit if it is
+ // available; otherwise fall back on atexit.
+ if (getCodeGenOpts().CXAAtExit)
+ emitGlobalDtorWithCXAAtExit(CGF, Dtor, nullptr, false);
+ else
+ CGF.registerGlobalDtorWithAtExit(Dtor);
+ }
+
+ CGF.FinishFunction();
+ AddGlobalCtor(GlobalInitFn, Priority, nullptr);
+ }
+}
+
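A hedged source-level illustration (priorities invented): the generated __GLOBAL_init_<priority> functions register these low-to-high, and the LIFO semantics of atexit/__cxa_atexit then run them high-to-low.

    #include <cstdio>
    // Hypothetical example; the priority-101 destructor is registered first,
    // so it runs last.
    __attribute__((destructor(202))) static void d2() { std::puts("d2 first"); }
    __attribute__((destructor(101))) static void d1() { std::puts("d1 last"); }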
/// Register a global destructor as best as we know how.
void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF,
const VarDecl &D,
@@ -2407,8 +2525,10 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init));
}
- if (Init)
+ if (Init) {
Init->setVisibility(Var->getVisibility());
+ Init->setDSOLocal(Var->isDSOLocal());
+ }
llvm::LLVMContext &Context = CGM.getModule().getContext();
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
@@ -2416,8 +2536,12 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
if (InitIsInitFunc) {
if (Init) {
llvm::CallInst *CallVal = Builder.CreateCall(Init);
- if (isThreadWrapperReplaceable(VD, CGM))
+ if (isThreadWrapperReplaceable(VD, CGM)) {
CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
+ llvm::Function *Fn =
+ cast<llvm::Function>(cast<llvm::GlobalAlias>(Init)->getAliasee());
+ Fn->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
+ }
}
} else {
// Don't know whether we have an init function. Call it if it exists.
@@ -2574,12 +2698,16 @@ public:
BCTI_Public = 0x2
};
+ /// BuildTypeInfo - Build the RTTI type info struct for the given type, or
+ /// link to an existing RTTI descriptor if one already exists.
+ llvm::Constant *BuildTypeInfo(QualType Ty);
+
/// BuildTypeInfo - Build the RTTI type info struct for the given type.
- ///
- /// \param Force - true to force the creation of this RTTI value
- /// \param DLLExport - true to mark the RTTI value as DLLExport
- llvm::Constant *BuildTypeInfo(QualType Ty, bool Force = false,
- bool DLLExport = false);
+ llvm::Constant *BuildTypeInfo(
+ QualType Ty,
+ llvm::GlobalVariable::LinkageTypes Linkage,
+ llvm::GlobalValue::VisibilityTypes Visibility,
+ llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass);
};
}
@@ -2622,11 +2750,8 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
/*Constant=*/true,
llvm::GlobalValue::ExternalLinkage, nullptr,
Name);
- if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
- const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
- if (RD->hasAttr<DLLImportAttr>())
- GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
- }
+ const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
+ CGM.setGVProperties(GV, RD);
}
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
@@ -2673,6 +2798,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::LongDouble:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
@@ -2687,6 +2813,30 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
+ case BuiltinType::ShortAccum:
+ case BuiltinType::Accum:
+ case BuiltinType::LongAccum:
+ case BuiltinType::UShortAccum:
+ case BuiltinType::UAccum:
+ case BuiltinType::ULongAccum:
+ case BuiltinType::ShortFract:
+ case BuiltinType::Fract:
+ case BuiltinType::LongFract:
+ case BuiltinType::UShortFract:
+ case BuiltinType::UFract:
+ case BuiltinType::ULongFract:
+ case BuiltinType::SatShortAccum:
+ case BuiltinType::SatAccum:
+ case BuiltinType::SatLongAccum:
+ case BuiltinType::SatUShortAccum:
+ case BuiltinType::SatUAccum:
+ case BuiltinType::SatULongAccum:
+ case BuiltinType::SatShortFract:
+ case BuiltinType::SatFract:
+ case BuiltinType::SatLongFract:
+ case BuiltinType::SatUShortFract:
+ case BuiltinType::SatUFract:
+ case BuiltinType::SatULongFract:
return false;
case BuiltinType::Dependent:
@@ -2761,6 +2911,11 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
// N.B. We must always emit the RTTI data ourselves if there exists a key
// function.
bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
+
+ // Don't import the RTTI but emit it locally.
+ if (CGM.getTriple().isWindowsGNUEnvironment() && IsDLLImport)
+ return false;
+
if (CGM.getVTables().isVTableExternal(RD))
return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment()
? false
@@ -2953,6 +3108,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
llvm::Constant *VTable =
CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy);
+ CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts()));
llvm::Type *PtrDiffTy =
CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
@@ -2966,7 +3122,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
Fields.push_back(VTable);
}
-/// \brief Return the linkage that the type info and type info name constants
+/// Return the linkage that the type info and type info name constants
/// should have for the given type.
static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
QualType Ty) {
@@ -3020,8 +3176,7 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
llvm_unreachable("Invalid linkage!");
}
-llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
- bool DLLExport) {
+llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty) {
// We want to operate on the canonical type.
Ty = Ty.getCanonicalType();
@@ -3039,17 +3194,41 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
}
// Check if there is already an external RTTI descriptor for this type.
- bool IsStdLib = IsStandardLibraryRTTIDescriptor(Ty);
- if (!Force && (IsStdLib || ShouldUseExternalRTTIDescriptor(CGM, Ty)))
+ if (IsStandardLibraryRTTIDescriptor(Ty) ||
+ ShouldUseExternalRTTIDescriptor(CGM, Ty))
return GetAddrOfExternalRTTIDescriptor(Ty);
// Emit the standard library with external linkage.
- llvm::GlobalVariable::LinkageTypes Linkage;
- if (IsStdLib)
- Linkage = llvm::GlobalValue::ExternalLinkage;
+ llvm::GlobalVariable::LinkageTypes Linkage = getTypeInfoLinkage(CGM, Ty);
+
+ // Give the type_info object and name the formal visibility of the
+ // type itself.
+ llvm::GlobalValue::VisibilityTypes llvmVisibility;
+ if (llvm::GlobalValue::isLocalLinkage(Linkage))
+ // If the linkage is local, only default visibility makes sense.
+ llvmVisibility = llvm::GlobalValue::DefaultVisibility;
+ else if (CXXABI.classifyRTTIUniqueness(Ty, Linkage) ==
+ ItaniumCXXABI::RUK_NonUniqueHidden)
+ llvmVisibility = llvm::GlobalValue::HiddenVisibility;
else
- Linkage = getTypeInfoLinkage(CGM, Ty);
+ llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility());
+
+ llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass =
+ llvm::GlobalValue::DefaultStorageClass;
+ if (CGM.getTriple().isWindowsItaniumEnvironment()) {
+ auto RD = Ty->getAsCXXRecordDecl();
+ if (RD && RD->hasAttr<DLLExportAttr>())
+ DLLStorageClass = llvm::GlobalValue::DLLExportStorageClass;
+ }
+ return BuildTypeInfo(Ty, Linkage, llvmVisibility, DLLStorageClass);
+}
+
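A hedged illustration (type name hypothetical) of the visibility rule computed above: the type_info object and name inherit the formal visibility of the type unless the RTTI is non-unique-hidden or locally linked.

    // Hypothetical example: _ZTI6Hidden and _ZTS6Hidden are emitted with
    // hidden visibility, matching the type's own visibility.
    struct __attribute__((visibility("hidden"))) Hidden {
      virtual ~Hidden();
    };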
+llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
+ QualType Ty,
+ llvm::GlobalVariable::LinkageTypes Linkage,
+ llvm::GlobalValue::VisibilityTypes Visibility,
+ llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass) {
// Add the vtable pointer.
BuildVTablePointer(cast<Type>(Ty));
@@ -3163,7 +3342,11 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields);
+ SmallString<256> Name;
+ llvm::raw_svector_ostream Out(Name);
+ CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out);
llvm::Module &M = CGM.getModule();
+ llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name);
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(M, Init->getType(),
/*Constant=*/true, Linkage, Init, Name);
@@ -3195,37 +3378,14 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
// All of this is to say that it's important that both the type_info
// object and the type_info name be uniqued when weakly emitted.
- // Give the type_info object and name the formal visibility of the
- // type itself.
- llvm::GlobalValue::VisibilityTypes llvmVisibility;
- if (llvm::GlobalValue::isLocalLinkage(Linkage))
- // If the linkage is local, only default visibility makes sense.
- llvmVisibility = llvm::GlobalValue::DefaultVisibility;
- else if (RTTIUniqueness == ItaniumCXXABI::RUK_NonUniqueHidden)
- llvmVisibility = llvm::GlobalValue::HiddenVisibility;
- else
- llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility());
+ TypeName->setVisibility(Visibility);
+ CGM.setDSOLocal(TypeName);
- TypeName->setVisibility(llvmVisibility);
- GV->setVisibility(llvmVisibility);
+ GV->setVisibility(Visibility);
+ CGM.setDSOLocal(GV);
- if (CGM.getTriple().isWindowsItaniumEnvironment()) {
- auto RD = Ty->getAsCXXRecordDecl();
- if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) {
- TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- } else if (RD && RD->hasAttr<DLLImportAttr>() &&
- ShouldUseExternalRTTIDescriptor(CGM, Ty)) {
- TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-
- // Because the typename and the typeinfo are DLL import, convert them to
- // declarations rather than definitions. The initializers still need to
- // be constructed to calculate the type for the declarations.
- TypeName->setInitializer(nullptr);
- GV->setInitializer(nullptr);
- }
- }
+ TypeName->setDLLStorageClass(DLLStorageClass);
+ GV->setDLLStorageClass(DLLStorageClass);
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
}
@@ -3433,11 +3593,9 @@ static unsigned extractPBaseFlags(ASTContext &Ctx, QualType &Type) {
Flags |= ItaniumRTTIBuilder::PTI_Incomplete;
if (auto *Proto = Type->getAs<FunctionProtoType>()) {
- if (Proto->isNothrow(Ctx)) {
+ if (Proto->isNothrow()) {
Flags |= ItaniumRTTIBuilder::PTI_Noexcept;
- Type = Ctx.getFunctionType(
- Proto->getReturnType(), Proto->getParamTypes(),
- Proto->getExtProtoInfo().withExceptionSpec(EST_None));
+ Type = Ctx.getFunctionTypeWithExceptionSpec(Type, EST_None);
}
}
@@ -3502,18 +3660,7 @@ llvm::Constant *ItaniumCXXABI::getAddrOfRTTIDescriptor(QualType Ty) {
return ItaniumRTTIBuilder(*this).BuildTypeInfo(Ty);
}
-void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type,
- bool DLLExport) {
- QualType PointerType = getContext().getPointerType(Type);
- QualType PointerTypeConst = getContext().getPointerType(Type.withConst());
- ItaniumRTTIBuilder(*this).BuildTypeInfo(Type, /*Force=*/true, DLLExport);
- ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerType, /*Force=*/true,
- DLLExport);
- ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, /*Force=*/true,
- DLLExport);
-}
-
-void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) {
+void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(const CXXRecordDecl *RD) {
// Types added here must also be added to TypeInfoIsInStandardLibrary.
QualType FundamentalTypes[] = {
getContext().VoidTy, getContext().NullPtrTy,
@@ -3527,10 +3674,24 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) {
getContext().UnsignedInt128Ty, getContext().HalfTy,
getContext().FloatTy, getContext().DoubleTy,
getContext().LongDoubleTy, getContext().Float128Ty,
- getContext().Char16Ty, getContext().Char32Ty
+ getContext().Char8Ty, getContext().Char16Ty,
+ getContext().Char32Ty
};
- for (const QualType &FundamentalType : FundamentalTypes)
- EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport);
+ llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass =
+ RD->hasAttr<DLLExportAttr>()
+ ? llvm::GlobalValue::DLLExportStorageClass
+ : llvm::GlobalValue::DefaultStorageClass;
+ llvm::GlobalValue::VisibilityTypes Visibility =
+ CodeGenModule::GetLLVMVisibility(RD->getVisibility());
+ for (const QualType &FundamentalType : FundamentalTypes) {
+ QualType PointerType = getContext().getPointerType(FundamentalType);
+ QualType PointerTypeConst = getContext().getPointerType(
+ FundamentalType.withConst());
+ for (QualType Type : {FundamentalType, PointerType, PointerTypeConst})
+ ItaniumRTTIBuilder(*this).BuildTypeInfo(
+ Type, llvm::GlobalValue::ExternalLinkage,
+ Visibility, DLLStorageClass);
+ }
}
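Illustratively, for the 'int' entry of FundamentalTypes the loop emits three descriptors; a sketch of the corresponding source-level uses:

    #include <typeinfo>
    // Sketch: each typeid below corresponds to one descriptor built above.
    const std::type_info &TI  = typeid(int);         // _ZTIi
    const std::type_info &TIP = typeid(int *);       // _ZTIPi
    const std::type_info &TIC = typeid(const int *); // _ZTIPKi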
/// What sort of uniqueness rules should we use for the RTTI for the
@@ -3583,12 +3744,22 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM,
}
llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(AliasDecl);
- if (llvm::GlobalValue::isDiscardableIfUnused(Linkage))
- return StructorCodegen::RAUW;
+ // All discardable structors can be RAUWed, but we don't want to do that in
+ // unoptimized code, as that makes the complete structor symbol disappear
+ // entirely, which degrades the debugging experience.
+ // Symbols with private linkage can be safely aliased, so we special-case
+ // them here.
+ if (llvm::GlobalValue::isLocalLinkage(Linkage))
+ return CGM.getCodeGenOpts().OptimizationLevel > 0 ? StructorCodegen::RAUW
+ : StructorCodegen::Alias;
+ // Linkonce structors can be neither aliased nor placed in a comdat, so they
+ // need to be emitted separately.
// FIXME: Should we allow available_externally aliases?
- if (!llvm::GlobalAlias::isValidLinkage(Linkage))
- return StructorCodegen::RAUW;
+ if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) ||
+ !llvm::GlobalAlias::isValidLinkage(Linkage))
+ return CGM.getCodeGenOpts().OptimizationLevel > 0 ? StructorCodegen::RAUW
+ : StructorCodegen::Emit;
if (llvm::GlobalValue::isWeakForLinker(Linkage)) {
// Only ELF and wasm support COMDATs with arbitrary names (C5/D5).
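A hedged sketch (class hypothetical) of the local-linkage case: at -O0 the complete destructor stays visible as an alias for debugging, while at -O1 and above it is RAUWed away.

    // Hypothetical example: internal linkage, no virtual bases, so the
    // complete destructor can alias (-O0) or be replaced by (-O1+) the base
    // destructor.
    namespace {
    struct D {
      ~D() {}
    };
    } // namespace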
@@ -3616,6 +3787,9 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM,
// Create the alias with no name.
auto *Alias = llvm::GlobalAlias::create(Linkage, "", Aliasee);
+ // Constructors and destructors are always unnamed_addr.
+ Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
// Switch any previous uses to the alias.
if (Entry) {
assert(Entry->getType() == Aliasee->getType() &&
@@ -3628,7 +3802,7 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM,
}
// Finally, set up the alias with its proper name and attributes.
- CGM.setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
+ CGM.SetCommonAttributes(AliasDecl, Alias);
}
void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
@@ -3904,7 +4078,9 @@ static void InitCatchParam(CodeGenFunction &CGF,
llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
caughtExnAlignment);
- CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType);
+ LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
+ LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
+ CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap);
return;
}
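For context, a hedged example of the catch-by-value copy this path emits; the exception slot and the parameter are distinct objects, hence DoesNotOverlap.

    // Hypothetical example: 'e' is initialized by EmitAggregateCopy from the
    // adjusted in-flight exception object.
    struct E { int code; };
    void f() {
      try {
        throw E{42};
      } catch (E e) { // aggregate copy into the catch parameter
        (void)e.code;
      }
    }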
@@ -3931,7 +4107,8 @@ static void InitCatchParam(CodeGenFunction &CGF,
AggValueSlot::forAddr(ParamAddr, Qualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased));
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap));
// Leave the terminate scope.
CGF.EHStack.popTerminate();
@@ -4051,3 +4228,11 @@ ItaniumCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This,
const CXXRecordDecl *RD) {
return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD};
}
+
+void WebAssemblyCXXABI::emitBeginCatch(CodeGenFunction &CGF,
+ const CXXCatchStmt *C) {
+ if (CGF.getTarget().hasFeature("exception-handling"))
+ CGF.EHStack.pushCleanup<CatchRetScope>(
+ NormalCleanup, cast<llvm::CatchPadInst>(CGF.CurrentFuncletPad));
+ ItaniumCXXABI::emitBeginCatch(CGF, C);
+}
diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp
index a6f21d8ddcfb..48dea7d54b1e 100644
--- a/lib/CodeGen/MacroPPCallbacks.cpp
+++ b/lib/CodeGen/MacroPPCallbacks.cpp
@@ -178,7 +178,8 @@ void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason,
void MacroPPCallbacks::InclusionDirective(
SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File,
- StringRef SearchPath, StringRef RelativePath, const Module *Imported) {
+ StringRef SearchPath, StringRef RelativePath, const Module *Imported,
+ SrcMgr::CharacteristicKind FileType) {
// Record the line location of the current included file.
LastHashLoc = HashLoc;
diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h
index e117f96f47df..48c67e2d36ad 100644
--- a/lib/CodeGen/MacroPPCallbacks.h
+++ b/lib/CodeGen/MacroPPCallbacks.h
@@ -101,7 +101,8 @@ public:
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange, const FileEntry *File,
StringRef SearchPath, StringRef RelativePath,
- const Module *Imported) override;
+ const Module *Imported,
+ SrcMgr::CharacteristicKind FileType) override;
/// Hook called whenever a macro definition is seen.
void MacroDefined(const Token &MacroNameTok,
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index ffb3681c2585..81ed05059546 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -216,13 +216,20 @@ public:
return DT != Dtor_Base;
}
+ void setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
+ const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const override;
+
+ llvm::GlobalValue::LinkageTypes
+ getCXXDestructorLinkage(GVALinkage Linkage, const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const override;
+
void EmitCXXDestructors(const CXXDestructorDecl *D) override;
const CXXRecordDecl *
getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override {
- MD = MD->getCanonicalDecl();
if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) {
- MicrosoftVTableContext::MethodVFTableLocation ML =
+ MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(MD);
// The vbases might be ordered differently in the final overrider object
// and the complete object, so the "this" argument may sometimes point to
@@ -357,9 +364,6 @@ public:
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
GlobalDecl GD, bool ReturnAdjustment) override {
- // Never dllimport/dllexport thunks.
- Thunk->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
-
GVALinkage Linkage =
getContext().GetGVALinkageForFunction(cast<FunctionDecl>(GD.getDecl()));
@@ -371,6 +375,8 @@ public:
Thunk->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
}
+ bool exportThunk() override { return false; }
+
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
const ThisAdjustment &TA) override;
@@ -516,10 +522,12 @@ public:
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name))
return GV;
- return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty,
- /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr, Name);
+ auto *GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty,
+ /*isConstant=*/true,
+ llvm::GlobalValue::ExternalLinkage,
+ /*Initializer=*/nullptr, Name);
+ CGM.setDSOLocal(GV);
+ return GV;
}
llvm::Constant *getImageRelativeConstant(llvm::Constant *PtrVal) {
@@ -558,7 +566,7 @@ private:
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields);
- /// \brief Shared code for virtual base adjustment. Returns the offset from
+ /// Shared code for virtual base adjustment. Returns the offset from
/// the vbptr to the virtual base. Optionally returns the address of the
/// vbptr itself.
llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
@@ -582,14 +590,14 @@ private:
performBaseAdjustment(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy);
- /// \brief Performs a full virtual base adjustment. Used to dereference
+ /// Performs a full virtual base adjustment. Used to dereference
/// pointers to members of virtual bases.
llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E,
const CXXRecordDecl *RD, Address Base,
llvm::Value *VirtualBaseAdjustmentOffset,
llvm::Value *VBPtrOffset /* optional */);
- /// \brief Emits a full member pointer with the fields common to data and
+ /// Emits a full member pointer with the fields common to data and
/// function member pointers.
llvm::Constant *EmitFullMemberPointer(llvm::Constant *FirstField,
bool IsMemberFunction,
@@ -600,16 +608,15 @@ private:
bool MemberPointerConstantIsNull(const MemberPointerType *MPT,
llvm::Constant *MP);
- /// \brief - Initialize all vbptrs of 'this' with RD as the complete type.
+ /// - Initialize all vbptrs of 'this' with RD as the complete type.
void EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD);
- /// \brief Caching wrapper around VBTableBuilder::enumerateVBTables().
+ /// Caching wrapper around VBTableBuilder::enumerateVBTables().
const VBTableGlobals &enumerateVBTables(const CXXRecordDecl *RD);
- /// \brief Generate a thunk for calling a virtual member function MD.
- llvm::Function *EmitVirtualMemPtrThunk(
- const CXXMethodDecl *MD,
- const MicrosoftVTableContext::MethodVFTableLocation &ML);
+ /// Generate a thunk for calling a virtual member function MD.
+ llvm::Function *EmitVirtualMemPtrThunk(const CXXMethodDecl *MD,
+ const MethodVFTableLocation &ML);
public:
llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
@@ -753,15 +760,15 @@ private:
typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy;
typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy;
typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalValue *> VFTablesMapTy;
- /// \brief All the vftables that have been referenced.
+ /// All the vftables that have been referenced.
VFTablesMapTy VFTablesMap;
VTablesMapTy VTablesMap;
- /// \brief This set holds the record decls we've deferred vtable emission for.
+ /// This set holds the record decls we've deferred vtable emission for.
llvm::SmallPtrSet<const CXXRecordDecl *, 4> DeferredVFTables;
- /// \brief All the vbtables which have been referenced.
+ /// All the vbtables which have been referenced.
llvm::DenseMap<const CXXRecordDecl *, VBTableGlobals> VBTablesMap;
/// Info on the global variable used to guard initialization of static locals.
@@ -820,45 +827,8 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
return RAA_Default;
case llvm::Triple::x86_64:
- // If a class has a destructor, we'd really like to pass it indirectly
- // because it allows us to elide copies. Unfortunately, MSVC makes that
- // impossible for small types, which it will pass in a single register or
- // stack slot. Most objects with dtors are large-ish, so handle that early.
- // We can't call out all large objects as being indirect because there are
- // multiple x64 calling conventions and the C++ ABI code shouldn't dictate
- // how we pass large POD types.
- //
- // Note: This permits small classes with nontrivial destructors to be
- // passed in registers, which is non-conforming.
- if (RD->hasNonTrivialDestructor() &&
- getContext().getTypeSize(RD->getTypeForDecl()) > 64)
- return RAA_Indirect;
-
- // If a class has at least one non-deleted, trivial copy constructor, it
- // is passed according to the C ABI. Otherwise, it is passed indirectly.
- //
- // Note: This permits classes with non-trivial copy or move ctors to be
- // passed in registers, so long as they *also* have a trivial copy ctor,
- // which is non-conforming.
- if (RD->needsImplicitCopyConstructor()) {
- // If the copy ctor has not yet been declared, we can read its triviality
- // off the AST.
- if (!RD->defaultedCopyConstructorIsDeleted() &&
- RD->hasTrivialCopyConstructor())
- return RAA_Default;
- } else {
- // Otherwise, we need to find the copy constructor(s) and ask.
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor()) {
- // We had at least one nondeleted trivial copy ctor. Return directly.
- if (!CD->isDeleted() && CD->isTrivial())
- return RAA_Default;
- }
- }
- }
-
- // We have no trivial, non-deleted copy constructor.
- return RAA_Indirect;
+ case llvm::Triple::aarch64:
+ return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default;
}
llvm_unreachable("invalid enum");
@@ -890,20 +860,6 @@ void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
CGF.EmitRuntimeCallOrInvoke(Fn, Args);
}
-namespace {
-struct CatchRetScope final : EHScopeStack::Cleanup {
- llvm::CatchPadInst *CPI;
-
- CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {}
-
- void Emit(CodeGenFunction &CGF, Flags flags) override {
- llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest");
- CGF.Builder.CreateCatchRet(CPI, BB);
- CGF.EmitBlock(BB);
- }
-};
-}
-
void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF,
const CXXCatchStmt *S) {
// In the MS ABI, the runtime handles the copy, and the catch handler is
@@ -1105,10 +1061,22 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
// the second parameter.
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
+
+ // aarch64-windows requires that instance methods use X1 for the address of
+ // the returned value. So for aarch64-windows we do not mark the return as
+ // SRet.
+ FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() ==
+ llvm::Triple::aarch64);
return true;
} else if (!RD->isPOD()) {
// If it's a free function, non-POD types are returned indirectly.
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+
+ // aarch64-windows requires that non-POD, non-instance returns use X0 for
+ // the address of the returned value. So for aarch64-windows we do not mark
+ // the return as SRet.
+ FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() ==
+ llvm::Triple::aarch64);
return true;
}
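A hedged sketch of the two conventions described above (types hypothetical):

    // Hypothetical example on aarch64-windows:
    struct Big { int a[16]; Big(const Big &); }; // returned indirectly
    Big make();    // free function: return-slot address arrives in X0
    struct C {
      Big get();   // instance method: 'this' in X0, return slot in X1
    };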
@@ -1182,15 +1150,16 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers(
unsigned AS = getThisAddress(CGF).getAddressSpace();
llvm::Value *Int8This = nullptr; // Initialize lazily.
- for (VBOffsets::const_iterator I = VBaseMap.begin(), E = VBaseMap.end();
- I != E; ++I) {
+ for (const CXXBaseSpecifier &S : RD->vbases()) {
+ const CXXRecordDecl *VBase = S.getType()->getAsCXXRecordDecl();
+ auto I = VBaseMap.find(VBase);
+ assert(I != VBaseMap.end());
if (!I->second.hasVtorDisp())
continue;
llvm::Value *VBaseOffset =
- GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first);
- uint64_t ConstantVBaseOffset =
- Layout.getVBaseClassOffset(I->first).getQuantity();
+ GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, VBase);
+ uint64_t ConstantVBaseOffset = I->second.VBaseOffset.getQuantity();
// vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase).
llvm::Value *VtorDispValue = Builder.CreateSub(
@@ -1233,7 +1202,7 @@ void MicrosoftCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
if (!hasDefaultCXXMethodCC(getContext(), D) || D->getNumParams() != 0) {
llvm::Function *Fn = getAddrOfCXXCtorClosure(D, Ctor_DefaultClosure);
Fn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
- Fn->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ CGM.setGVProperties(Fn, D);
}
}
@@ -1295,6 +1264,52 @@ MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
return Added;
}
+void MicrosoftCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
+ const CXXDestructorDecl *Dtor,
+ CXXDtorType DT) const {
+ // Deleting destructor variants are never imported or exported. Give them the
+ // default storage class.
+ if (DT == Dtor_Deleting) {
+ GV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ } else {
+ const NamedDecl *ND = Dtor;
+ CGM.setDLLImportDLLExport(GV, ND);
+ }
+}
+
+llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage(
+ GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const {
+ // Internal things are always internal, regardless of attributes. After this,
+ // we know the destructor is externally visible.
+ if (Linkage == GVA_Internal)
+ return llvm::GlobalValue::InternalLinkage;
+
+ switch (DT) {
+ case Dtor_Base:
+ // The base destructor most closely tracks the user-declared destructor, so
+ // we delegate back to the normal declarator case.
+ return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
+ /*isConstantVariable=*/false);
+ case Dtor_Complete:
+ // The complete destructor is like an inline function, but it may be
+ // imported and therefore must be exported as well. This requires changing
+ // the linkage if a DLL attribute is present.
+ if (Dtor->hasAttr<DLLExportAttr>())
+ return llvm::GlobalValue::WeakODRLinkage;
+ if (Dtor->hasAttr<DLLImportAttr>())
+ return llvm::GlobalValue::AvailableExternallyLinkage;
+ return llvm::GlobalValue::LinkOnceODRLinkage;
+ case Dtor_Deleting:
+ // Deleting destructors are like inline functions. They have vague linkage
+ // and are emitted everywhere they are used. They are internal if the class
+ // is internal.
+ return llvm::GlobalValue::LinkOnceODRLinkage;
+ case Dtor_Comdat:
+ llvm_unreachable("MS C++ ABI does not support comdat dtors");
+ }
+ llvm_unreachable("invalid dtor type");
+}
+
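A hedged illustration of the Dtor_Complete branch (class names hypothetical):

    // Hypothetical examples:
    struct __declspec(dllexport) E1 { ~E1() {} }; // complete dtor: WeakODR
    struct __declspec(dllimport) E2 { ~E2() {} }; // complete dtor:
                                                  // available_externally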
void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
// The TU defining a dtor is only guaranteed to emit a base destructor. All
// other destructor variants are delegating thunks.
@@ -1303,10 +1318,8 @@ void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
CharUnits
MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) {
- GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- GlobalDecl LookupGD = GD;
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
// Complete destructors take a pointer to the complete object as a
// parameter, thus don't need this adjustment.
@@ -1315,11 +1328,11 @@ MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) {
// There's no Dtor_Base in vftable but it shares the this adjustment with
// the deleting one, so look it up instead.
- LookupGD = GlobalDecl(DD, Dtor_Deleting);
+ GD = GlobalDecl(DD, Dtor_Deleting);
}
- MicrosoftVTableContext::MethodVFTableLocation ML =
- CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
+ MethodVFTableLocation ML =
+ CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
CharUnits Adjustment = ML.VFPtrOffset;
// Normal virtual instance methods need to adjust from the vfptr that first
@@ -1353,7 +1366,6 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall(
return CGF.Builder.CreateConstByteGEP(This, Adjustment);
}
- GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
GlobalDecl LookupGD = GD;
@@ -1367,7 +1379,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall(
// with the base one, so look up the deleting one instead.
LookupGD = GlobalDecl(DD, Dtor_Deleting);
}
- MicrosoftVTableContext::MethodVFTableLocation ML =
+ MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
CharUnits StaticOffset = ML.VFPtrOffset;
@@ -1523,8 +1535,7 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
}
RValue RV = RValue::get(MostDerivedArg);
if (FPT->isVariadic()) {
- Args.insert(Args.begin() + 1,
- CallArg(RV, getContext().IntTy, /*needscopy=*/false));
+ Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy));
return AddedStructorArgs::prefix(1);
}
Args.add(RV, getContext().IntTy);
@@ -1535,6 +1546,12 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) {
+ // Use the base destructor variant in place of the complete destructor variant
+ // if the class has no virtual bases. This effectively implements some of the
+ // -mconstructor-aliases optimization, but as part of the MS C++ ABI.
+ if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0)
+ Type = Dtor_Base;
+
CGCallee Callee = CGCallee::forDirect(
CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
DD);
@@ -1817,7 +1834,6 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
- GD = GD.getCanonicalDecl();
CGBuilderTy &Builder = CGF.Builder;
Ty = Ty->getPointerTo()->getPointerTo();
@@ -1828,8 +1844,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
llvm::Value *VTable = CGF.GetVTablePtr(VPtr, Ty, MethodDecl->getParent());
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
- MicrosoftVTableContext::MethodVFTableLocation ML =
- VFTContext.getMethodVFTableLocation(GD);
+ MethodVFTableLocation ML = VFTContext.getMethodVFTableLocation(GD);
// Compute the identity of the most derived class whose virtual table is
// located at the MethodVFTableLocation ML.
@@ -1857,7 +1872,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
}
- CGCallee Callee(MethodDecl, VFunc);
+ CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc);
return Callee;
}
@@ -1872,9 +1887,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
GlobalDecl GD(Dtor, Dtor_Deleting);
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, StructorType::Deleting);
- llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
- CGCallee Callee = getVirtualFunctionPointer(
- CGF, GD, This, Ty, CE ? CE->getLocStart() : SourceLocation());
+ llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
+ CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
ASTContext &Context = getContext();
llvm::Value *ImplicitParam = llvm::ConstantInt::get(
@@ -1915,23 +1929,24 @@ MicrosoftCXXABI::enumerateVBTables(const CXXRecordDecl *RD) {
return VBGlobals;
}
-llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
- const CXXMethodDecl *MD,
- const MicrosoftVTableContext::MethodVFTableLocation &ML) {
+llvm::Function *
+MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD,
+ const MethodVFTableLocation &ML) {
assert(!isa<CXXConstructorDecl>(MD) && !isa<CXXDestructorDecl>(MD) &&
"can't form pointers to ctors or virtual dtors");
// Calculate the mangled name.
SmallString<256> ThunkName;
llvm::raw_svector_ostream Out(ThunkName);
- getMangleContext().mangleVirtualMemPtrThunk(MD, Out);
+ getMangleContext().mangleVirtualMemPtrThunk(MD, ML, Out);
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
return cast<llvm::Function>(GV);
// Create the llvm::Function.
- const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSMemberPointerThunk(MD);
+ const CGFunctionInfo &FnInfo =
+ CGM.getTypes().arrangeUnprototypedMustTailThunk(MD);
llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Function *ThunkFn =
llvm::Function::Create(ThunkTy, llvm::Function::ExternalLinkage,
@@ -2716,9 +2731,8 @@ llvm::Constant *
MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
assert(MD->isInstance() && "Member function must not be static!");
- MD = MD->getCanonicalDecl();
CharUnits NonVirtualBaseAdjustment = CharUnits::Zero();
- const CXXRecordDecl *RD = MD->getParent()->getMostRecentDecl();
+ const CXXRecordDecl *RD = MD->getParent()->getMostRecentNonInjectedDecl();
CodeGenTypes &Types = CGM.getTypes();
unsigned VBTableIndex = 0;
@@ -2738,8 +2752,7 @@ MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
FirstField = CGM.GetAddrOfFunction(MD, Ty);
} else {
auto &VTableContext = CGM.getMicrosoftVTableContext();
- MicrosoftVTableContext::MethodVFTableLocation ML =
- VTableContext.getMethodVFTableLocation(MD);
+ MethodVFTableLocation ML = VTableContext.getMethodVFTableLocation(MD);
FirstField = EmitVirtualMemPtrThunk(MD, ML);
// Include the vfptr adjustment if the method is in a non-primary vftable.
NonVirtualBaseAdjustment += ML.VFPtrOffset;
@@ -3336,14 +3349,14 @@ CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) {
// a reference to the TypeInfo for the type and a reference to the
// CompleteHierarchyDescriptor for the type.
//
-// ClassHieararchyDescriptor: Contains information about a class hierarchy.
+// ClassHierarchyDescriptor: Contains information about a class hierarchy.
// Used during dynamic_cast to walk a class hierarchy. References a base
// class array and the size of said array.
//
// BaseClassArray: Contains a list of classes in a hierarchy. BaseClassArray is
// somewhat of a misnomer because the most derived class is also in the list
// as well as multiple copies of virtual bases (if they occur multiple times
-// in the hiearchy.) The BaseClassArray contains one BaseClassDescriptor for
+// in the hierarchy.) The BaseClassArray contains one BaseClassDescriptor for
// every path in the hierarchy, in pre-order depth first order. Note, we do
// not declare a specific llvm type for BaseClassArray, it's merely an array
// of BaseClassDescriptor pointers.
@@ -3356,7 +3369,7 @@ CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) {
// mangled into them so they can be aggressively deduplicated by the linker.
static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
- StringRef MangledName("\01??_7type_info@@6B@");
+ StringRef MangledName("??_7type_info@@6B@");
if (auto VTable = CGM.getModule().getNamedGlobal(MangledName))
return VTable;
return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
@@ -3367,7 +3380,7 @@ static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
namespace {
-/// \brief A Helper struct that stores information about a class in a class
+/// A helper struct that stores information about a class in a class
/// hierarchy. The information stored in these structs is used during
/// the generation of ClassHierarchyDescriptors and BaseClassDescriptors.
// During RTTI creation, MSRTTIClasses are stored in a contiguous array with
@@ -3394,7 +3407,7 @@ struct MSRTTIClass {
uint32_t Flags, NumBases, OffsetInVBase;
};
-/// \brief Recursively initialize the base class array.
+/// Recursively initialize the base class array.
uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent,
const CXXBaseSpecifier *Specifier) {
Flags = HasHierarchyDescriptor;
@@ -3441,7 +3454,7 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) {
llvm_unreachable("Invalid linkage!");
}
-/// \brief An ephemeral helper class for building MS RTTI types. It caches some
+/// An ephemeral helper class for building MS RTTI types. It caches some
/// calls to the module and information about the most derived class in a
/// hierarchy.
struct MSRTTIBuilder {
@@ -3474,7 +3487,7 @@ struct MSRTTIBuilder {
} // namespace
-/// \brief Recursively serializes a class hierarchy in pre-order depth first
+/// Recursively serializes a class hierarchy in pre-order depth first
/// order.
static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes,
const CXXRecordDecl *RD) {
@@ -3483,7 +3496,7 @@ static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes,
serializeClassHierarchy(Classes, Base.getType()->getAsCXXRecordDecl());
}
-/// \brief Find ambiguity among base classes.
+/// Find ambiguity among base classes.
static void
detectAmbiguousBases(SmallVectorImpl<MSRTTIClass> &Classes) {
llvm::SmallPtrSet<const CXXRecordDecl *, 8> VirtualBases;
@@ -3749,7 +3762,7 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
Flags};
}
-/// \brief Gets a TypeDescriptor. Returns a llvm::Constant * rather than a
+/// Gets a TypeDescriptor. Returns a llvm::Constant * rather than a
/// llvm::GlobalVariable * because different type descriptors have different
/// types, and need to be abstracted. They are abstracted by casting the
/// address to an Int8PtrTy.
@@ -3791,7 +3804,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy);
}
-/// \brief Gets or a creates a Microsoft CompleteObjectLocator.
+/// Gets or creates a Microsoft CompleteObjectLocator.
llvm::GlobalVariable *
MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD,
const VPtrInfo &Info) {
@@ -3808,19 +3821,12 @@ static void emitCXXConstructor(CodeGenModule &CGM,
static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor,
StructorType dtorType) {
- // The complete destructor is equivalent to the base destructor for
- // classes with no virtual bases, so try to emit it as an alias.
- if (!dtor->getParent()->getNumVBases() &&
- (dtorType == StructorType::Complete || dtorType == StructorType::Base)) {
- bool ProducedAlias = !CGM.TryEmitDefinitionAsAlias(
- GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base));
- if (ProducedAlias) {
- if (dtorType == StructorType::Complete)
- return;
- if (dtor->isVirtual())
- CGM.getVTables().EmitThunks(GlobalDecl(dtor, Dtor_Complete));
- }
- }
+ // Emit the base destructor if the base and complete (vbase) destructors are
+ // equivalent. This effectively implements -mconstructor-aliases as part of
+ // the ABI.
+ if (dtorType == StructorType::Complete &&
+ dtor->getParent()->getNumVBases() == 0)
+ dtorType = StructorType::Base;
// The base destructor is equivalent to the base destructor of its
// base class if there is exactly one non-virtual base class with a
@@ -3898,7 +3904,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
SourceLocation(),
&getContext().Idents.get("is_most_derived"),
getContext().IntTy, ImplicitParamDecl::Other);
- // Only add the parameter to the list if thie class has virtual bases.
+ // Only add the parameter to the list if the class has virtual bases.
if (RD->getNumVBases() > 0)
FunctionArgs.push_back(&IsMostDerived);
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index d0760b9cc2a6..c164cec5d942 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -71,9 +71,8 @@ class PCHContainerGenerator : public ASTConsumer {
}
bool VisitImportDecl(ImportDecl *D) {
- auto *Import = cast<ImportDecl>(D);
- if (!Import->getImportedOwningModule())
- DI.EmitImportDecl(*Import);
+ if (!D->getImportedOwningModule())
+ DI.EmitImportDecl(*D);
return true;
}
@@ -229,6 +228,11 @@ public:
Builder->getModuleDebugInfo()->completeRequiredType(RD);
}
+ void HandleImplicitImportDecl(ImportDecl *D) override {
+ if (!D->getImportedOwningModule())
+ Builder->getModuleDebugInfo()->EmitImportDecl(*D);
+ }
+
/// Emit a container holding the serialized AST.
void HandleTranslationUnit(ASTContext &Ctx) override {
assert(M && VMContext && Builder);
@@ -286,7 +290,7 @@ public:
else
ASTSym->setSection("__clangast");
- DEBUG({
+ LLVM_DEBUG({
// Print the IR for the PCH container to the debug output.
llvm::SmallString<0> Buffer;
clang::EmitBackendOutput(
diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp
index f891cfbe4bb2..23cf9e490828 100644
--- a/lib/CodeGen/SanitizerMetadata.cpp
+++ b/lib/CodeGen/SanitizerMetadata.cpp
@@ -27,7 +27,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
bool IsBlacklisted) {
if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
SanitizerKind::KernelAddress |
- SanitizerKind::HWAddress))
+ SanitizerKind::HWAddress |
+ SanitizerKind::KernelHWAddress))
return;
IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
@@ -60,7 +61,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
const VarDecl &D, bool IsDynInit) {
if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
SanitizerKind::KernelAddress |
- SanitizerKind::HWAddress))
+ SanitizerKind::HWAddress |
+ SanitizerKind::KernelHWAddress))
return;
std::string QualName;
llvm::raw_string_ostream OS(QualName);
@@ -79,7 +81,8 @@ void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
// instrumentation.
if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
SanitizerKind::KernelAddress |
- SanitizerKind::HWAddress))
+ SanitizerKind::HWAddress |
+ SanitizerKind::KernelHWAddress))
reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true);
}
diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp
index fc8e36d2c599..3673a5597eac 100644
--- a/lib/CodeGen/SwiftCallingConv.cpp
+++ b/lib/CodeGen/SwiftCallingConv.cpp
@@ -579,11 +579,9 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const {
// Empty types don't need to be passed indirectly.
if (Entries.empty()) return false;
- CharUnits totalSize = Entries.back().End;
-
// Avoid copying the array of types when there's just a single element.
if (Entries.size() == 1) {
- return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize,
+ return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(
Entries.back().Type,
asReturnValue);
}
@@ -593,8 +591,14 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const {
for (auto &entry : Entries) {
componentTys.push_back(entry.Type);
}
- return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize,
- componentTys,
+ return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys,
+ asReturnValue);
+}
+
+bool swiftcall::shouldPassIndirectly(CodeGenModule &CGM,
+ ArrayRef<llvm::Type*> componentTys,
+ bool asReturnValue) {
+ return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys,
asReturnValue);
}
@@ -736,24 +740,12 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize,
components.append(numElts, eltTy);
}
-bool swiftcall::shouldPassCXXRecordIndirectly(CodeGenModule &CGM,
- const CXXRecordDecl *record) {
- // Following a recommendation from Richard Smith, pass a C++ type
- // indirectly only if the destructor is non-trivial or *all* of the
- // copy/move constructors are deleted or non-trivial.
-
- if (record->hasNonTrivialDestructor())
- return true;
-
- // It would be nice if this were summarized on the CXXRecordDecl.
- for (auto ctor : record->ctors()) {
- if (ctor->isCopyOrMoveConstructor() && !ctor->isDeleted() &&
- ctor->isTrivial()) {
- return false;
- }
- }
-
- return true;
+bool swiftcall::mustPassRecordIndirectly(CodeGenModule &CGM,
+ const RecordDecl *record) {
+ // FIXME: should we not rely on the standard computation in Sema, just in
+ // case we want to diverge from the platform ABI (e.g. on targets where
+ // that uses the MSVC rule)?
+ return !record->canPassInRegisters();
}
static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering,
@@ -775,10 +767,8 @@ static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type,
auto record = recordType->getDecl();
auto &layout = CGM.getContext().getASTRecordLayout(record);
- if (auto cxxRecord = dyn_cast<CXXRecordDecl>(record)) {
- if (shouldPassCXXRecordIndirectly(CGM, cxxRecord))
- return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false);
- }
+ if (mustPassRecordIndirectly(CGM, record))
+ return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false);
SwiftAggLowering lowering(CGM);
lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout);
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 4b8006428f8f..fa9b0a27af28 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -140,8 +140,11 @@ bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
CGCXXABI &CXXABI) {
const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
- if (!RD)
+ if (!RD) {
+ if (!RT->getDecl()->canPassInRegisters())
+ return CGCXXABI::RAA_Indirect;
return CGCXXABI::RAA_Default;
+ }
return CXXABI.getRecordArgABI(RD);
}
@@ -153,6 +156,20 @@ static CGCXXABI::RecordArgABI getRecordArgABI(QualType T,
return getRecordArgABI(RT, CXXABI);
}
+static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
+ const ABIInfo &Info) {
+ QualType Ty = FI.getReturnType();
+
+ if (const auto *RT = Ty->getAs<RecordType>())
+ if (!isa<CXXRecordDecl>(RT->getDecl()) &&
+ !RT->getDecl()->canPassInRegisters()) {
+ FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty);
+ return true;
+ }
+
+ return CXXABI.classifyReturnType(FI);
+}
+
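A minimal sketch, assuming a hypothetical MyABIInfo subclass, of how the helper is intended to wrap the C++ ABI's return classification:

    // Hypothetical use inside an ABIInfo implementation: try the record
    // canPassInRegisters logic first, then fall back to the target's own
    // scalar classification.
    void MyABIInfo::computeInfo(CGFunctionInfo &FI) const {
      if (!::classifyReturnType(getCXXABI(), FI, *this))
        FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
      for (auto &Arg : FI.arguments())
        Arg.info = classifyArgumentType(Arg.type);
    }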
/// Pass transparent unions as if they were the type of the first element. Sema
/// should ensure that all elements of the union have the same "machine type".
static QualType useFirstFieldIfTransparentUnion(QualType Ty) {
@@ -201,10 +218,6 @@ bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
return false;
}
-bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
- return false;
-}
-
LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
raw_ostream &OS = llvm::errs();
OS << "(ABIArgInfo Kind=";
@@ -682,8 +695,8 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
@@ -697,8 +710,8 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
//===----------------------------------------------------------------------===//
@@ -734,9 +747,18 @@ class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {}
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override {
+ if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype())
+ Fn->addFnAttr("no-prototype");
+ }
+ }
};
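A sketch of what triggers the new attribute (hypothetical declarations, not
from this change):

    int legacy();     /* no prototype, no body -> gets "no-prototype" */
    int modern(void); /* prototyped            -> no attribute        */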
-/// \brief Classify argument of given type \p Ty.
+/// Classify argument of given type \p Ty.
ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
@@ -831,7 +853,7 @@ Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
}
-/// \brief Classify argument of given type \p Ty.
+/// Classify argument of given type \p Ty.
ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
if (isAggregateTypeForABI(Ty)) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -845,8 +867,8 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect();
}
- return (Ty->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
@@ -861,8 +883,8 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
/// IsX86_MMXType - Return true if this is an MMX type.
@@ -932,7 +954,7 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
-/// \brief Similar to llvm::CCState, but for Clang.
+/// Similar to llvm::CCState, but for Clang.
struct CCState {
CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
@@ -985,14 +1007,14 @@ class X86_32ABIInfo : public SwiftABIInfo {
ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
- /// \brief Return the alignment to use for the given type on the stack.
+ /// Return the alignment to use for the given type on the stack.
unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
- /// \brief Updates the number of available free registers, returns
+ /// Updates the number of available free registers and returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -1002,7 +1024,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
bool canExpandIndirectArgument(QualType Ty) const;
- /// \brief Rewrite the function info so that all memory arguments use
+ /// Rewrite the function info so that all memory arguments use
/// inalloca.
void rewriteWithInAlloca(CGFunctionInfo &FI) const;
@@ -1028,8 +1050,7 @@ public:
IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
DefaultNumRegisterParameters(NumRegisterParameters) {}
- bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> scalars,
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
// LLVM's x86-32 lowering currently only assigns up to three
// integer registers and three fp registers. Oddly, it'll use up to
@@ -1057,8 +1078,7 @@ public:
const llvm::Triple &Triple, const CodeGenOptions &Opts);
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &CGM) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
// Darwin uses different dwarf register numbers for EH.
@@ -1404,8 +1424,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
static bool isSSEVectorType(ASTContext &Context, QualType Ty) {
@@ -1677,8 +1697,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
if (Ty->isPromotableIntegerType()) {
if (InReg)
- return ABIArgInfo::getExtendInReg();
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtendInReg(Ty);
+ return ABIArgInfo::getExtend(Ty);
}
if (InReg)
@@ -1755,7 +1775,7 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
} else
State.FreeRegs = DefaultNumRegisterParameters;
- if (!getCXXABI().classifyReturnType(FI)) {
+ if (!::classifyReturnType(getCXXABI(), FI, *this)) {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
} else if (FI.getReturnInfo().isIndirect()) {
// The C++ ABI is not aware of register usage, so we have to check if the
@@ -1925,19 +1945,13 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
}
void X86_32TargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const {
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
- // Get the LLVM function.
llvm::Function *Fn = cast<llvm::Function>(GV);
-
- // Now add the 'alignstack' attribute with a value of 16.
- llvm::AttrBuilder B;
- B.addStackAlignmentAttr(16);
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ Fn->addFnAttr("stackrealign");
}
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
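The same simplification, a single string attribute in place of an AttrBuilder
carrying alignstack(16), is repeated for the other X86 variants below. A
minimal example of the source construct that reaches this path (hypothetical):

    // Carries X86ForceAlignArgPointerAttr, now lowered to "stackrealign".
    __attribute__((force_align_arg_pointer)) void realigned(void);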
@@ -2121,8 +2135,8 @@ class X86_64ABIInfo : public SwiftABIInfo {
/// classify it as INTEGER (for compatibility with older clang compilers).
bool classifyIntegerMMXAsSSE() const {
// Clang <= 3.8 did not do this.
- if (getCodeGenOpts().getClangABICompat() <=
- CodeGenOptions::ClangABI::Ver3_8)
+ if (getContext().getLangOpts().getClangABICompat() <=
+ LangOptions::ClangABI::Ver3_8)
return false;
const llvm::Triple &Triple = getTarget().getTriple();
@@ -2168,8 +2182,7 @@ public:
return Has64BitPointers;
}
- bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> scalars,
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
@@ -2201,8 +2214,7 @@ public:
return isX86VectorCallAggregateSmallEnough(NumMembers);
}
- bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type *> scalars,
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
@@ -2286,19 +2298,13 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override {
- if (!IsForDefinition)
+ CodeGen::CodeGenModule &CGM) const override {
+ if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
- // Get the LLVM function.
- auto *Fn = cast<llvm::Function>(GV);
-
- // Now add the 'alignstack' attribute with a value of 16.
- llvm::AttrBuilder B;
- B.addStackAlignmentAttr(16);
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ Fn->addFnAttr("stackrealign");
}
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -2346,8 +2352,7 @@ public:
Win32StructABI, NumRegisterParameters, false) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
@@ -2362,26 +2367,24 @@ public:
}
};
-static void addStackProbeSizeTargetAttribute(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) {
- if (D && isa<FunctionDecl>(D)) {
- if (CGM.getCodeGenOpts().StackProbeSize != 4096) {
- llvm::Function *Fn = cast<llvm::Function>(GV);
+static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) {
+ if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) {
+ if (CGM.getCodeGenOpts().StackProbeSize != 4096)
Fn->addFnAttr("stack-probe-size",
llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
- }
+ if (CGM.getCodeGenOpts().NoStackArgProbe)
+ Fn->addFnAttr("no-stack-arg-probe");
}
}
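A sketch of how these attributes surface, assuming the usual driver flag
spellings (the flags themselves are not shown in this diff):

    // clang -mstack-probe-size=8192 ...  -> "stack-probe-size"="8192"
    // clang -mno-stack-arg-probe ...     -> "no-stack-arg-probe"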
void WinX86_32TargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const {
- X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (GV->isDeclaration())
return;
- addStackProbeSizeTargetAttribute(D, GV, CGM);
+ addStackProbeTargetAttributes(D, GV, CGM);
}
class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -2391,8 +2394,7 @@ public:
: TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &CGM) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
@@ -2422,20 +2424,14 @@ public:
};
void WinX86_64TargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const {
- TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
- // Get the LLVM function.
- auto *Fn = cast<llvm::Function>(GV);
-
- // Now add the 'alignstack' attribute with a value of 16.
- llvm::AttrBuilder B;
- B.addStackAlignmentAttr(16);
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ Fn->addFnAttr("stackrealign");
}
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -2443,7 +2439,7 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes(
}
}
- addStackProbeSizeTargetAttribute(D, GV, CGM);
+ addStackProbeTargetAttributes(D, GV, CGM);
}
}
@@ -2868,8 +2864,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
return getNaturalAlignIndirect(Ty);
@@ -2901,8 +2897,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -3271,7 +3267,7 @@ classifyReturnType(QualType RetTy) const {
if (RetTy->isIntegralOrEnumerationType() &&
RetTy->isPromotableIntegerType())
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtend(RetTy);
}
break;
@@ -3416,7 +3412,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
if (Ty->isIntegralOrEnumerationType() &&
Ty->isPromotableIntegerType())
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtend(Ty);
}
break;
@@ -3543,14 +3539,24 @@ ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty,
void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
- bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall;
+ const unsigned CallingConv = FI.getCallingConvention();
+ // It is possible to force the Win64 calling convention on any x86_64 target
+ // by using __attribute__((ms_abi)). In that case, delegate this call to
+ // WinX86_64ABIInfo::computeInfo so that Win64-compatible code is emitted.
+ if (CallingConv == llvm::CallingConv::Win64) {
+ WinX86_64ABIInfo Win64ABIInfo(CGT);
+ Win64ABIInfo.computeInfo(FI);
+ return;
+ }
+
+ bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
// Keep track of the number of assigned registers.
unsigned FreeIntRegs = IsRegCall ? 11 : 6;
unsigned FreeSSERegs = IsRegCall ? 16 : 8;
unsigned NeededInt, NeededSSE;
- if (!getCXXABI().classifyReturnType(FI)) {
+ if (!::classifyReturnType(getCXXABI(), FI, *this)) {
if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
!FI.getReturnType()->getTypePtr()->isUnionType()) {
FI.getReturnInfo() =
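A minimal example of the construct the new Win64 delegation handles
(hypothetical declaration, not part of this change):

    // On a SysV x86_64 target, ms_abi selects llvm::CallingConv::Win64, so
    // computeInfo() above now defers to WinX86_64ABIInfo.
    __attribute__((ms_abi)) void win64_style(int a, double b);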
@@ -3797,17 +3803,18 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
Address RegAddrHi =
CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
CharUnits::fromQuantity(16));
- llvm::Type *DoubleTy = CGF.DoubleTy;
- llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
+ llvm::Type *ST = AI.canHaveCoerceToType()
+ ? AI.getCoerceToType()
+ : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
llvm::Value *V;
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
- V = CGF.Builder.CreateLoad(
- CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy));
+ V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
+ RegAddrLo, ST->getStructElementType(0)));
CGF.Builder.CreateStore(V,
CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero()));
- V = CGF.Builder.CreateLoad(
- CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy));
+ V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
+ RegAddrHi, ST->getStructElementType(1)));
CGF.Builder.CreateStore(V,
CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8)));
@@ -3941,7 +3948,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
// extended.
const BuiltinType *BT = Ty->getAs<BuiltinType>();
if (BT && BT->getKind() == BuiltinType::Bool)
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtend(Ty);
// Mingw64 GCC uses the old 80 bit extended precision floating point unit. It
// passes them indirectly through memory.
@@ -4289,7 +4296,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
namespace {
/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
-class PPC64_SVR4_ABIInfo : public ABIInfo {
+class PPC64_SVR4_ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
ELFv1 = 0,
@@ -4333,7 +4340,7 @@ private:
public:
PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX,
bool SoftFloatABI)
- : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX),
+ : SwiftABIInfo(CGT), Kind(Kind), HasQPX(HasQPX),
IsSoftFloatABI(SoftFloatABI) {}
bool isPromotableTypeForABI(QualType Ty) const;
@@ -4376,6 +4383,15 @@ public:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
+
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ return occupiesMoreThan(CGT, scalars, /*total*/ 4);
+ }
+
+ bool isSwiftErrorInRegister() const override {
+ return false;
+ }
};
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4543,7 +4559,7 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
// For compatibility with GCC, ignore empty bitfields in C++ mode.
if (getContext().getLangOpts().CPlusPlus &&
- FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
+ FD->isZeroLengthBitField(getContext()))
continue;
uint64_t FldMembers;
@@ -4603,7 +4619,9 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
- BT->getKind() == BuiltinType::LongDouble) {
+ BT->getKind() == BuiltinType::LongDouble ||
+ (getContext().getTargetInfo().hasFloat128Type() &&
+ (BT->getKind() == BuiltinType::Float128))) {
if (IsSoftFloatABI)
return false;
return true;
@@ -4618,10 +4636,13 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
const Type *Base, uint64_t Members) const {
- // Vector types require one register, floating point types require one
- // or two registers depending on their size.
+ // Vector and fp128 types require one register; other floating point types
+ // require one or two registers depending on their size.
uint32_t NumRegs =
- Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64;
+ ((getContext().getTargetInfo().hasFloat128Type() &&
+ Base->isFloat128Type()) ||
+ Base->isVectorType()) ? 1
+ : (getContext().getTypeSize(Base) + 63) / 64;
// Homogeneous Aggregates may occupy at most 8 registers.
return Members * NumRegs <= 8;
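A sketch of an aggregate this enables, assuming a target with __float128
support (hypothetical type):

    // Each __float128 member occupies one register, so this is a valid
    // homogeneous aggregate: 4 members * 1 register <= 8.
    struct QuadHFA { __float128 a, b, c, d; };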
@@ -4694,8 +4715,8 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
/*Realign=*/TyAlign > ABIAlign);
}
- return (isPromotableTypeForABI(Ty) ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo
@@ -4749,8 +4770,8 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
return getNaturalAlignIndirect(RetTy);
}
- return (isPromotableTypeForABI(RetTy) ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
@@ -4899,7 +4920,7 @@ private:
bool isIllegalVectorType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override {
- if (!getCXXABI().classifyReturnType(FI))
+ if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &it : FI.arguments())
@@ -4922,8 +4943,7 @@ private:
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
- bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> scalars,
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
@@ -5002,7 +5022,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
Ty = EnumTy->getDecl()->getIntegerType();
return (Ty->isPromotableIntegerType() && isDarwinPCS()
- ? ABIArgInfo::getExtend()
+ ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
@@ -5072,7 +5092,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
RetTy = EnumTy->getDecl()->getIntegerType();
return (RetTy->isPromotableIntegerType() && isDarwinPCS()
- ? ABIArgInfo::getExtend()
+ ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
@@ -5521,8 +5541,7 @@ private:
llvm::CallingConv::ID getABIDefaultCC() const;
void setCCs();
- bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> scalars,
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
@@ -5565,9 +5584,8 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override {
- if (!IsForDefinition)
+ CodeGen::CodeGenModule &CGM) const override {
+ if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
@@ -5610,8 +5628,7 @@ public:
: ARMTargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
@@ -5625,17 +5642,16 @@ public:
};
void WindowsARMTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const {
- ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (GV->isDeclaration())
return;
- addStackProbeSizeTargetAttribute(D, GV, CGM);
+ addStackProbeTargetAttributes(D, GV, CGM);
}
}
void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
- if (!getCXXABI().classifyReturnType(FI))
+ if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() =
classifyReturnType(FI.getReturnType(), FI.isVariadic());
@@ -5682,18 +5698,6 @@ void ARMABIInfo::setCCs() {
llvm::CallingConv::ID abiCC = getABIDefaultCC();
if (abiCC != getLLVMDefaultCC())
RuntimeCC = abiCC;
-
- // AAPCS apparently requires runtime support functions to be soft-float, but
- // that's almost certainly for historic reasons (Thumb1 not supporting VFP
- // most likely). It's more convenient for AAPCS16_VFP to be hard-float.
-
- // The Run-time ABI for the ARM Architecture section 4.1.2 requires
- // AEABI-complying FP helper functions to use the base AAPCS.
- // These AEABI functions are expanded in the ARM llvm backend, all the builtin
- // support functions emitted by clang such as the _Complex helpers follow the
- // abiCC.
- if (abiCC != getLLVMDefaultCC())
- BuiltinCC = abiCC;
}
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
@@ -5730,10 +5734,11 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
- // __fp16 gets passed as if it were an int or float, but with the top 16 bits
- // unspecified. This is not done for OpenCL as it handles the half type
- // natively, and does not need to interwork with AAPCS code.
- if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) {
+ // _Float16 and __fp16 get passed as if they were an int or float, but with
+ // the top 16 bits unspecified. This is not done for OpenCL as it handles the
+ // half type natively, and does not need to interwork with AAPCS code.
+ if ((Ty->isFloat16Type() || Ty->isHalfType()) &&
+ !getContext().getLangOpts().NativeHalfArgsAndReturns) {
llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
llvm::Type::getFloatTy(getVMContext()) :
llvm::Type::getInt32Ty(getVMContext());
@@ -5746,7 +5751,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
Ty = EnumTy->getDecl()->getIntegerType();
}
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
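An illustrative declaration covering both half-precision types that now take
the same path (hypothetical; outside OpenCL):

    // Both parameters lower to i32 (soft-float AAPCS) or float (AAPCS-VFP),
    // with the top 16 bits unspecified.
    void takes_halves(__fp16 a, _Float16 b);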
@@ -5928,10 +5933,11 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
return getNaturalAlignIndirect(RetTy);
}
- // __fp16 gets returned as if it were an int or float, but with the top 16
- // bits unspecified. This is not done for OpenCL as it handles the half type
- // natively, and does not need to interwork with AAPCS code.
- if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) {
+ // _Float16 and __fp16 get returned as if they were an int or float, but with
+ // the top 16 bits unspecified. This is not done for OpenCL as it handles the
+ // half type natively, and does not need to interwork with AAPCS code.
+ if ((RetTy->isFloat16Type() || RetTy->isHalfType()) &&
+ !getContext().getLangOpts().NativeHalfArgsAndReturns) {
llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
llvm::Type::getFloatTy(getVMContext()) :
llvm::Type::getInt32Ty(getVMContext());
@@ -5943,7 +5949,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
+ return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect();
}
@@ -6155,8 +6161,8 @@ public:
: TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &M) const override;
+ bool shouldEmitStaticExternCAliases() const override;
private:
// Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
@@ -6176,8 +6182,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
@@ -6189,8 +6195,8 @@ ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
if (isAggregateTypeForABI(Ty))
return getNaturalAlignIndirect(Ty, /* byval */ true);
- return (Ty->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
@@ -6212,9 +6218,8 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
}
void NVPTXTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const {
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -6279,6 +6284,10 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name,
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
+
+bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
+ return false;
+}
}
//===----------------------------------------------------------------------===//
@@ -6313,8 +6322,7 @@ public:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
- bool shouldPassIndirectlyForSwift(CharUnits totalSize,
- ArrayRef<llvm::Type*> scalars,
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
@@ -6402,7 +6410,7 @@ QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
// Unlike isSingleElementStruct(), empty structure and array fields
// do count. So do anonymous bitfields that aren't zero-sized.
if (getContext().getLangOpts().CPlusPlus &&
- FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
+ FD->isZeroLengthBitField(getContext()))
continue;
// Unlike isSingleElementStruct(), arrays do not count.
@@ -6586,8 +6594,8 @@ ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const {
return ABIArgInfo::getDirect();
if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
return getNaturalAlignIndirect(RetTy);
- return (isPromotableIntegerType(RetTy) ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
@@ -6597,7 +6605,7 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
// Integers and enums are extended to full register width.
if (isPromotableIntegerType(Ty))
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtend(Ty);
// Handle vector types and vector-like structure types. Note that
// as opposed to float-like structure types, we do not allow any
@@ -6651,16 +6659,14 @@ public:
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &M) const override;
};
}
void MSP430TargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const {
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) {
@@ -6705,7 +6711,7 @@ public:
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
- bool shouldSignExtUnsignedType(QualType Ty) const override;
+ ABIArgInfo extendType(QualType Ty) const;
};
class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -6720,8 +6726,7 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override {
+ CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -6732,7 +6737,7 @@ public:
Fn->addFnAttr("short-call");
// Other attributes do not have a meaning for declarations.
- if (!IsForDefinition)
+ if (GV->isDeclaration())
return;
if (FD->hasAttr<Mips16Attr>()) {
@@ -6898,7 +6903,7 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
// All integral types are promoted to the GPR width.
if (Ty->isIntegralOrEnumerationType())
- return ABIArgInfo::getExtend();
+ return extendType(Ty);
return ABIArgInfo::getDirect(
nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset));
@@ -6980,8 +6985,8 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const {
@@ -7047,14 +7052,14 @@ Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
return Addr;
}
-bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
+ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
int TySize = getContext().getTypeSize(Ty);
// MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
- return true;
+ return ABIArgInfo::getSignExtend(Ty);
- return false;
+ return ABIArgInfo::getExtend(Ty);
}
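A worked sketch of the N64 rule the new extendType() encodes (hypothetical
helper, for illustration only):

    #include <stdint.h>
    // MIPS64 sign-extends a 32-bit unsigned argument into its 64-bit GPR.
    uint64_t in_gpr(uint32_t v) {
      return (uint64_t)(int64_t)(int32_t)v; // 0x80000000u -> 0xffffffff80000000
    }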
bool
@@ -7096,9 +7101,8 @@ public:
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM,
- ForDefinition_t IsForDefinition) const override {
- if (!IsForDefinition)
+ CodeGen::CodeGenModule &CGM) const override {
+ if (GV->isDeclaration())
return;
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -7127,14 +7131,12 @@ public:
: DefaultTargetCodeGenInfo(CGT) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &M) const override;
};
void TCETargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const {
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -7227,8 +7229,8 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -7265,8 +7267,8 @@ ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ?
- ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+ return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
if (isEmptyRecord(getContext(), RetTy, true))
@@ -7409,7 +7411,7 @@ ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
if (Ty->isPromotableIntegerType()) {
if (InReg)
return ABIArgInfo::getDirectInReg();
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtend(Ty);
}
if (InReg)
return ABIArgInfo::getDirectInReg();
@@ -7639,8 +7641,7 @@ public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const override;
+ CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
@@ -7658,13 +7659,14 @@ public:
createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Value *BlockLiteral) const override;
+ bool shouldEmitStaticExternCAliases() const override;
+ void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const {
- if (!IsForDefinition)
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
@@ -7674,6 +7676,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+
+ if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() &&
+ (M.getTriple().getOS() == llvm::Triple::AMDHSA))
+ F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
+
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
if (ReqdWGS || FlatWGS) {
unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
@@ -7785,6 +7792,16 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
return C.getOrInsertSyncScopeID(Name);
}
+bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
+ return false;
+}
+
+void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
+ const FunctionType *&FT) const {
+ FT = getABIInfo().getContext().adjustFunctionType(
+ FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+}
+
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
@@ -7991,7 +8008,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
// Integer types smaller than a register are extended.
if (Size < 64 && Ty->isIntegerType())
- return ABIArgInfo::getExtend();
+ return ABIArgInfo::getExtend(Ty);
// Other non-aggregates go in registers.
if (!isAggregateTypeForABI(Ty))
@@ -8521,7 +8538,7 @@ static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
// The ABI requires unions to be sorted but not structures.
// See FieldEncoding::operator< for sort algorithm.
if (RT->isUnionType())
- std::sort(FE.begin(), FE.end());
+ llvm::sort(FE.begin(), FE.end());
// We can now complete the TypeString.
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
@@ -8565,7 +8582,7 @@ static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
EnumEnc += '}';
FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc));
}
- std::sort(FE.begin(), FE.end());
+ llvm::sort(FE.begin(), FE.end());
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
if (I)
@@ -8780,6 +8797,203 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
return false;
}
+//===----------------------------------------------------------------------===//
+// RISCV ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class RISCVABIInfo : public DefaultABIInfo {
+private:
+ unsigned XLen; // Size of the integer ('x') registers in bits.
+ static const int NumArgGPRs = 8;
+
+public:
+ RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
+ : DefaultABIInfo(CGT), XLen(XLen) {}
+
+ // DefaultABIInfo's classifyReturnType and classifyArgumentType are
+ // non-virtual, but computeInfo is virtual, so we override it.
+ void computeInfo(CGFunctionInfo &FI) const override;
+
+ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed,
+ int &ArgGPRsLeft) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+
+ ABIArgInfo extendType(QualType Ty) const;
+};
+} // end anonymous namespace
+
+void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ QualType RetTy = FI.getReturnType();
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(RetTy);
+
+ // IsRetIndirect is true if classifyArgumentType indicated the value should
+ // be passed indirectly, or if the type size is greater than 2*XLen. E.g.
+ // fp128 is passed directly in LLVM IR, relying on the backend lowering code
+ // to rewrite the argument list and pass it indirectly on RV32.
+ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect ||
+ getContext().getTypeSize(RetTy) > (2 * XLen);
+
+ // We must track the number of GPRs used in order to conform to the RISC-V
+ // ABI, as integer scalars passed in registers should have signext/zeroext
+ // when promoted, but are anyext if passed on the stack. As GPR usage is
+ // different for variadic arguments, we must also track whether we are
+ // examining a vararg or not.
+ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
+ int NumFixedArgs = FI.getNumRequiredArgs();
+
+ int ArgNum = 0;
+ for (auto &ArgInfo : FI.arguments()) {
+ bool IsFixed = ArgNum < NumFixedArgs;
+ ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft);
+ ArgNum++;
+ }
+}
+
+ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
+ int &ArgGPRsLeft) const {
+ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are always passed indirectly.
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+ if (ArgGPRsLeft)
+ ArgGPRsLeft -= 1;
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
+ CGCXXABI::RAA_DirectInMemory);
+ }
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ uint64_t Size = getContext().getTypeSize(Ty);
+ uint64_t NeededAlign = getContext().getTypeAlign(Ty);
+ bool MustUseStack = false;
+ // Determine the number of GPRs needed to pass the current argument
+ // according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
+ // register pairs, so they may consume up to 3 registers.
+ int NeededArgGPRs = 1;
+ if (!IsFixed && NeededAlign == 2 * XLen)
+ NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
+ else if (Size > XLen && Size <= 2 * XLen)
+ NeededArgGPRs = 2;
+
+ if (NeededArgGPRs > ArgGPRsLeft) {
+ MustUseStack = true;
+ NeededArgGPRs = ArgGPRsLeft;
+ }
+
+ ArgGPRsLeft -= NeededArgGPRs;
+
+ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ // All integral types are promoted to XLen width, unless passed on the
+ // stack.
+ if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) {
+ return extendType(Ty);
+ }
+
+ return ABIArgInfo::getDirect();
+ }
+
+ // Aggregates which are <= 2*XLen will be passed in registers if possible,
+ // so coerce to integers.
+ if (Size <= 2 * XLen) {
+ unsigned Alignment = getContext().getTypeAlign(Ty);
+
+ // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is
+ // required, and a 2-element XLen array if only XLen alignment is required.
+ if (Size <= XLen) {
+ return ABIArgInfo::getDirect(
+ llvm::IntegerType::get(getVMContext(), XLen));
+ } else if (Alignment == 2 * XLen) {
+ return ABIArgInfo::getDirect(
+ llvm::IntegerType::get(getVMContext(), 2 * XLen));
+ } else {
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(
+ llvm::IntegerType::get(getVMContext(), XLen), 2));
+ }
+ }
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+}
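Worked examples of the size/alignment cases above, assuming RV32
(XLen == 32; hypothetical types):

    #include <stdint.h>
    struct A { int32_t x; };       // 32 bits            -> direct i32
    struct B { int32_t x, y; };    // 64 bits, align 32  -> direct [2 x i32]
    struct C { int64_t x; };       // 64 bits, align 64  -> direct i64
    struct D { int32_t x, y, z; }; // 96 bits > 2*XLen   -> indirect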
+
+ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ int ArgGPRsLeft = 2;
+
+ // The rules for return and argument types are the same, so defer to
+ // classifyArgumentType.
+ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft);
+}
+
+Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
+
+ // Empty records are ignored for parameter passing purposes.
+ if (isEmptyRecord(getContext(), Ty, true)) {
+ Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize);
+ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
+ return Addr;
+ }
+
+ std::pair<CharUnits, CharUnits> SizeAndAlign =
+ getContext().getTypeInfoInChars(Ty);
+
+ // Arguments bigger than 2*XLen bits are passed indirectly.
+ bool IsIndirect = SizeAndAlign.first > 2 * SlotSize;
+
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, SizeAndAlign,
+ SlotSize, /*AllowHigherAlign=*/true);
+}
+
+ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
+ int TySize = getContext().getTypeSize(Ty);
+ // RV64 ABI requires unsigned 32 bit integers to be sign extended.
+ if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+ return ABIArgInfo::getSignExtend(Ty);
+ return ABIArgInfo::getExtend(Ty);
+}
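For example, assuming RV64 (XLen == 64; illustrative lowering):

    // C:  void callee(unsigned int x);         // 32-bit unsigned scalar
    // IR: define void @callee(i32 signext %x)  // sign-, not zero-extended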
+
+namespace {
+class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
+ : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {}
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override {
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD) return;
+
+ const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
+ if (!Attr)
+ return;
+
+ const char *Kind;
+ switch (Attr->getInterrupt()) {
+ case RISCVInterruptAttr::user: Kind = "user"; break;
+ case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break;
+ case RISCVInterruptAttr::machine: Kind = "machine"; break;
+ }
+
+ auto *Fn = cast<llvm::Function>(GV);
+
+ Fn->addFnAttr("interrupt", Kind);
+ }
+};
+} // namespace
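A sketch of the source-level attribute that reaches this hook (hypothetical
handler; attribute spelling as accepted by the RISC-V target):

    // Lowers to the "interrupt"="machine" function attribute via the switch
    // above.
    __attribute__((interrupt("machine"))) void timer_isr(void);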
//===----------------------------------------------------------------------===//
// Driver code
@@ -8894,6 +9108,11 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::msp430:
return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
+ case llvm::Triple::riscv32:
+ return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32));
+ case llvm::Triple::riscv64:
+ return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64));
+
case llvm::Triple::systemz: {
bool HasVector = getTarget().getABI() == "vector";
return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector));
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index d745e420c4a5..b530260ea48f 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -57,8 +57,7 @@ public:
/// setTargetAttributes - Provides a convenient hook to handle extra
/// target-specific attributes for the given global.
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M,
- ForDefinition_t IsForDefinition) const {}
+ CodeGen::CodeGenModule &M) const {}
/// emitTargetMD - Provides a convenient hook to handle extra
/// target-specific metadata for the given global.
@@ -267,7 +266,7 @@ public:
virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
llvm::LLVMContext &C) const;
- /// Inteface class for filling custom fields of a block literal for OpenCL.
+ /// Interface class for filling custom fields of a block literal for OpenCL.
class TargetOpenCLBlockHelper {
public:
typedef std::pair<llvm::Value *, StringRef> ValueTy;
@@ -297,6 +296,13 @@ public:
createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Value *BlockLiteral) const;
+
+ /// \return true if the target supports emitting an alias from the unmangled
+ /// name to the mangled name for functions that are declared within an
+ /// extern "C" region, are marked 'used', and have internal linkage.
+ virtual bool shouldEmitStaticExternCAliases() const { return true; }
+
+ virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
};
} // namespace CodeGen
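The alias in question is the one emitted for code like the following
(hypothetical example), which the NVPTX and AMDGPU overrides above now
suppress by returning false:

    extern "C" {
      // Internal linkage + 'used' inside extern "C": targets returning true
      // get an alias from the unmangled name "helper" to the internal symbol.
      __attribute__((used)) static int helper(void) { return 42; }
    }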
diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp
index cfb93d6a9fcc..2f8a591a3e7f 100644
--- a/lib/CodeGen/VarBypassDetector.cpp
+++ b/lib/CodeGen/VarBypassDetector.cpp
@@ -95,7 +95,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S,
case Stmt::CaseStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::LabelStmtClass:
- llvm_unreachable("the loop bellow handles labels and cases");
+ llvm_unreachable("the loop below handles labels and cases");
break;
default: