path: root/clang/lib/CodeGen
author     Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /clang/lib/CodeGen
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
download   src-cfca06d7963fa0909f90483b42a6d7d194d01e08.tar.gz
           src-cfca06d7963fa0909f90483b42a6d7d194d01e08.zip
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/ABIInfo.h  6
-rw-r--r--  clang/lib/CodeGen/BackendUtil.cpp  266
-rw-r--r--  clang/lib/CodeGen/CGAtomic.cpp  13
-rw-r--r--  clang/lib/CodeGen/CGBlocks.cpp  223
-rw-r--r--  clang/lib/CodeGen/CGBlocks.h  4
-rw-r--r--  clang/lib/CodeGen/CGBuilder.h  32
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp  3300
-rw-r--r--  clang/lib/CodeGen/CGCUDANV.cpp  148
-rw-r--r--  clang/lib/CodeGen/CGCUDARuntime.h  39
-rw-r--r--  clang/lib/CodeGen/CGCXX.cpp  4
-rw-r--r--  clang/lib/CodeGen/CGCXXABI.cpp  19
-rw-r--r--  clang/lib/CodeGen/CGCXXABI.h  57
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp  853
-rw-r--r--  clang/lib/CodeGen/CGCall.h  34
-rw-r--r--  clang/lib/CodeGen/CGClass.cpp  56
-rw-r--r--  clang/lib/CodeGen/CGCleanup.cpp  11
-rw-r--r--  clang/lib/CodeGen/CGCleanup.h  23
-rw-r--r--  clang/lib/CodeGen/CGCoroutine.cpp  6
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp  287
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.h  22
-rw-r--r--  clang/lib/CodeGen/CGDecl.cpp  77
-rw-r--r--  clang/lib/CodeGen/CGDeclCXX.cpp  194
-rw-r--r--  clang/lib/CodeGen/CGException.cpp  104
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp  322
-rw-r--r--  clang/lib/CodeGen/CGExprAgg.cpp  73
-rw-r--r--  clang/lib/CodeGen/CGExprCXX.cpp  40
-rw-r--r--  clang/lib/CodeGen/CGExprComplex.cpp  13
-rw-r--r--  clang/lib/CodeGen/CGExprConstant.cpp  72
-rw-r--r--  clang/lib/CodeGen/CGExprScalar.cpp  449
-rw-r--r--  clang/lib/CodeGen/CGGPUBuiltin.cpp  36
-rw-r--r--  clang/lib/CodeGen/CGLoopInfo.cpp  11
-rw-r--r--  clang/lib/CodeGen/CGLoopInfo.h  2
-rw-r--r--  clang/lib/CodeGen/CGNonTrivialStruct.cpp  50
-rw-r--r--  clang/lib/CodeGen/CGObjC.cpp  80
-rw-r--r--  clang/lib/CodeGen/CGObjCGNU.cpp  53
-rw-r--r--  clang/lib/CodeGen/CGObjCMac.cpp  120
-rw-r--r--  clang/lib/CodeGen/CGObjCRuntime.cpp  13
-rw-r--r--  clang/lib/CodeGen/CGObjCRuntime.h  5
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp  3650
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.h  311
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp  94
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h  14
-rw-r--r--  clang/lib/CodeGen/CGRecordLayoutBuilder.cpp  25
-rw-r--r--  clang/lib/CodeGen/CGStmt.cpp  113
-rw-r--r--  clang/lib/CodeGen/CGStmtOpenMP.cpp  1726
-rw-r--r--  clang/lib/CodeGen/CGVTables.cpp  372
-rw-r--r--  clang/lib/CodeGen/CGVTables.h  40
-rw-r--r--  clang/lib/CodeGen/CGValue.h  38
-rw-r--r--  clang/lib/CodeGen/CodeGenABITypes.cpp  44
-rw-r--r--  clang/lib/CodeGen/CodeGenAction.cpp  30
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.cpp  355
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h  359
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp  589
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.h  102
-rw-r--r--  clang/lib/CodeGen/CodeGenPGO.cpp  30
-rw-r--r--  clang/lib/CodeGen/CodeGenPGO.h  4
-rw-r--r--  clang/lib/CodeGen/CodeGenTBAA.cpp  37
-rw-r--r--  clang/lib/CodeGen/CodeGenTypeCache.h  4
-rw-r--r--  clang/lib/CodeGen/CodeGenTypes.cpp  197
-rw-r--r--  clang/lib/CodeGen/CodeGenTypes.h  8
-rw-r--r--  clang/lib/CodeGen/ConstantEmitter.h  2
-rw-r--r--  clang/lib/CodeGen/ConstantInitBuilder.cpp  22
-rw-r--r--  clang/lib/CodeGen/CoverageMappingGen.cpp  151
-rw-r--r--  clang/lib/CodeGen/CoverageMappingGen.h  18
-rw-r--r--  clang/lib/CodeGen/EHScopeStack.h  14
-rw-r--r--  clang/lib/CodeGen/ItaniumCXXABI.cpp  367
-rw-r--r--  clang/lib/CodeGen/MicrosoftCXXABI.cpp  69
-rw-r--r--  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp  4
-rw-r--r--  clang/lib/CodeGen/PatternInit.cpp  12
-rw-r--r--  clang/lib/CodeGen/SanitizerMetadata.cpp  15
-rw-r--r--  clang/lib/CodeGen/SanitizerMetadata.h  2
-rw-r--r--  clang/lib/CodeGen/SwiftCallingConv.cpp  7
-rw-r--r--  clang/lib/CodeGen/TargetInfo.cpp  1734
-rw-r--r--  clang/lib/CodeGen/TargetInfo.h  47
74 files changed, 12472 insertions, 5251 deletions
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 0c3a076da0b5..bb40dace8a84 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -60,6 +60,8 @@ namespace swiftcall {
virtual bool supportsSwift() const { return false; }
+ virtual bool allowBFloatArgsAndRet() const { return false; }
+
CodeGen::CGCXXABI &getCXXABI() const;
ASTContext &getContext() const;
llvm::LLVMContext &getVMContext() const;
@@ -102,6 +104,10 @@ namespace swiftcall {
bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
uint64_t &Members) const;
+ // Implement the Type::IsPromotableIntegerType for ABI specific needs. The
+ // only difference is that this considers _ExtInt as well.
+ bool isPromotableIntegerTypeForABI(QualType Ty) const;
+
/// A convenience method to return an indirect ABIArgInfo with an
/// expected alignment equal to the ABI alignment of the given type.
CodeGen::ABIArgInfo
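
For readers tracking the new ABIInfo hook: a hedged sketch, not part of this commit, of how a target's argument classification might use isPromotableIntegerTypeForABI(). SomeTargetABIInfo and the control flow are illustrative; isAggregateTypeForABI, getNaturalAlignIndirect, and the ABIArgInfo factories are existing clang CodeGen APIs.

// Illustrative only: SomeTargetABIInfo is a hypothetical ABIInfo subclass.
ABIArgInfo SomeTargetABIInfo::classifyArgumentType(QualType Ty) const {
  if (isAggregateTypeForABI(Ty))
    return getNaturalAlignIndirect(Ty);
  // Unlike Type::isPromotableIntegerType(), the ABI-level check also
  // treats small _ExtInt types as promotable.
  if (isPromotableIntegerTypeForABI(Ty))
    return ABIArgInfo::getExtend(Ty);
  return ABIArgInfo::getDirect();
}
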
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 0bfcab88a3a9..dce0940670a2 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTOBackend.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -45,12 +47,18 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Coroutines.h"
+#include "llvm/Transforms/Coroutines/CoroCleanup.h"
+#include "llvm/Transforms/Coroutines/CoroEarly.h"
+#include "llvm/Transforms/Coroutines/CoroElide.h"
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
@@ -71,6 +79,7 @@
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h"
#include <memory>
using namespace clang;
using namespace llvm;
@@ -216,6 +225,7 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard;
Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune;
Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
+ Opts.InlineBoolFlag = CGOpts.SanitizeCoverageInlineBoolFlag;
Opts.PCTable = CGOpts.SanitizeCoveragePCTable;
Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
return Opts;
@@ -227,7 +237,9 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
static_cast<const PassManagerBuilderWrapper &>(Builder);
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
auto Opts = getSancovOptsFromCGOpts(CGOpts);
- PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts));
+ PM.add(createModuleSanitizerCoverageLegacyPassPass(
+ Opts, CGOpts.SanitizeCoverageAllowlistFiles,
+ CGOpts.SanitizeCoverageBlocklistFiles));
}
// Check if ASan should use GC-friendly instrumentation for globals.
@@ -350,7 +362,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
break;
case CodeGenOptions::MASSV:
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
- break;
+ break;
case CodeGenOptions::SVML:
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
break;
@@ -413,7 +425,8 @@ static CodeGenFileType getCodeGenFileType(BackendAction Action) {
}
}
-static void initTargetOptions(llvm::TargetOptions &Options,
+static void initTargetOptions(DiagnosticsEngine &Diags,
+ llvm::TargetOptions &Options,
const CodeGenOptions &CodeGenOpts,
const clang::TargetOptions &TargetOpts,
const LangOptions &LangOpts,
@@ -436,15 +449,15 @@ static void initTargetOptions(llvm::TargetOptions &Options,
// Set FP fusion mode.
switch (LangOpts.getDefaultFPContractMode()) {
- case LangOptions::FPC_Off:
+ case LangOptions::FPM_Off:
// Preserve any contraction performed by the front-end. (Strict performs
// splitting of the muladd intrinsic in the backend.)
Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
break;
- case LangOptions::FPC_On:
+ case LangOptions::FPM_On:
Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
break;
- case LangOptions::FPC_Fast:
+ case LangOptions::FPM_Fast:
Options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
break;
}
@@ -466,22 +479,44 @@ static void initTargetOptions(llvm::TargetOptions &Options,
if (LangOpts.WasmExceptions)
Options.ExceptionModel = llvm::ExceptionHandling::Wasm;
- Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
- Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
+ Options.NoInfsFPMath = LangOpts.NoHonorInfs;
+ Options.NoNaNsFPMath = LangOpts.NoHonorNaNs;
Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
- Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
+ Options.UnsafeFPMath = LangOpts.UnsafeFPMath;
Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
+
+ Options.BBSections =
+ llvm::StringSwitch<llvm::BasicBlockSection>(CodeGenOpts.BBSections)
+ .Case("all", llvm::BasicBlockSection::All)
+ .Case("labels", llvm::BasicBlockSection::Labels)
+ .StartsWith("list=", llvm::BasicBlockSection::List)
+ .Case("none", llvm::BasicBlockSection::None)
+ .Default(llvm::BasicBlockSection::None);
+
+ if (Options.BBSections == llvm::BasicBlockSection::List) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(CodeGenOpts.BBSections.substr(5));
+ if (!MBOrErr)
+ Diags.Report(diag::err_fe_unable_to_load_basic_block_sections_file)
+ << MBOrErr.getError().message();
+ else
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
+ }
+
Options.FunctionSections = CodeGenOpts.FunctionSections;
Options.DataSections = CodeGenOpts.DataSections;
Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
+ Options.UniqueBasicBlockSectionNames =
+ CodeGenOpts.UniqueBasicBlockSectionNames;
Options.TLSSize = CodeGenOpts.TLSSize;
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS;
Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
Options.EmitAddrsig = CodeGenOpts.Addrsig;
- Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues;
Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
+ Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
+ Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex;
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
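
For context on the -fbasic-block-sections plumbing above: the option string is mapped to an enum with llvm::StringSwitch. A self-contained sketch of the same parsing pattern; the enum values and option spellings mirror the hunk, while the standalone function is illustrative.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class BasicBlockSection { All, Labels, List, None };

static BasicBlockSection parseBBSections(llvm::StringRef Val) {
  return llvm::StringSwitch<BasicBlockSection>(Val)
      .Case("all", BasicBlockSection::All)
      .Case("labels", BasicBlockSection::Labels)
      .StartsWith("list=", BasicBlockSection::List) // "list=<file>"
      .Case("none", BasicBlockSection::None)
      .Default(BasicBlockSection::None);
}

For the List case, initTargetOptions strips the five-character "list=" prefix (CodeGenOpts.BBSections.substr(5)) and loads the named file into BBSectionsFuncListBuf, as shown in the hunk.
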
@@ -502,6 +537,8 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Entry.Group == frontend::IncludeDirGroup::System))
Options.MCOptions.IASSearchPaths.push_back(
Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
+ Options.MCOptions.Argv0 = CodeGenOpts.Argv0;
+ Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs;
}
static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
if (CodeGenOpts.DisableGCov)
@@ -514,12 +551,9 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
Options.EmitData = CodeGenOpts.EmitGcovArcs;
llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version));
- Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
- Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
Options.Filter = CodeGenOpts.ProfileFilterFiles;
Options.Exclude = CodeGenOpts.ProfileExcludeFiles;
- Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
return Options;
}
@@ -553,13 +587,24 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
std::unique_ptr<TargetLibraryInfoImpl> TLII(
createTLII(TargetTriple, CodeGenOpts));
+ // If we reached here with a non-empty index file name, then the index file
+ // was empty and we are not performing ThinLTO backend compilation (used in
+ // testing in a distributed build environment). Drop any type test
+ // assume sequences inserted for whole program vtables so that codegen doesn't
+ // complain.
+ if (!CodeGenOpts.ThinLTOIndexFile.empty())
+ MPM.add(createLowerTypeTestsPass(/*ExportSummary=*/nullptr,
+ /*ImportSummary=*/nullptr,
+ /*DropTypeTests=*/true));
+
PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts);
// At O0 and O1 we only run the always inliner which is more efficient. At
// higher optimization levels we run the normal inliner.
if (CodeGenOpts.OptimizationLevel <= 1) {
- bool InsertLifetimeIntrinsics = (CodeGenOpts.OptimizationLevel != 0 &&
- !CodeGenOpts.DisableLifetimeMarkers);
+ bool InsertLifetimeIntrinsics = ((CodeGenOpts.OptimizationLevel != 0 &&
+ !CodeGenOpts.DisableLifetimeMarkers) ||
+ LangOpts.Coroutines);
PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
} else {
// We do not want to inline hot callsites for SamplePGO module-summary build
@@ -575,6 +620,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
+ // Only enable CGProfilePass when using integrated assembler, since
+ // non-integrated assemblers don't recognize .cgprofile section.
+ PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
// Loop interleaving in the loop vectorizer has historically been set to be
@@ -689,6 +737,12 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (!CodeGenOpts.RewriteMapFiles.empty())
addSymbolRewriterPass(CodeGenOpts, &MPM);
+ // Add UniqueInternalLinkageNames Pass which renames internal linkage symbols
+ // with unique names.
+ if (CodeGenOpts.UniqueInternalLinkageNames) {
+ MPM.add(createUniqueInternalLinkageNamesPass());
+ }
+
if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) {
MPM.add(createGCOVProfilerPass(*Options));
if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo)
@@ -718,7 +772,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (!CodeGenOpts.InstrProfileOutput.empty())
PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput;
else
- PMBuilder.PGOInstrGen = DefaultProfileGenName;
+ PMBuilder.PGOInstrGen = std::string(DefaultProfileGenName);
}
if (CodeGenOpts.hasProfileIRUse()) {
PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath;
@@ -766,7 +820,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
llvm::TargetOptions Options;
- initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts);
+ initTargetOptions(Diags, Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts);
TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
Options, RM, CM, OptLevel));
}
@@ -924,7 +978,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
llvm_unreachable("Invalid optimization level!");
case 1:
- return PassBuilder::O1;
+ return PassBuilder::OptimizationLevel::O1;
case 2:
switch (Opts.OptimizeSize) {
@@ -932,24 +986,49 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
llvm_unreachable("Invalid optimization level for size!");
case 0:
- return PassBuilder::O2;
+ return PassBuilder::OptimizationLevel::O2;
case 1:
- return PassBuilder::Os;
+ return PassBuilder::OptimizationLevel::Os;
case 2:
- return PassBuilder::Oz;
+ return PassBuilder::OptimizationLevel::Oz;
}
case 3:
- return PassBuilder::O3;
+ return PassBuilder::OptimizationLevel::O3;
}
}
+static void addCoroutinePassesAtO0(ModulePassManager &MPM,
+ const LangOptions &LangOpts,
+ const CodeGenOptions &CodeGenOpts) {
+ if (!LangOpts.Coroutines)
+ return;
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+
+ CGSCCPassManager CGPM(CodeGenOpts.DebugPassManager);
+ CGPM.addPass(CoroSplitPass());
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass()));
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+}
+
static void addSanitizersAtO0(ModulePassManager &MPM,
const Triple &TargetTriple,
const LangOptions &LangOpts,
const CodeGenOptions &CodeGenOpts) {
+ if (CodeGenOpts.SanitizeCoverageType ||
+ CodeGenOpts.SanitizeCoverageIndirectCalls ||
+ CodeGenOpts.SanitizeCoverageTraceCmp) {
+ auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
+ MPM.addPass(ModuleSanitizerCoveragePass(
+ SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles,
+ CodeGenOpts.SanitizeCoverageBlocklistFiles));
+ }
+
auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
@@ -970,8 +1049,11 @@ static void addSanitizersAtO0(ModulePassManager &MPM,
}
if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
- MPM.addPass(MemorySanitizerPass({}));
- MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({})));
+ bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Memory);
+ int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins;
+ MPM.addPass(MemorySanitizerPass({TrackOrigins, Recover, false}));
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ MemorySanitizerPass({TrackOrigins, Recover, false})));
}
if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) {
@@ -1013,7 +1095,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (CodeGenOpts.hasProfileIRInstr())
// -fprofile-generate.
PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty()
- ? DefaultProfileGenName
+ ? std::string(DefaultProfileGenName)
: CodeGenOpts.InstrProfileOutput,
"", "", PGOOptions::IRInstr, PGOOptions::NoCSAction,
CodeGenOpts.DebugInfoForProfiling);
@@ -1046,13 +1128,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
"Cannot run CSProfileGen pass with ProfileGen or SampleUse "
" pass");
PGOOpt->CSProfileGenFile = CodeGenOpts.InstrProfileOutput.empty()
- ? DefaultProfileGenName
+ ? std::string(DefaultProfileGenName)
: CodeGenOpts.InstrProfileOutput;
PGOOpt->CSAction = PGOOptions::CSIRInstr;
} else
PGOOpt = PGOOptions("",
CodeGenOpts.InstrProfileOutput.empty()
- ? DefaultProfileGenName
+ ? std::string(DefaultProfileGenName)
: CodeGenOpts.InstrProfileOutput,
"", PGOOptions::NoAction, PGOOptions::CSIRInstr,
CodeGenOpts.DebugInfoForProfiling);
@@ -1065,6 +1147,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
+ // Only enable CGProfilePass when using integrated assembler, since
+ // non-integrated assemblers don't recognize .cgprofile section.
+ PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
+ PTO.Coroutines = LangOpts.Coroutines;
PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
@@ -1114,6 +1200,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
bool IsLTO = CodeGenOpts.PrepareForLTO;
if (CodeGenOpts.OptimizationLevel == 0) {
+ // If we reached here with a non-empty index file name, then the index
+ // file was empty and we are not performing ThinLTO backend compilation
+ // (used in testing in a distributed build environment). Drop any type
+ // test assume sequences inserted for whole program vtables so that
+ // codegen doesn't complain.
+ if (!CodeGenOpts.ThinLTOIndexFile.empty())
+ MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr,
+ /*ImportSummary=*/nullptr,
+ /*DropTypeTests=*/true));
if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
MPM.addPass(GCOVProfilerPass(*Options));
if (Optional<InstrProfOptions> Options =
@@ -1124,7 +1219,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// which is just that always inlining occurs. Further, disable generating
// lifetime intrinsics to avoid enabling further optimizations during
// code generation.
- MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false));
+ // However, we need to insert lifetime intrinsics to avoid invalid access
+ // caused by multithreaded coroutines.
+ MPM.addPass(
+ AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/LangOpts.Coroutines));
// At -O0, we can still do PGO. Add all the requested passes for
// instrumentation PGO, if requested.
@@ -1140,6 +1238,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
+ // Add UniqueInternalLinkageNames Pass which renames internal linkage
+ // symbols with unique names.
+ if (CodeGenOpts.UniqueInternalLinkageNames) {
+ MPM.addPass(UniqueInternalLinkageNamesPass());
+ }
+
// Lastly, add semantically necessary passes for LTO.
if (IsLTO || IsThinLTO) {
MPM.addPass(CanonicalizeAliasesPass());
@@ -1150,6 +1254,18 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// configure the pipeline.
PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
+ // If we reached here with a non-empty index file name, then the index
+ // file was empty and we are not performing ThinLTO backend compilation
+ // (used in testing in a distributed build environment). Drop any type
+ // test assume sequences inserted for whole program vtables so that
+ // codegen doesn't complain.
+ if (!CodeGenOpts.ThinLTOIndexFile.empty())
+ PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
+ MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr,
+ /*ImportSummary=*/nullptr,
+ /*DropTypeTests=*/true));
+ });
+
PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
MPM.addPass(createModuleToFunctionPassAdaptor(
EntryExitInstrumenterPass(/*PostInlining=*/false)));
@@ -1157,50 +1273,60 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// Register callbacks to schedule sanitizer passes at the appropriate part of
// the pipeline.
- // FIXME: either handle asan/the remaining sanitizers or error out
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
PB.registerScalarOptimizerLateEPCallback(
[](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
FPM.addPass(BoundsCheckingPass());
});
+
+ if (CodeGenOpts.SanitizeCoverageType ||
+ CodeGenOpts.SanitizeCoverageIndirectCalls ||
+ CodeGenOpts.SanitizeCoverageTraceCmp) {
+ PB.registerOptimizerLastEPCallback(
+ [this](ModulePassManager &MPM,
+ PassBuilder::OptimizationLevel Level) {
+ auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
+ MPM.addPass(ModuleSanitizerCoveragePass(
+ SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles,
+ CodeGenOpts.SanitizeCoverageBlocklistFiles));
+ });
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
- PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
- MPM.addPass(MemorySanitizerPass({}));
- });
+ int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins;
+ bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Memory);
PB.registerOptimizerLastEPCallback(
- [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
- FPM.addPass(MemorySanitizerPass({}));
+ [TrackOrigins, Recover](ModulePassManager &MPM,
+ PassBuilder::OptimizationLevel Level) {
+ MPM.addPass(MemorySanitizerPass({TrackOrigins, Recover, false}));
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ MemorySanitizerPass({TrackOrigins, Recover, false})));
});
}
if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
- PB.registerPipelineStartEPCallback(
- [](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); });
PB.registerOptimizerLastEPCallback(
- [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
- FPM.addPass(ThreadSanitizerPass());
+ [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
+ MPM.addPass(ThreadSanitizerPass());
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
});
}
if (LangOpts.Sanitize.has(SanitizerKind::Address)) {
- PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) {
- MPM.addPass(
- RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
- });
bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Address);
bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
- PB.registerOptimizerLastEPCallback(
- [Recover, UseAfterScope](FunctionPassManager &FPM,
- PassBuilder::OptimizationLevel Level) {
- FPM.addPass(AddressSanitizerPass(
- /*CompileKernel=*/false, Recover, UseAfterScope));
- });
bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator;
- PB.registerPipelineStartEPCallback(
- [Recover, ModuleUseAfterScope,
- UseOdrIndicator](ModulePassManager &MPM) {
+ PB.registerOptimizerLastEPCallback(
+ [Recover, UseAfterScope, ModuleUseAfterScope, UseOdrIndicator](
+ ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
+ MPM.addPass(
+ RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
MPM.addPass(ModuleAddressSanitizerPass(
/*CompileKernel=*/false, Recover, ModuleUseAfterScope,
UseOdrIndicator));
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(AddressSanitizerPass(
+ /*CompileKernel=*/false, Recover, UseAfterScope)));
});
}
if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
@@ -1213,6 +1339,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
MPM.addPass(InstrProfiling(*Options, false));
});
+ // Add UniqueInternalLinkageNames Pass which renames internal linkage
+ // symbols with unique names.
+ if (CodeGenOpts.UniqueInternalLinkageNames) {
+ MPM.addPass(UniqueInternalLinkageNamesPass());
+ }
+
if (IsThinLTO) {
MPM = PB.buildThinLTOPreLinkDefaultPipeline(
Level, CodeGenOpts.DebugPassManager);
@@ -1229,13 +1361,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}
- if (CodeGenOpts.SanitizeCoverageType ||
- CodeGenOpts.SanitizeCoverageIndirectCalls ||
- CodeGenOpts.SanitizeCoverageTraceCmp) {
- auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
- MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts));
- }
-
if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
MPM.addPass(HWAddressSanitizerPass(
@@ -1247,6 +1372,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
if (CodeGenOpts.OptimizationLevel == 0) {
+ addCoroutinePassesAtO0(MPM, LangOpts, CodeGenOpts);
addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
}
}
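
For context on the sanitizer hunks above: under the new pass manager, sanitizer registration moves from pipeline-start and function-level callbacks to optimizer-last callbacks that receive a ModulePassManager. A hedged sketch of that callback shape using ThreadSanitizer; the standalone helper is illustrative, and the addPass calls mirror the hunk.

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"

// Illustrative helper: add the module-level pass first, then the function
// pass wrapped so it runs on every function, as in the hunk above.
static void registerTSan(llvm::PassBuilder &PB) {
  PB.registerOptimizerLastEPCallback(
      [](llvm::ModulePassManager &MPM, llvm::PassBuilder::OptimizationLevel) {
        MPM.addPass(llvm::ThreadSanitizerPass());
        MPM.addPass(llvm::createModuleToFunctionPassAdaptor(
            llvm::ThreadSanitizerPass()));
      });
}
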
@@ -1358,15 +1484,12 @@ BitcodeModule *clang::FindThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
return nullptr;
}
-static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
- const HeaderSearchOptions &HeaderOpts,
- const CodeGenOptions &CGOpts,
- const clang::TargetOptions &TOpts,
- const LangOptions &LOpts,
- std::unique_ptr<raw_pwrite_stream> OS,
- std::string SampleProfile,
- std::string ProfileRemapping,
- BackendAction Action) {
+static void runThinLTOBackend(
+ DiagnosticsEngine &Diags, ModuleSummaryIndex *CombinedIndex, Module *M,
+ const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts,
+ const clang::TargetOptions &TOpts, const LangOptions &LOpts,
+ std::unique_ptr<raw_pwrite_stream> OS, std::string SampleProfile,
+ std::string ProfileRemapping, BackendAction Action) {
StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -1436,7 +1559,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
Conf.RelocModel = CGOpts.RelocationModel;
Conf.CGOptLevel = getCGOptLevel(CGOpts);
Conf.OptLevel = CGOpts.OptimizationLevel;
- initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
+ initTargetOptions(Diags, Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
@@ -1444,6 +1567,9 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
+ // Only enable CGProfilePass when using integrated assembler, since
+ // non-integrated assemblers don't recognize .cgprofile section.
+ Conf.PTO.CallGraphProfile = !CGOpts.DisableIntegratedAS;
// Context sensitive profile.
if (CGOpts.hasProfileCSIRInstr()) {
@@ -1525,8 +1651,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// of an error).
if (CombinedIndex) {
if (!CombinedIndex->skipModuleByDistributedBackend()) {
- runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
- LOpts, std::move(OS), CGOpts.SampleProfileFile,
+ runThinLTOBackend(Diags, CombinedIndex.get(), M, HeaderOpts, CGOpts,
+ TOpts, LOpts, std::move(OS), CGOpts.SampleProfileFile,
CGOpts.ProfileRemappingFile, Action);
return;
}
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 149982d82790..a58450ddd4c5 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -119,8 +119,9 @@ namespace {
ValueTy = lvalue.getType();
ValueSizeInBits = C.getTypeSize(ValueTy);
AtomicTy = ValueTy = CGF.getContext().getExtVectorType(
- lvalue.getType(), lvalue.getExtVectorAddress()
- .getElementType()->getVectorNumElements());
+ lvalue.getType(), cast<llvm::VectorType>(
+ lvalue.getExtVectorAddress().getElementType())
+ ->getNumElements());
AtomicSizeInBits = C.getTypeSize(AtomicTy);
AtomicAlign = ValueAlign = lvalue.getAlignment();
LVal = lvalue;
@@ -1826,7 +1827,7 @@ void AtomicInfo::EmitAtomicUpdateOp(
auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
// Do the atomic load.
- auto *OldVal = EmitAtomicLoadOp(AO, IsVolatile);
+ auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
// For non-simple lvalues perform compare-and-swap procedure.
auto *ContBB = CGF.createBasicBlock("atomic_cont");
auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@@ -1908,7 +1909,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
// Do the atomic load.
- auto *OldVal = EmitAtomicLoadOp(AO, IsVolatile);
+ auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
// For non-simple lvalues perform compare-and-swap procedure.
auto *ContBB = CGF.createBasicBlock("atomic_cont");
auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@@ -2018,6 +2019,10 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
intValue, addr.getElementType(), /*isSigned=*/false);
llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+ if (AO == llvm::AtomicOrdering::Acquire)
+ AO = llvm::AtomicOrdering::Monotonic;
+ else if (AO == llvm::AtomicOrdering::AcquireRelease)
+ AO = llvm::AtomicOrdering::Release;
// Initializations don't need to be atomic.
if (!isInit)
store->setAtomic(AO);
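
A note on the EmitAtomicStore fixup above: LLVM IR does not allow a store to carry acquire semantics, so an acquire or acq_rel ordering requested for an atomic store has to be weakened before setAtomic. A minimal sketch of that mapping; the helper name is hypothetical.

#include "llvm/Support/AtomicOrdering.h"

// Hypothetical helper mirroring the fixup above: stores cannot be Acquire
// or AcquireRelease in LLVM IR.
static llvm::AtomicOrdering legalizeStoreOrdering(llvm::AtomicOrdering AO) {
  if (AO == llvm::AtomicOrdering::Acquire)
    return llvm::AtomicOrdering::Monotonic; // drop the acquire half
  if (AO == llvm::AtomicOrdering::AcquireRelease)
    return llvm::AtomicOrdering::Release;   // keep only the release half
  return AO;
}
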
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index 11f54d1f7fb2..615b78235041 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -36,7 +36,7 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
: Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false),
HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false),
CapturesNonExternalType(false), LocalAddress(Address::invalid()),
- StructureType(nullptr), Block(block), DominatingIP(nullptr) {
+ StructureType(nullptr), Block(block) {
// Skip asm prefix, if any. 'name' is usually taken directly from
// the mangled name of the enclosing function.
@@ -775,151 +775,23 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true);
}
-/// Enter the scope of a block. This should be run at the entrance to
-/// a full-expression so that the block's cleanups are pushed at the
-/// right place in the stack.
-static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
- assert(CGF.HaveInsertPoint());
-
- // Allocate the block info and place it at the head of the list.
- CGBlockInfo &blockInfo =
- *new CGBlockInfo(block, CGF.CurFn->getName());
- blockInfo.NextBlockInfo = CGF.FirstBlockInfo;
- CGF.FirstBlockInfo = &blockInfo;
-
- // Compute information about the layout, etc., of this block,
- // pushing cleanups as necessary.
- computeBlockInfo(CGF.CGM, &CGF, blockInfo);
-
- // Nothing else to do if it can be global.
- if (blockInfo.CanBeGlobal) return;
-
- // Make the allocation for the block.
- blockInfo.LocalAddress = CGF.CreateTempAlloca(blockInfo.StructureType,
- blockInfo.BlockAlign, "block");
-
- // If there are cleanups to emit, enter them (but inactive).
- if (!blockInfo.NeedsCopyDispose) return;
-
- // Walk through the captures (in order) and find the ones not
- // captured by constant.
- for (const auto &CI : block->captures()) {
- // Ignore __block captures; there's nothing special in the
- // on-stack block that we need to do for them.
- if (CI.isByRef()) continue;
-
- // Ignore variables that are constant-captured.
- const VarDecl *variable = CI.getVariable();
- CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
- if (capture.isConstant()) continue;
-
- // Ignore objects that aren't destructed.
- QualType VT = getCaptureFieldType(CGF, CI);
- QualType::DestructionKind dtorKind = VT.isDestructedType();
- if (dtorKind == QualType::DK_none) continue;
-
- CodeGenFunction::Destroyer *destroyer;
-
- // Block captures count as local values and have imprecise semantics.
- // They also can't be arrays, so need to worry about that.
- //
- // For const-qualified captures, emit clang.arc.use to ensure the captured
- // object doesn't get released while we are still depending on its validity
- // within the block.
- if (VT.isConstQualified() &&
- VT.getObjCLifetime() == Qualifiers::OCL_Strong &&
- CGF.CGM.getCodeGenOpts().OptimizationLevel != 0) {
- assert(CGF.CGM.getLangOpts().ObjCAutoRefCount &&
- "expected ObjC ARC to be enabled");
- destroyer = CodeGenFunction::emitARCIntrinsicUse;
- } else if (dtorKind == QualType::DK_objc_strong_lifetime) {
- destroyer = CodeGenFunction::destroyARCStrongImprecise;
- } else {
- destroyer = CGF.getDestroyer(dtorKind);
- }
-
- // GEP down to the address.
- Address addr =
- CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, capture.getIndex());
-
- // We can use that GEP as the dominating IP.
- if (!blockInfo.DominatingIP)
- blockInfo.DominatingIP = cast<llvm::Instruction>(addr.getPointer());
-
- CleanupKind cleanupKind = InactiveNormalCleanup;
- bool useArrayEHCleanup = CGF.needsEHCleanup(dtorKind);
- if (useArrayEHCleanup)
- cleanupKind = InactiveNormalAndEHCleanup;
-
- CGF.pushDestroy(cleanupKind, addr, VT,
- destroyer, useArrayEHCleanup);
-
- // Remember where that cleanup was.
- capture.setCleanup(CGF.EHStack.stable_begin());
- }
-}
-
-/// Enter a full-expression with a non-trivial number of objects to
-/// clean up. This is in this file because, at the moment, the only
-/// kind of cleanup object is a BlockDecl*.
-void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) {
- if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) {
- assert(EWC->getNumObjects() != 0);
- for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects())
- enterBlockScope(*this, C);
- }
-}
-
-/// Find the layout for the given block in a linked list and remove it.
-static CGBlockInfo *findAndRemoveBlockInfo(CGBlockInfo **head,
- const BlockDecl *block) {
- while (true) {
- assert(head && *head);
- CGBlockInfo *cur = *head;
-
- // If this is the block we're looking for, splice it out of the list.
- if (cur->getBlockDecl() == block) {
- *head = cur->NextBlockInfo;
- return cur;
- }
-
- head = &cur->NextBlockInfo;
- }
-}
-
-/// Destroy a chain of block layouts.
-void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) {
- assert(head && "destroying an empty chain");
- do {
- CGBlockInfo *cur = head;
- head = cur->NextBlockInfo;
- delete cur;
- } while (head != nullptr);
-}
-
/// Emit a block literal expression in the current function.
llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
// If the block has no captures, we won't have a pre-computed
// layout for it.
- if (!blockExpr->getBlockDecl()->hasCaptures()) {
+ if (!blockExpr->getBlockDecl()->hasCaptures())
// The block literal is emitted as a global variable, and the block invoke
// function has to be extracted from its initializer.
- if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
+ if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr))
return Block;
- }
- CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
- computeBlockInfo(CGM, this, blockInfo);
- blockInfo.BlockExpression = blockExpr;
- return EmitBlockLiteral(blockInfo);
- }
-
- // Find the block info for this block and take ownership of it.
- std::unique_ptr<CGBlockInfo> blockInfo;
- blockInfo.reset(findAndRemoveBlockInfo(&FirstBlockInfo,
- blockExpr->getBlockDecl()));
- blockInfo->BlockExpression = blockExpr;
- return EmitBlockLiteral(*blockInfo);
+ CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
+ computeBlockInfo(CGM, this, blockInfo);
+ blockInfo.BlockExpression = blockExpr;
+ if (!blockInfo.CanBeGlobal)
+ blockInfo.LocalAddress = CreateTempAlloca(blockInfo.StructureType,
+ blockInfo.BlockAlign, "block");
+ return EmitBlockLiteral(blockInfo);
}
llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
@@ -1161,12 +1033,64 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
/*captured by init*/ false);
}
- // Activate the cleanup if layout pushed one.
- if (!CI.isByRef()) {
- EHScopeStack::stable_iterator cleanup = capture.getCleanup();
- if (cleanup.isValid())
- ActivateCleanupBlock(cleanup, blockInfo.DominatingIP);
+ // Push a cleanup for the capture if necessary.
+ if (!blockInfo.NeedsCopyDispose)
+ continue;
+
+ // Ignore __block captures; there's nothing special in the on-stack block
+ // that we need to do for them.
+ if (CI.isByRef())
+ continue;
+
+ // Ignore objects that aren't destructed.
+ QualType::DestructionKind dtorKind = type.isDestructedType();
+ if (dtorKind == QualType::DK_none)
+ continue;
+
+ CodeGenFunction::Destroyer *destroyer;
+
+ // Block captures count as local values and have imprecise semantics.
+ // They also can't be arrays, so need to worry about that.
+ //
+ // For const-qualified captures, emit clang.arc.use to ensure the captured
+ // object doesn't get released while we are still depending on its validity
+ // within the block.
+ if (type.isConstQualified() &&
+ type.getObjCLifetime() == Qualifiers::OCL_Strong &&
+ CGM.getCodeGenOpts().OptimizationLevel != 0) {
+ assert(CGM.getLangOpts().ObjCAutoRefCount &&
+ "expected ObjC ARC to be enabled");
+ destroyer = emitARCIntrinsicUse;
+ } else if (dtorKind == QualType::DK_objc_strong_lifetime) {
+ destroyer = destroyARCStrongImprecise;
+ } else {
+ destroyer = getDestroyer(dtorKind);
}
+
+ CleanupKind cleanupKind = NormalCleanup;
+ bool useArrayEHCleanup = needsEHCleanup(dtorKind);
+ if (useArrayEHCleanup)
+ cleanupKind = NormalAndEHCleanup;
+
+ // Extend the lifetime of the capture to the end of the scope enclosing the
+ // block expression except when the block decl is in the list of RetExpr's
+ // cleanup objects, in which case its lifetime ends after the full
+ // expression.
+ auto IsBlockDeclInRetExpr = [&]() {
+ auto *EWC = llvm::dyn_cast_or_null<ExprWithCleanups>(RetExpr);
+ if (EWC)
+ for (auto &C : EWC->getObjects())
+ if (auto *BD = C.dyn_cast<BlockDecl *>())
+ if (BD == blockDecl)
+ return true;
+ return false;
+ };
+
+ if (IsBlockDeclInRetExpr())
+ pushDestroy(cleanupKind, blockField, type, destroyer, useArrayEHCleanup);
+ else
+ pushLifetimeExtendedDestroy(cleanupKind, blockField, type, destroyer,
+ useArrayEHCleanup);
}
// Cast to the converted block-pointer type, which happens (somewhat
@@ -1449,7 +1373,8 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry",
Init));
b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(),
- b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity());
+ b.CreateStructGEP(literal, 0),
+ CGM.getPointerAlign().getAsAlign());
b.CreateRetVoid();
// We can't use the normal LLVM global initialisation array, because we
// need to specify that this runs early in library initialisation.
@@ -2031,11 +1956,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
FunctionDecl *FD = FunctionDecl::Create(
C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
FunctionTy, nullptr, SC_Static, false, false);
-
setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
CGM);
+ // This is necessary to avoid inheriting the previous line number.
+ FD->setImplicit();
StartFunction(FD, ReturnTy, Fn, FI, args);
- ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
+ auto AL = ApplyDebugLocation::CreateArtificial(*this);
+
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
Address src = GetAddrOfLocalVar(&SrcDecl);
@@ -2226,10 +2153,12 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
CGM);
+ // This is necessary to avoid inheriting the previous line number.
+ FD->setImplicit();
StartFunction(FD, ReturnTy, Fn, FI, args);
markAsIgnoreThreadCheckingAtRuntime(Fn);
- ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
+ auto AL = ApplyDebugLocation::CreateArtificial(*this);
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
diff --git a/clang/lib/CodeGen/CGBlocks.h b/clang/lib/CodeGen/CGBlocks.h
index c4bfde666154..698ecd3d926a 100644
--- a/clang/lib/CodeGen/CGBlocks.h
+++ b/clang/lib/CodeGen/CGBlocks.h
@@ -257,10 +257,6 @@ public:
// This could be zero if no forced alignment is required.
CharUnits BlockHeaderForcedGapSize;
- /// An instruction which dominates the full-expression that the
- /// block is inside.
- llvm::Instruction *DominatingIP;
-
/// The next block in the block-info chain. Invalid if this block
/// info is not part of the CGF's block-info chain, which is true
/// if it corresponds to a global block or a block whose expression
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 107c9275431c..38e96c0f4ee6 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -22,16 +22,15 @@ class CodeGenFunction;
/// This is an IRBuilder insertion helper that forwards to
/// CodeGenFunction::InsertHelper, which adds necessary metadata to
/// instructions.
-class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter {
+class CGBuilderInserter final : public llvm::IRBuilderDefaultInserter {
public:
CGBuilderInserter() = default;
explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {}
-protected:
/// This forwards to CodeGenFunction::InsertHelper.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock *BB,
- llvm::BasicBlock::iterator InsertPt) const;
+ llvm::BasicBlock::iterator InsertPt) const override;
private:
CodeGenFunction *CGF = nullptr;
};
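
For context on the inserter change above: IRBuilderDefaultInserter::InsertHelper is now a public virtual method, so custom inserters override it instead of shadowing it. A hedged sketch of a custom inserter in the same style; CountingInserter is illustrative.

#include "llvm/IR/IRBuilder.h"

// Illustrative custom inserter: counts instructions as they are inserted.
class CountingInserter final : public llvm::IRBuilderDefaultInserter {
public:
  mutable unsigned NumInserted = 0;
  void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
                    llvm::BasicBlock *BB,
                    llvm::BasicBlock::iterator InsertPt) const override {
    llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
    ++NumInserted;
  }
};
// Usable as: llvm::IRBuilder<llvm::ConstantFolder, CountingInserter>.
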
@@ -68,38 +67,34 @@ public:
// take an alignment.
llvm::LoadInst *CreateLoad(Address Addr, const llvm::Twine &Name = "") {
return CreateAlignedLoad(Addr.getPointer(),
- Addr.getAlignment().getQuantity(),
- Name);
+ Addr.getAlignment().getAsAlign(), Name);
}
llvm::LoadInst *CreateLoad(Address Addr, const char *Name) {
// This overload is required to prevent string literals from
// ending up in the IsVolatile overload.
return CreateAlignedLoad(Addr.getPointer(),
- Addr.getAlignment().getQuantity(),
- Name);
+ Addr.getAlignment().getAsAlign(), Name);
}
llvm::LoadInst *CreateLoad(Address Addr, bool IsVolatile,
const llvm::Twine &Name = "") {
- return CreateAlignedLoad(Addr.getPointer(),
- Addr.getAlignment().getQuantity(),
- IsVolatile,
- Name);
+ return CreateAlignedLoad(
+ Addr.getPointer(), Addr.getAlignment().getAsAlign(), IsVolatile, Name);
}
using CGBuilderBaseTy::CreateAlignedLoad;
llvm::LoadInst *CreateAlignedLoad(llvm::Value *Addr, CharUnits Align,
const llvm::Twine &Name = "") {
- return CreateAlignedLoad(Addr, Align.getQuantity(), Name);
+ return CreateAlignedLoad(Addr, Align.getAsAlign(), Name);
}
llvm::LoadInst *CreateAlignedLoad(llvm::Value *Addr, CharUnits Align,
const char *Name) {
- return CreateAlignedLoad(Addr, Align.getQuantity(), Name);
+ return CreateAlignedLoad(Addr, Align.getAsAlign(), Name);
}
llvm::LoadInst *CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr,
CharUnits Align,
const llvm::Twine &Name = "") {
assert(Addr->getType()->getPointerElementType() == Ty);
- return CreateAlignedLoad(Addr, Align.getQuantity(), Name);
+ return CreateAlignedLoad(Addr, Align.getAsAlign(), Name);
}
// Note that we intentionally hide the CreateStore APIs that don't
@@ -113,7 +108,7 @@ public:
using CGBuilderBaseTy::CreateAlignedStore;
llvm::StoreInst *CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr,
CharUnits Align, bool IsVolatile = false) {
- return CreateAlignedStore(Val, Addr, Align.getQuantity(), IsVolatile);
+ return CreateAlignedStore(Val, Addr, Align.getAsAlign(), IsVolatile);
}
// FIXME: these "default-aligned" APIs should be removed,
@@ -284,6 +279,13 @@ public:
IsVolatile);
}
+ using CGBuilderBaseTy::CreateMemCpyInline;
+ llvm::CallInst *CreateMemCpyInline(Address Dest, Address Src, uint64_t Size) {
+ return CreateMemCpyInline(
+ Dest.getPointer(), Dest.getAlignment().getAsAlign(), Src.getPointer(),
+ Src.getAlignment().getAsAlign(), getInt64(Size));
+ }
+
using CGBuilderBaseTy::CreateMemMove;
llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size,
bool IsVolatile = false) {
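
For context on the CreateMemCpyInline wrapper above: it unwraps clang's Address (pointer plus alignment) into the raw IRBuilder call, which emits llvm.memcpy.inline with a constant size. A self-contained sketch of the underlying IRBuilder API; the demo module and function are illustrative.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("demo", Ctx);
  auto *I8Ptr = llvm::Type::getInt8PtrTy(Ctx);
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
                                     {I8Ptr, I8Ptr}, /*isVarArg=*/false);
  auto *F =
      llvm::Function::Create(FT, llvm::Function::ExternalLinkage, "f", M);
  llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", F));
  // Emits llvm.memcpy.inline with a constant 16-byte size; the CGBuilder
  // wrapper above supplies pointers and alignments from an Address.
  B.CreateMemCpyInline(F->getArg(0), llvm::Align(1), F->getArg(1),
                       llvm::Align(1), B.getInt64(16));
  B.CreateRetVoid();
  return 0;
}
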
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09fd3087b494..8994b939093e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -28,6 +28,7 @@
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
@@ -43,9 +44,10 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/X86TargetParser.h"
#include <sstream>
using namespace clang;
@@ -74,6 +76,8 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
break;
}
}
+ if (CGF.CGM.stopAutoInit())
+ return;
CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
}
@@ -215,8 +219,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
if (Invert)
- Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
- llvm::ConstantInt::get(IntType, -1));
+ Result =
+ CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
+ llvm::ConstantInt::getAllOnesValue(IntType));
Result = EmitFromInt(CGF, Result, T, ValueType);
return RValue::get(Result);
}
@@ -411,6 +416,25 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
}
}
+// Emit an intrinsic where all operands are of the same type as the result.
+// Depending on mode, this may be a constrained floating-point intrinsic.
+static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
+ unsigned IntrinsicID,
+ unsigned ConstrainedIntrinsicID,
+ llvm::Type *Ty,
+ ArrayRef<Value *> Args) {
+ Function *F;
+ if (CGF.Builder.getIsFPConstrained())
+ F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
+ else
+ F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
+
+ if (CGF.Builder.getIsFPConstrained())
+ return CGF.Builder.CreateConstrainedFPCall(F, Args);
+ else
+ return CGF.Builder.CreateCall(F, Args);
+}
+
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
@@ -566,7 +590,9 @@ static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
const clang::QualType Type) {
assert(Type->isIntegerType() && "Given type is not an integer.");
- unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
+ unsigned Width = Type->isBooleanType() ? 1
+ : Type->isExtIntType() ? context.getIntWidth(Type)
+ : context.getTypeInfo(Type).Width;
bool Signed = Type->isSignedIntegerType();
return {Width, Signed};
}
@@ -1251,6 +1277,8 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
FuncionTy, nullptr, SC_PrivateExtern, false, false);
+ // Avoid generating debug location info for the function.
+ FD->setImplicit();
StartFunction(FD, ReturnTy, Fn, FI, Args);
@@ -1320,14 +1348,42 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
} else if (const Expr *TheExpr = Item.getExpr()) {
ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
- // Check if this is a retainable type.
- if (TheExpr->getType()->isObjCRetainableType()) {
+ // If a temporary object that requires destruction after the full
+ // expression is passed, push a lifetime-extended cleanup to extend its
+ // lifetime to the end of the enclosing block scope.
+ auto LifetimeExtendObject = [&](const Expr *E) {
+ E = E->IgnoreParenCasts();
+ // Extend lifetimes of objects returned by function calls and message
+ // sends.
+
+ // FIXME: We should do this in other cases in which temporaries are
+ // created including arguments of non-ARC types (e.g., C++
+ // temporaries).
+ if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
+ return true;
+ return false;
+ };
+
+ if (TheExpr->getType()->isObjCRetainableType() &&
+ getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
"Only scalar can be a ObjC retainable type");
- // Check if the object is constant, if not, save it in
- // RetainableOperands.
- if (!isa<Constant>(ArgVal))
- RetainableOperands.push_back(ArgVal);
+ if (!isa<Constant>(ArgVal)) {
+ CleanupKind Cleanup = getARCCleanupKind();
+ QualType Ty = TheExpr->getType();
+ Address Alloca = Address::invalid();
+ Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
+ ArgVal = EmitARCRetain(Ty, ArgVal);
+ Builder.CreateStore(ArgVal, Addr);
+ pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
+ CodeGenFunction::destroyARCStrongPrecise,
+ Cleanup & EHCleanup);
+
+ // Push a clang.arc.use call to ensure ARC optimizer knows that the
+ // argument has to be alive.
+ if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+ pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
+ }
}
} else {
ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
@@ -1349,18 +1405,6 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
Layout, BufAddr.getAlignment());
EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
-
- // Push a clang.arc.use cleanup for each object in RetainableOperands. The
- // cleanup will cause the use to appear after the final log call, keeping
- // the object valid while it’s held in the log buffer. Note that if there’s
- // a release cleanup on the object, it will already be active; since
- // cleanups are emitted in reverse order, the use will occur before the
- // object is released.
- if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
- CGM.getCodeGenOpts().OptimizationLevel != 0)
- for (llvm::Value *Object : RetainableOperands)
- pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
-
return RValue::get(BufAddr.getPointer());
}
@@ -1521,8 +1565,7 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
// We check whether we are in a recursive type
if (CanonicalType->isRecordType()) {
- Value *TmpRes =
- dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
+ TmpRes = dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
Res = CGF.Builder.CreateAdd(TmpRes, Res);
continue;
}
@@ -1629,7 +1672,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_ceilf:
case Builtin::BI__builtin_ceilf16:
case Builtin::BI__builtin_ceill:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::ceil,
Intrinsic::experimental_constrained_ceil));
@@ -1650,7 +1693,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_cosf:
case Builtin::BI__builtin_cosf16:
case Builtin::BI__builtin_cosl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::cos,
Intrinsic::experimental_constrained_cos));
@@ -1661,7 +1704,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_expf:
case Builtin::BI__builtin_expf16:
case Builtin::BI__builtin_expl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::exp,
Intrinsic::experimental_constrained_exp));
@@ -1672,7 +1715,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_exp2f:
case Builtin::BI__builtin_exp2f16:
case Builtin::BI__builtin_exp2l:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::exp2,
Intrinsic::experimental_constrained_exp2));
@@ -1693,7 +1736,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_floorf:
case Builtin::BI__builtin_floorf16:
case Builtin::BI__builtin_floorl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::floor,
Intrinsic::experimental_constrained_floor));
@@ -1704,7 +1747,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmaf:
case Builtin::BI__builtin_fmaf16:
case Builtin::BI__builtin_fmal:
- return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::fma,
Intrinsic::experimental_constrained_fma));
@@ -1715,7 +1758,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmaxf:
case Builtin::BI__builtin_fmaxf16:
case Builtin::BI__builtin_fmaxl:
- return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::maxnum,
Intrinsic::experimental_constrained_maxnum));
@@ -1726,7 +1769,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fminf:
case Builtin::BI__builtin_fminf16:
case Builtin::BI__builtin_fminl:
- return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::minnum,
Intrinsic::experimental_constrained_minnum));
@@ -1751,7 +1794,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_logf:
case Builtin::BI__builtin_logf16:
case Builtin::BI__builtin_logl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::log,
Intrinsic::experimental_constrained_log));
@@ -1762,7 +1805,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_log10f:
case Builtin::BI__builtin_log10f16:
case Builtin::BI__builtin_log10l:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::log10,
Intrinsic::experimental_constrained_log10));
@@ -1773,7 +1816,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_log2f:
case Builtin::BI__builtin_log2f16:
case Builtin::BI__builtin_log2l:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::log2,
Intrinsic::experimental_constrained_log2));
@@ -1783,7 +1826,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_nearbyint:
case Builtin::BI__builtin_nearbyintf:
case Builtin::BI__builtin_nearbyintl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::nearbyint,
Intrinsic::experimental_constrained_nearbyint));
@@ -1794,7 +1837,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_powf:
case Builtin::BI__builtin_powf16:
case Builtin::BI__builtin_powl:
- return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::pow,
Intrinsic::experimental_constrained_pow));
@@ -1805,7 +1848,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_rintf:
case Builtin::BI__builtin_rintf16:
case Builtin::BI__builtin_rintl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::rint,
Intrinsic::experimental_constrained_rint));
@@ -1816,7 +1859,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_roundf:
case Builtin::BI__builtin_roundf16:
case Builtin::BI__builtin_roundl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::round,
Intrinsic::experimental_constrained_round));
@@ -1827,7 +1870,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sinf:
case Builtin::BI__builtin_sinf16:
case Builtin::BI__builtin_sinl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::sin,
Intrinsic::experimental_constrained_sin));
@@ -1838,7 +1881,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sqrtf:
case Builtin::BI__builtin_sqrtf16:
case Builtin::BI__builtin_sqrtl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::sqrt,
Intrinsic::experimental_constrained_sqrt));
@@ -1849,7 +1892,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_truncf:
case Builtin::BI__builtin_truncf16:
case Builtin::BI__builtin_truncl:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::trunc,
Intrinsic::experimental_constrained_trunc));
@@ -2152,6 +2195,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
return RValue::get(Result);
}
+ case Builtin::BI__builtin_expect_with_probability: {
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ArgType = ArgValue->getType();
+
+ Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
+ llvm::APFloat Probability(0.0);
+ const Expr *ProbArg = E->getArg(2);
+ bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
+ assert(EvalSucceed && "probability should be able to evaluate as float");
+ (void)EvalSucceed;
+ bool LoseInfo = false;
+ Probability.convert(llvm::APFloat::IEEEdouble(),
+ llvm::RoundingMode::Dynamic, &LoseInfo);
+ llvm::Type *Ty = ConvertType(ProbArg->getType());
+ Constant *Confidence = ConstantFP::get(Ty, Probability);
+ // Don't generate llvm.expect.with.probability on -O0 as the backend
+ // won't use it for anything.
+ // Note, we still IRGen ExpectedValue because it could have side-effects.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ return RValue::get(ArgValue);
+
+ Function *FnExpect =
+ CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
+ Value *Result = Builder.CreateCall(
+ FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
+ return RValue::get(Result);
+ }
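
(A source-level sketch of the builtin wired up above; the user code and names
are illustrative, not part of the patch. At -O1 and above this lowers to a
call to llvm.expect.with.probability as emitted here; at -O0 only the first
argument is emitted.)

    // Hint that the error path is taken only ~10% of the time.
    long check(long err) {
      if (__builtin_expect_with_probability(err != 0, 0, 0.9))
        return -1;   // expected-cold path
      return 0;      // expected-hot fall-through
    }
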
case Builtin::BI__builtin_assume_aligned: {
const Expr *Ptr = E->getArg(0);
Value *PtrValue = EmitScalarExpr(Ptr);
@@ -2164,7 +2234,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
llvm::Value::MaximumAlignment);
- EmitAlignmentAssumption(PtrValue, Ptr,
+ emitAlignmentAssumption(PtrValue, Ptr,
/*The expr loc is sufficient.*/ SourceLocation(),
AlignmentCI, OffsetValue);
return RValue::get(PtrValue);
@@ -2336,6 +2406,53 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
+ case Builtin::BI__builtin_matrix_transpose: {
+ const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
+ Value *MatValue = EmitScalarExpr(E->getArg(0));
+ MatrixBuilder<CGBuilderTy> MB(Builder);
+ Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
+ MatrixTy->getNumColumns());
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_matrix_column_major_load: {
+ MatrixBuilder<CGBuilderTy> MB(Builder);
+ // Emit everything that isn't dependent on the first parameter type
+ Value *Stride = EmitScalarExpr(E->getArg(3));
+ const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
+ auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
+ assert(PtrTy && "arg0 must be of pointer type");
+ bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
+
+ Address Src = EmitPointerWithAlignment(E->getArg(0));
+ EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
+ E->getArg(0)->getExprLoc(), FD, 0);
+ Value *Result = MB.CreateColumnMajorLoad(
+ Src.getPointer(), Align(Src.getAlignment().getQuantity()), Stride,
+ IsVolatile, ResultTy->getNumRows(), ResultTy->getNumColumns(),
+ "matrix");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_matrix_column_major_store: {
+ MatrixBuilder<CGBuilderTy> MB(Builder);
+ Value *Matrix = EmitScalarExpr(E->getArg(0));
+ Address Dst = EmitPointerWithAlignment(E->getArg(1));
+ Value *Stride = EmitScalarExpr(E->getArg(2));
+
+ const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
+ auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
+ assert(PtrTy && "arg1 must be of pointer type");
+ bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
+
+ EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
+ E->getArg(1)->getExprLoc(), FD, 0);
+ Value *Result = MB.CreateColumnMajorStore(
+ Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
+ Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
+ return RValue::get(Result);
+ }
+
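(For orientation, a hypothetical use of the three matrix builtins handled
above; compiling it requires -fenable-matrix, and the typedef name is
illustrative.)

    typedef float m4x4_t __attribute__((matrix_type(4, 4)));

    void transpose_in_place(float *buf, unsigned long stride) {
      m4x4_t m = __builtin_matrix_column_major_load(buf, 4, 4, stride);
      m4x4_t t = __builtin_matrix_transpose(m);
      __builtin_matrix_column_major_store(t, buf, stride);
    }
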
case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
@@ -2518,6 +2635,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Dest.getPointer());
}
+ case Builtin::BI__builtin_memcpy_inline: {
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ Address Src = EmitPointerWithAlignment(E->getArg(1));
+ uint64_t Size =
+ E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
+ EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
+ E->getArg(0)->getExprLoc(), FD, 0);
+ EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
+ E->getArg(1)->getExprLoc(), FD, 1);
+ Builder.CreateMemCpyInline(Dest, Src, Size);
+ return RValue::get(nullptr);
+ }
+
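(Sketch of the new builtin, with assumed caller-side names: unlike plain
memcpy, the size operand must be a constant expression, and the
llvm.memcpy.inline lowering guarantees no library call is emitted.)

    void copy_header(char *dst, const char *src) {
      __builtin_memcpy_inline(dst, src, 16);  // size: compile-time constant
    }
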
case Builtin::BI__builtin_char_memchr:
BuiltinID = Builtin::BI__builtin_memchr;
break;
@@ -3222,6 +3352,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
ConvertType(E->getType())));
}
+ case Builtin::BI__warn_memset_zero_len:
+ return RValue::getIgnored();
case Builtin::BI__annotation: {
// Re-encode each wide string to UTF8 and make an MDString.
SmallVector<Metadata *, 1> Strings;
@@ -3928,7 +4060,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
auto *V =
Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
Builder.CreateAlignedStore(
- V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
+ V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
return std::tie(ElemPtr, TmpSize, TmpPtr);
};
@@ -3947,19 +4079,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
- std::vector<llvm::Value *> Args = {
- Queue, Flags, Range,
- Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
- ElemPtr};
- std::vector<llvm::Type *> ArgTys = {
+ llvm::Value *const Args[] = {Queue, Flags,
+ Range, Kernel,
+ Block, ConstantInt::get(IntTy, NumArgs - 4),
+ ElemPtr};
+ llvm::Type *const ArgTys[] = {
QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
GenericVoidPtrTy, IntTy, ElemPtr->getType()};
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- auto Call =
- RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
+ auto Call = RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
if (TmpSize)
EmitLifetimeEnd(TmpSize, TmpPtr);
return Call;
@@ -4115,6 +4245,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIprintf:
if (getTarget().getTriple().isNVPTX())
return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
+ if (getTarget().getTriple().getArch() == Triple::amdgcn &&
+ getLangOpts().HIP)
+ return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
break;
case Builtin::BI__builtin_canonicalize:
case Builtin::BI__builtin_canonicalizef:
@@ -4427,35 +4560,41 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
NeonTypeFlags TypeFlags,
- bool HasLegalHalfType=true,
- bool V1Ty=false) {
+ bool HasLegalHalfType = true,
+ bool V1Ty = false,
+ bool AllowBFloatArgsAndRet = true) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
case NeonTypeFlags::Int8:
case NeonTypeFlags::Poly8:
- return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
case NeonTypeFlags::Int16:
case NeonTypeFlags::Poly16:
- return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
+ case NeonTypeFlags::BFloat16:
+ if (AllowBFloatArgsAndRet)
+ return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
+ else
+ return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Float16:
if (HasLegalHalfType)
- return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
else
- return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Int32:
- return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Int64:
case NeonTypeFlags::Poly64:
- return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
case NeonTypeFlags::Poly128:
// FIXME: i128 and f128 don't get full support in Clang and LLVM;
// a lot of the i128 and f128 API is missing,
// so we use v16i8 to represent poly128 and get pattern matched.
- return llvm::VectorType::get(CGF->Int8Ty, 16);
+ return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
case NeonTypeFlags::Float32:
- return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Float64:
- return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
}
llvm_unreachable("Unknown vector element type!");
}
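
(The mechanical change in this hunk -- llvm::VectorType::get becoming
llvm::FixedVectorType::get -- tracks LLVM 11's split of vector types into
fixed and scalable variants. A minimal sketch of the distinction, assuming an
llvm::LLVMContext Ctx is in scope:)

    #include "llvm/IR/DerivedTypes.h"

    llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
    auto *Fixed    = llvm::FixedVectorType::get(I32, 4);    // <4 x i32>
    auto *Scalable = llvm::ScalableVectorType::get(I32, 4); // <vscale x 4 x i32>
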
@@ -4465,34 +4604,46 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
int IsQuad = IntTypeFlags.isQuad();
switch (IntTypeFlags.getEltType()) {
case NeonTypeFlags::Int16:
- return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
case NeonTypeFlags::Int32:
- return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
case NeonTypeFlags::Int64:
- return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
+ return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
default:
llvm_unreachable("Type can't be converted to floating-point!");
}
}
-Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
- unsigned nElts = V->getType()->getVectorNumElements();
- Value* SV = llvm::ConstantVector::getSplat(nElts, C);
+Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
+ const ElementCount &Count) {
+ Value *SV = llvm::ConstantVector::getSplat(Count, C);
return Builder.CreateShuffleVector(V, V, SV, "lane");
}
+Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
+ ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
+ return EmitNeonSplat(V, C, EC);
+}
+
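(A short worked example of what EmitNeonSplat emits, with illustrative
values: splatting lane C = 1 of a <4 x i16> %v.)

    // SV = ConstantVector::getSplat(EC, C)   -> mask <4 x i32> <1, 1, 1, 1>
    // %lane = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> SV
    // Every lane of %lane is element 1 of %v.
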
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
const char *name,
unsigned shift, bool rightshift) {
unsigned j = 0;
for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
- ai != ae; ++ai, ++j)
+ ai != ae; ++ai, ++j) {
+ if (F->isConstrainedFPIntrinsic())
+ if (ai->getType()->isMetadataTy())
+ continue;
if (shift > 0 && shift == j)
Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
else
Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
+ }
- return Builder.CreateCall(F, Ops, name);
+ if (F->isConstrainedFPIntrinsic())
+ return Builder.CreateConstrainedFPCall(F, Ops, name);
+ else
+ return Builder.CreateCall(F, Ops, name);
}
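
(Why the metadata skip above is needed: constrained FP intrinsics carry
trailing metadata operands, for instance

    // declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(
    //     <4 x float>, <4 x float>, metadata, metadata)

so the bitcast loop must not touch metadata-typed parameters, and the call
goes through CreateConstrainedFPCall so the builder appends the current
rounding-mode and exception-behavior arguments.)
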
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
@@ -4556,17 +4707,17 @@ enum {
};
namespace {
-struct NeonIntrinsicInfo {
+struct ARMVectorIntrinsicInfo {
const char *NameHint;
unsigned BuiltinID;
unsigned LLVMIntrinsic;
unsigned AltLLVMIntrinsic;
- unsigned TypeModifier;
+ uint64_t TypeModifier;
bool operator<(unsigned RHSBuiltinID) const {
return BuiltinID < RHSBuiltinID;
}
- bool operator<(const NeonIntrinsicInfo &TE) const {
+ bool operator<(const ARMVectorIntrinsicInfo &TE) const {
return BuiltinID < TE.BuiltinID;
}
};
@@ -4584,7 +4735,12 @@ struct NeonIntrinsicInfo {
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
TypeModifier }
-static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
+static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
+ NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0),
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP1(vabs_v, arm_neon_vabs, 0),
@@ -4594,6 +4750,11 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vaeseq_v, arm_neon_aese, 0),
NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
+ NEONMAP1(vbfdot_v, arm_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0),
NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
@@ -4654,6 +4815,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
@@ -4752,6 +4914,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
@@ -4859,13 +5022,21 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vtrnq_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
+ NEONMAP1(vusdot_v, arm_neon_usdot, 0),
+ NEONMAP1(vusdotq_v, arm_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0),
NEONMAP0(vuzp_v),
NEONMAP0(vuzpq_v),
NEONMAP0(vzip_v),
NEONMAP0(vzipq_v)
};
-static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+ NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0),
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
NEONMAP0(vaddhn_v),
@@ -4873,6 +5044,11 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
+ NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0),
NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
@@ -4916,6 +5092,7 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP0(vcvtq_f16_v),
NEONMAP0(vcvtq_f32_v),
+ NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0),
NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
@@ -4950,6 +5127,7 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
@@ -4964,14 +5142,22 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
+ NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
+ NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
+ NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
+ NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
+ NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
@@ -5024,9 +5210,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vsubhn_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
+ NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
+ NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
};
-static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
+static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
@@ -5059,6 +5248,7 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
@@ -5256,24 +5446,42 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
#undef NEONMAP1
#undef NEONMAP2
+#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ { \
+ #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier \
+ }
+
+#define SVEMAP2(NameBase, TypeModifier) \
+ { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
+static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
+#define GET_SVE_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sve_builtin_cg.inc"
+#undef GET_SVE_LLVM_INTRINSIC_MAP
+};
+
+#undef SVEMAP1
+#undef SVEMAP2
+
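(Shape check for the generated table, using a hypothetical entry:
SVEMAP1(svabs_s8_m, aarch64_sve_abs, SomeFlags) expands to

    { "svabs_s8_m", SVE::BI__builtin_sve_svabs_s8_m,
      Intrinsic::aarch64_sve_abs, 0, SomeFlags }

one ARMVectorIntrinsicInfo record per builtin, with AltLLVMIntrinsic fixed at
0; SVEMAP2 entries leave both intrinsic fields 0 for custom codegen.)
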
static bool NEONSIMDIntrinsicsProvenSorted = false;
static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
+static bool AArch64SVEIntrinsicsProvenSorted = false;
-
-static const NeonIntrinsicInfo *
-findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
- unsigned BuiltinID, bool &MapProvenSorted) {
+static const ARMVectorIntrinsicInfo *
+findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
+ unsigned BuiltinID, bool &MapProvenSorted) {
#ifndef NDEBUG
if (!MapProvenSorted) {
- assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
+ assert(llvm::is_sorted(IntrinsicMap));
MapProvenSorted = true;
}
#endif
- const NeonIntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID);
+ const ARMVectorIntrinsicInfo *Builtin =
+ llvm::lower_bound(IntrinsicMap, BuiltinID);
if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
return Builtin;
@@ -5296,7 +5504,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
if (Modifier & AddRetType) {
llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
if (Modifier & VectorizeRetType)
- Ty = llvm::VectorType::get(
+ Ty = llvm::FixedVectorType::get(
Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
Tys.push_back(Ty);
@@ -5305,7 +5513,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
// Arguments.
if (Modifier & VectorizeArgTypes) {
int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
- ArgType = llvm::VectorType::get(ArgType, Elts);
+ ArgType = llvm::FixedVectorType::get(ArgType, Elts);
}
if (Modifier & (Add1ArgType | Add2ArgTypes))
@@ -5320,10 +5528,9 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
return CGM.getIntrinsic(IntrinsicID, Tys);
}
-static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
- const NeonIntrinsicInfo &SISDInfo,
- SmallVectorImpl<Value *> &Ops,
- const CallExpr *E) {
+static Value *EmitCommonNeonSISDBuiltinExpr(
+ CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
+ SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
unsigned BuiltinID = SISDInfo.BuiltinID;
unsigned int Int = SISDInfo.LLVMIntrinsic;
unsigned Modifier = SISDInfo.TypeModifier;
@@ -5368,8 +5575,8 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
// The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
// it before inserting.
- Ops[j] =
- CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
+ Ops[j] = CGF.Builder.CreateTruncOrBitCast(
+ Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
Ops[j] =
CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
}
@@ -5399,8 +5606,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
const bool HasLegalHalfType = getTarget().hasLegalHalfType();
+ const bool AllowBFloatArgsAndRet =
+ getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
- llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
+ llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType, false,
+ AllowBFloatArgsAndRet);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -5415,6 +5625,19 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
switch (BuiltinID) {
default: break;
+ case NEON::BI__builtin_neon_splat_lane_v:
+ case NEON::BI__builtin_neon_splat_laneq_v:
+ case NEON::BI__builtin_neon_splatq_lane_v:
+ case NEON::BI__builtin_neon_splatq_laneq_v: {
+ auto NumElements = VTy->getElementCount();
+ if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
+ NumElements = NumElements * 2;
+ if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
+ NumElements = NumElements / 2;
+
+ Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
+ return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
+ }
case NEON::BI__builtin_neon_vpadd_v:
case NEON::BI__builtin_neon_vpaddq_v:
// We don't allow fp/int overloading of intrinsics.
@@ -5467,7 +5690,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ty = HalfTy;
break;
}
- llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
+ auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
llvm::Type *Tys[] = { VTy, VecFlt };
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
return EmitNeonCall(F, Ops, NameHint);
@@ -5614,7 +5837,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(i+CV);
@@ -5624,13 +5847,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vfma_v:
case NEON::BI__builtin_neon_vfmaq_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
// NEON intrinsic puts accumulator first, unlike the LLVM fma.
- return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
+ return emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
+ {Ops[1], Ops[2], Ops[0]});
}
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v: {
@@ -5644,7 +5868,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vld1q_x3_v:
case NEON::BI__builtin_neon_vld1_x4_v:
case NEON::BI__builtin_neon_vld1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
llvm::Type *Tys[2] = { VTy, PTy };
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
@@ -5726,8 +5950,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy =
llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
- llvm::Type *NarrowTy =
- llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
+ auto *NarrowTy =
+ llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
}
@@ -5736,8 +5960,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
- llvm::Type *NarrowTy =
- llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
+ auto *NarrowTy =
+ llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
}
@@ -5749,6 +5973,29 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops.resize(2);
return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
}
+ case NEON::BI__builtin_neon_vqdmulhq_lane_v:
+ case NEON::BI__builtin_neon_vqdmulh_lane_v:
+ case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
+ case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
+ auto *RTy = cast<llvm::VectorType>(Ty);
+ if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
+ BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
+ RTy = llvm::FixedVectorType::get(RTy->getElementType(),
+ RTy->getNumElements() * 2);
+ llvm::Type *Tys[2] = {
+ RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
+ /*isQuad*/ false))};
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
+ }
+ case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
+ case NEON::BI__builtin_neon_vqdmulh_laneq_v:
+ case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
+ case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
+ llvm::Type *Tys[2] = {
+ Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
+ /*isQuad*/ true))};
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
+ }
case NEON::BI__builtin_neon_vqshl_n_v:
case NEON::BI__builtin_neon_vqshlq_n_v:
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
@@ -5765,7 +6012,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
case NEON::BI__builtin_neon_vrndi_v:
case NEON::BI__builtin_neon_vrndiq_v:
- Int = Intrinsic::nearbyint;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_nearbyint
+ : Intrinsic::nearbyint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
case NEON::BI__builtin_neon_vrshr_n_v:
case NEON::BI__builtin_neon_vrshrq_n_v:
@@ -5823,7 +6072,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vst1q_x3_v:
case NEON::BI__builtin_neon_vst1_x4_v:
case NEON::BI__builtin_neon_vst1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+ llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
// TODO: Currently in AArch32 mode the pointer operand comes first, whereas
// in AArch64 it comes last. We may want to stick to one or the other.
if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
@@ -5860,7 +6109,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back(i+vi);
Indices.push_back(i+e+vi);
@@ -5888,7 +6137,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(2*i+vi);
@@ -5906,7 +6155,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back((i + vi*e) >> 1);
Indices.push_back(((i + vi*e) >> 1)+e);
@@ -5919,40 +6168,91 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vdot_v:
case NEON::BI__builtin_neon_vdotq_v: {
- llvm::Type *InputTy =
- llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ auto *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
llvm::Type *Tys[2] = { Ty, InputTy };
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
}
case NEON::BI__builtin_neon_vfmlal_low_v:
case NEON::BI__builtin_neon_vfmlalq_low_v: {
- llvm::Type *InputTy =
- llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ auto *InputTy =
+ llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
}
case NEON::BI__builtin_neon_vfmlsl_low_v:
case NEON::BI__builtin_neon_vfmlslq_low_v: {
- llvm::Type *InputTy =
- llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ auto *InputTy =
+ llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
}
case NEON::BI__builtin_neon_vfmlal_high_v:
case NEON::BI__builtin_neon_vfmlalq_high_v: {
- llvm::Type *InputTy =
- llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ auto *InputTy =
+ llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
}
case NEON::BI__builtin_neon_vfmlsl_high_v:
case NEON::BI__builtin_neon_vfmlslq_high_v: {
- llvm::Type *InputTy =
- llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ auto *InputTy =
+ llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
}
+ case NEON::BI__builtin_neon_vmmlaq_v: {
+ auto *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla");
+ }
+ case NEON::BI__builtin_neon_vusmmlaq_v: {
+ auto *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
+ }
+ case NEON::BI__builtin_neon_vusdot_v:
+ case NEON::BI__builtin_neon_vusdotq_v: {
+ auto *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
+ }
+ case NEON::BI__builtin_neon_vbfdot_v:
+ case NEON::BI__builtin_neon_vbfdotq_v: {
+ llvm::Type *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
+ }
+ case NEON::BI__builtin_neon_vbfmmlaq_v: {
+ llvm::Type *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmmla");
+ }
+ case NEON::BI__builtin_neon_vbfmlalbq_v: {
+ llvm::Type *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalb");
+ }
+ case NEON::BI__builtin_neon_vbfmlaltq_v: {
+ llvm::Type *InputTy =
+ llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalt");
+ }
+ case NEON::BI__builtin_neon___a32_vcvt_bf16_v: {
+ llvm::Type *Tys[1] = { Ty };
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtfp2bf");
+ }
+
}
assert(Int && "Expected valid intrinsic number");
@@ -5997,7 +6297,7 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
TblOps.push_back(ExtOp);
// Build a vector containing sequential number like (0, 1, 2, ..., 15)
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
Indices.push_back(2*i);
@@ -6061,6 +6361,12 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
llvm::ConstantInt::get(Int32Ty, Value));
}
+enum SpecialRegisterAccessKind {
+ NormalRead,
+ VolatileRead,
+ Write,
+};
+
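(Assumed mapping, from the uses below plus the unchanged write path:

    // NormalRead   -> llvm.read_register
    // VolatileRead -> llvm.read_volatile_register   (new in this change)
    // Write        -> llvm.write_register

The rsr-family builtins are switched to VolatileRead further down so that
system-register reads cannot be folded or reordered away.)
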
// Generates the IR for the read/write special register builtin.
// ValueType is the type of the value that is to be written or read;
// RegisterType is the type of the register being written to or read from.
@@ -6068,7 +6374,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
llvm::Type *RegisterType,
llvm::Type *ValueType,
- bool IsRead,
+ SpecialRegisterAccessKind AccessKind,
StringRef SysReg = "") {
// The read_register and write_register intrinsics only support 32- and
// 64-bit operations.
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
@@ -6093,8 +6399,12 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
&& "Can't fit 64-bit value in 32-bit register");
- if (IsRead) {
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+ if (AccessKind != Write) {
+ assert(AccessKind == NormalRead || AccessKind == VolatileRead);
+ llvm::Function *F = CGM.getIntrinsic(
+ AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
+ : llvm::Intrinsic::read_register,
+ Types);
llvm::Value *Call = Builder.CreateCall(F, Metadata);
if (MixedTypes)
@@ -6132,21 +6442,27 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
default: break;
case NEON::BI__builtin_neon_vget_lane_i8:
case NEON::BI__builtin_neon_vget_lane_i16:
+ case NEON::BI__builtin_neon_vget_lane_bf16:
case NEON::BI__builtin_neon_vget_lane_i32:
case NEON::BI__builtin_neon_vget_lane_i64:
case NEON::BI__builtin_neon_vget_lane_f32:
case NEON::BI__builtin_neon_vgetq_lane_i8:
case NEON::BI__builtin_neon_vgetq_lane_i16:
+ case NEON::BI__builtin_neon_vgetq_lane_bf16:
case NEON::BI__builtin_neon_vgetq_lane_i32:
case NEON::BI__builtin_neon_vgetq_lane_i64:
case NEON::BI__builtin_neon_vgetq_lane_f32:
+ case NEON::BI__builtin_neon_vduph_lane_bf16:
+ case NEON::BI__builtin_neon_vduph_laneq_bf16:
case NEON::BI__builtin_neon_vset_lane_i8:
case NEON::BI__builtin_neon_vset_lane_i16:
+ case NEON::BI__builtin_neon_vset_lane_bf16:
case NEON::BI__builtin_neon_vset_lane_i32:
case NEON::BI__builtin_neon_vset_lane_i64:
case NEON::BI__builtin_neon_vset_lane_f32:
case NEON::BI__builtin_neon_vsetq_lane_i8:
case NEON::BI__builtin_neon_vsetq_lane_i16:
+ case NEON::BI__builtin_neon_vsetq_lane_bf16:
case NEON::BI__builtin_neon_vsetq_lane_i32:
case NEON::BI__builtin_neon_vsetq_lane_i64:
case NEON::BI__builtin_neon_vsetq_lane_f32:
@@ -6154,6 +6470,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
case NEON::BI__builtin_neon_vsha1cq_u32:
case NEON::BI__builtin_neon_vsha1pq_u32:
case NEON::BI__builtin_neon_vsha1mq_u32:
+ case NEON::BI__builtin_neon_vcvth_bf16_f32:
case clang::ARM::BI_MoveToCoprocessor:
case clang::ARM::BI_MoveToCoprocessor2:
return false;
@@ -6466,9 +6783,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
BuiltinID == ARM::BI__builtin_arm_wsr64 ||
BuiltinID == ARM::BI__builtin_arm_wsrp) {
- bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
- BuiltinID == ARM::BI__builtin_arm_rsr64 ||
- BuiltinID == ARM::BI__builtin_arm_rsrp;
+ SpecialRegisterAccessKind AccessKind = Write;
+ if (BuiltinID == ARM::BI__builtin_arm_rsr ||
+ BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_rsrp)
+ AccessKind = VolatileRead;
bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
BuiltinID == ARM::BI__builtin_arm_wsrp;
@@ -6487,12 +6806,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
ValueType = RegisterType = Int32Ty;
}
- return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+ return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
+ AccessKind);
}
// Deal with MVE builtins
if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
return Result;
+ // Handle CDE builtins
+ if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
+ return Result;
// Find out if any arguments are required to be integer constant
// expressions.
@@ -6589,12 +6912,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vget_lane_i16:
case NEON::BI__builtin_neon_vget_lane_i32:
case NEON::BI__builtin_neon_vget_lane_i64:
+ case NEON::BI__builtin_neon_vget_lane_bf16:
case NEON::BI__builtin_neon_vget_lane_f32:
case NEON::BI__builtin_neon_vgetq_lane_i8:
case NEON::BI__builtin_neon_vgetq_lane_i16:
case NEON::BI__builtin_neon_vgetq_lane_i32:
case NEON::BI__builtin_neon_vgetq_lane_i64:
+ case NEON::BI__builtin_neon_vgetq_lane_bf16:
case NEON::BI__builtin_neon_vgetq_lane_f32:
+ case NEON::BI__builtin_neon_vduph_lane_bf16:
+ case NEON::BI__builtin_neon_vduph_laneq_bf16:
return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
case NEON::BI__builtin_neon_vrndns_f32: {
@@ -6607,11 +6934,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vset_lane_i16:
case NEON::BI__builtin_neon_vset_lane_i32:
case NEON::BI__builtin_neon_vset_lane_i64:
+ case NEON::BI__builtin_neon_vset_lane_bf16:
case NEON::BI__builtin_neon_vset_lane_f32:
case NEON::BI__builtin_neon_vsetq_lane_i8:
case NEON::BI__builtin_neon_vsetq_lane_i16:
case NEON::BI__builtin_neon_vsetq_lane_i32:
case NEON::BI__builtin_neon_vsetq_lane_i64:
+ case NEON::BI__builtin_neon_vsetq_lane_bf16:
case NEON::BI__builtin_neon_vsetq_lane_f32:
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
@@ -6628,6 +6957,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
"vsha1h");
+ case NEON::BI__builtin_neon_vcvth_bf16_f32: {
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
+ "vcvtbfp2bf");
+ }
+
// The ARM _MoveToCoprocessor builtins put the input register value as
// the first argument, but the LLVM intrinsic expects it as the third one.
case ARM::BI_MoveToCoprocessor:
@@ -6807,7 +7141,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
bool rightShift = false;
llvm::VectorType *VTy = GetNeonType(this, Type,
- getTarget().hasLegalHalfType());
+ getTarget().hasLegalHalfType(),
+ false,
+ getTarget().hasBFloat16Type());
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -6815,7 +7151,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// Many NEON builtins have identical semantics and uses in ARM and
// AArch64. Emit these in a single function.
auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
- const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
if (Builtin)
return EmitCommonNeonBuiltinExpr(
@@ -6831,19 +7167,18 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
if (VTy->getElementType()->isIntegerTy(64)) {
// Extract the other lane.
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
+ int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
// Load the value as a one-element vector.
- Ty = llvm::VectorType::get(VTy->getElementType(), 1);
+ Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
llvm::Type *Tys[] = {Ty, Int8PtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
Value *Align = getAlignmentValue32(PtrOp0);
Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
// Combine them.
- uint32_t Indices[] = {1 - Lane, Lane};
- SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
- return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
+ int Indices[] = {1 - Lane, Lane};
+ return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
}
LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vld1_lane_v: {
@@ -6966,8 +7301,9 @@ static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
// equal to the lane size. In LLVM IR, an LShr with that parameter would be
// undefined behavior, but in MVE it's legal, so we must convert it to code
// that is not undefined in IR.
- unsigned LaneBits =
- V->getType()->getVectorElementType()->getPrimitiveSizeInBits();
+ unsigned LaneBits = cast<llvm::VectorType>(V->getType())
+ ->getElementType()
+ ->getPrimitiveSizeInBits();
if (Shift == LaneBits) {
// An unsigned shift of the full lane size always generates zero, so we can
// simply emit a zero vector. A signed shift of the full lane size does the
@@ -6988,6 +7324,86 @@ static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
return Builder.CreateVectorSplat(Elements, V);
}
+static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
+ CodeGenFunction *CGF,
+ llvm::Value *V,
+ llvm::Type *DestType) {
+ // Convert one MVE vector type into another by reinterpreting its in-register
+ // format.
+ //
+ // On little-endian targets this is identical to a bitcast (which
+ // reinterprets the memory format). On big-endian targets they are not
+ // necessarily the same, because the register and memory formats map to
+ // each other differently depending on the lane size.
+ //
+ // We generate a bitcast whenever we can (if we're little-endian, or if the
+ // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
+ // that performs the different kind of reinterpretation.
+ if (CGF->getTarget().isBigEndian() &&
+ V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
+ {DestType, V->getType()}),
+ V);
+ } else {
+ return Builder.CreateBitCast(V, DestType);
+ }
+}
+
+static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
+ // Make a shufflevector that extracts every other element of a vector (evens
+ // or odds, as desired).
+ SmallVector<int, 16> Indices;
+ unsigned InputElements =
+ cast<llvm::VectorType>(V->getType())->getNumElements();
+ for (unsigned i = 0; i < InputElements; i += 2)
+ Indices.push_back(i + Odd);
+ return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
+ Indices);
+}
+
+static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
+ llvm::Value *V1) {
+ // Make a shufflevector that interleaves two vectors element by element.
+ assert(V0->getType() == V1->getType() && "Can't zip different vector types");
+ SmallVector<int, 16> Indices;
+ unsigned InputElements =
+ cast<llvm::VectorType>(V0->getType())->getNumElements();
+ for (unsigned i = 0; i < InputElements; i++) {
+ Indices.push_back(i);
+ Indices.push_back(i + InputElements);
+ }
+ return Builder.CreateShuffleVector(V0, V1, Indices);
+}
+
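(Worked example on small vectors, values illustrative: with V = {a, b, c, d},

    // VectorUnzip(V, /*Odd=*/false) uses indices {0, 2} -> {a, c}
    // VectorUnzip(V, /*Odd=*/true)  uses indices {1, 3} -> {b, d}

and with V0 = {a, b}, V1 = {x, y},

    // VectorZip(V0, V1) uses indices {0, 2, 1, 3} -> {a, x, b, y}

since shuffle indices select from the concatenation of the two operands.)
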
+template<unsigned HighBit, unsigned OtherBits>
+static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
+ // MVE-specific helper function to make a vector splat of a constant such as
+ // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
+ llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
+ unsigned LaneBits = T->getPrimitiveSizeInBits();
+ uint32_t Value = HighBit << (LaneBits - 1);
+ if (OtherBits)
+ Value |= (1UL << (LaneBits - 1)) - 1;
+ llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
+ return ARMMVEVectorSplat(Builder, Lane);
+}
+
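(Worked example: on 32-bit lanes, ARMMVEConstantSplat<1, 0> computes
Value = 1u << 31 = 0x80000000 (INT_MIN in every lane),
ARMMVEConstantSplat<0, 1> computes (1u << 31) - 1 = 0x7FFFFFFF (INT_MAX), and
ARMMVEConstantSplat<1, 1> yields 0xFFFFFFFF (UINT_MAX), each splatted across
all lanes.)
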
+static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
+ llvm::Value *V,
+ unsigned ReverseWidth) {
+ // MVE-specific helper function which reverses the elements of a
+ // vector within every (ReverseWidth)-bit collection of lanes.
+ SmallVector<int, 16> Indices;
+ unsigned LaneSize = V->getType()->getScalarSizeInBits();
+ unsigned Elements = 128 / LaneSize;
+ unsigned Mask = ReverseWidth / LaneSize - 1;
+ for (unsigned i = 0; i < Elements; i++)
+ Indices.push_back(i ^ Mask);
+ return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
+ Indices);
+}
+
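(Worked example: for a v16i8 input and ReverseWidth = 32, LaneSize = 8,
Elements = 16, Mask = 32 / 8 - 1 = 3, so lane i moves to lane i ^ 3:
{0,1,2,3, 4,5,6,7, ...} becomes {3,2,1,0, 7,6,5,4, ...}, a byte reverse
within each 32-bit word.)
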
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue,
@@ -7089,6 +7505,17 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
llvm_unreachable("unknown custom codegen type.");
}
+Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E,
+ ReturnValueSlot ReturnValue,
+ llvm::Triple::ArchType Arch) {
+ switch (BuiltinID) {
+ default:
+ return nullptr;
+#include "clang/Basic/arm_cde_builtin_cg.inc"
+ }
+}
+
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
const CallExpr *E,
SmallVectorImpl<Value *> &Ops,
@@ -7238,7 +7665,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
}
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
- llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
+ auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
Op = Builder.CreateBitCast(Op, Int16Ty);
Value *V = UndefValue::get(VTy);
llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
@@ -7246,9 +7673,840 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
return Op;
}
+/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
+/// access builtin. Only required if it can't be inferred from the base pointer
+/// operand.
+llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) {
+ switch (TypeFlags.getMemEltType()) {
+ case SVETypeFlags::MemEltTyDefault:
+ return getEltType(TypeFlags);
+ case SVETypeFlags::MemEltTyInt8:
+ return Builder.getInt8Ty();
+ case SVETypeFlags::MemEltTyInt16:
+ return Builder.getInt16Ty();
+ case SVETypeFlags::MemEltTyInt32:
+ return Builder.getInt32Ty();
+ case SVETypeFlags::MemEltTyInt64:
+ return Builder.getInt64Ty();
+ }
+ llvm_unreachable("Unknown MemEltType");
+}
+
+llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) {
+ switch (TypeFlags.getEltType()) {
+ default:
+ llvm_unreachable("Invalid SVETypeFlag!");
+
+ case SVETypeFlags::EltTyInt8:
+ return Builder.getInt8Ty();
+ case SVETypeFlags::EltTyInt16:
+ return Builder.getInt16Ty();
+ case SVETypeFlags::EltTyInt32:
+ return Builder.getInt32Ty();
+ case SVETypeFlags::EltTyInt64:
+ return Builder.getInt64Ty();
+
+ case SVETypeFlags::EltTyFloat16:
+ return Builder.getHalfTy();
+ case SVETypeFlags::EltTyFloat32:
+ return Builder.getFloatTy();
+ case SVETypeFlags::EltTyFloat64:
+ return Builder.getDoubleTy();
+
+ case SVETypeFlags::EltTyBFloat16:
+ return Builder.getBFloatTy();
+
+ case SVETypeFlags::EltTyBool8:
+ case SVETypeFlags::EltTyBool16:
+ case SVETypeFlags::EltTyBool32:
+ case SVETypeFlags::EltTyBool64:
+ return Builder.getInt1Ty();
+ }
+}
+
+// Return the llvm predicate vector type corresponding to the specified element
+// TypeFlags.
+llvm::ScalableVectorType *
+CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) {
+ switch (TypeFlags.getEltType()) {
+ default: llvm_unreachable("Unhandled SVETypeFlag!");
+
+ case SVETypeFlags::EltTyInt8:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ case SVETypeFlags::EltTyInt16:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
+ case SVETypeFlags::EltTyInt32:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
+ case SVETypeFlags::EltTyInt64:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
+
+ case SVETypeFlags::EltTyBFloat16:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
+ case SVETypeFlags::EltTyFloat16:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
+ case SVETypeFlags::EltTyFloat32:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
+ case SVETypeFlags::EltTyFloat64:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
+
+ case SVETypeFlags::EltTyBool8:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ case SVETypeFlags::EltTyBool16:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
+ case SVETypeFlags::EltTyBool32:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
+ case SVETypeFlags::EltTyBool64:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
+ }
+}
+
+// Return the llvm vector type corresponding to the specified element TypeFlags.
+llvm::ScalableVectorType *
+CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
+ switch (TypeFlags.getEltType()) {
+ default:
+ llvm_unreachable("Invalid SVETypeFlag!");
+
+ case SVETypeFlags::EltTyInt8:
+ return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
+ case SVETypeFlags::EltTyInt16:
+ return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
+ case SVETypeFlags::EltTyInt32:
+ return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
+ case SVETypeFlags::EltTyInt64:
+ return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
+
+ case SVETypeFlags::EltTyFloat16:
+ return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
+ case SVETypeFlags::EltTyBFloat16:
+ return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
+ case SVETypeFlags::EltTyFloat32:
+ return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
+ case SVETypeFlags::EltTyFloat64:
+ return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
+
+ case SVETypeFlags::EltTyBool8:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ case SVETypeFlags::EltTyBool16:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
+ case SVETypeFlags::EltTyBool32:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
+ case SVETypeFlags::EltTyBool64:
+ return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
+ }
+}
+
+llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) {
+ Function *Ptrue =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
+ return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
+}
+
+constexpr unsigned SVEBitsPerBlock = 128;
+
+static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
+ unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
+ return llvm::ScalableVectorType::get(EltTy, NumElts);
+}
+
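(Worked example: for EltTy = i32 this returns
llvm::ScalableVectorType::get(i32, 128 / 32), i.e. <vscale x 4 x i32>: four
32-bit lanes per 128-bit SVE granule, scaled at run time by the hardware
vector length.)
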
+// Reinterpret the input predicate so that it can be used to correctly isolate
+// the elements of the specified datatype.
+Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
+ llvm::ScalableVectorType *VTy) {
+ auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
+ if (Pred->getType() == RTy)
+ return Pred;
+
+ unsigned IntID;
+ llvm::Type *IntrinsicTy;
+ switch (VTy->getMinNumElements()) {
+ default:
+ llvm_unreachable("unsupported element count!");
+ case 2:
+ case 4:
+ case 8:
+ IntID = Intrinsic::aarch64_sve_convert_from_svbool;
+ IntrinsicTy = RTy;
+ break;
+ case 16:
+ IntID = Intrinsic::aarch64_sve_convert_to_svbool;
+ IntrinsicTy = Pred->getType();
+ break;
+ }
+
+ Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
+ Value *C = Builder.CreateCall(F, Pred);
+ assert(C->getType() == RTy && "Unexpected return type!");
+ return C;
+}
+
+Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ auto *ResultTy = getSVEType(TypeFlags);
+ auto *OverloadedTy =
+ llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
+
+ // At the ACLE level there's only one predicate type, svbool_t, which is
+ // mapped to <n x 16 x i1>. However, this might be incompatible with the
+ // actual type being loaded. For example, when loading doubles (64-bit
+ // elements) the predicate should be <n x 2 x i1> instead. At the IR level
+ // the types of the predicate and the data being loaded must match. Cast
+ // accordingly.
+ Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
+
+ Function *F = nullptr;
+ if (Ops[1]->getType()->isVectorTy())
+ // This is the "vector base, scalar offset" case. In order to uniquely
+ // map this built-in to an LLVM IR intrinsic, we need both the return type
+ // and the type of the vector base.
+ F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
+ else
+ // This is the "scalar base, vector offset case". The type of the offset
+ // is encoded in the name of the intrinsic. We only need to specify the
+ // return type in order to uniquely map this built-in to an LLVM IR
+ // intrinsic.
+ F = CGM.getIntrinsic(IntID, OverloadedTy);
+
+ // Pass 0 when the offset is missing. This can only be applied when using
+ // the "vector base" addressing mode for which ACLE allows no offset. The
+ // corresponding LLVM IR always requires an offset.
+ if (Ops.size() == 2) {
+ assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
+ Ops.push_back(ConstantInt::get(Int64Ty, 0));
+ }
+
+ // For "vector base, scalar index" scale the index so that it becomes a
+ // scalar offset.
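+ // E.g. for f64 elements an index of 3 becomes the byte offset 24.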
+ if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
+ unsigned BytesPerElt =
+ OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
+ Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+ Ops[2] = Builder.CreateMul(Ops[2], Scale);
+ }
+
+ Value *Call = Builder.CreateCall(F, Ops);
+
+ // The following sext/zext is only needed when ResultTy != OverloadedTy. In
+ // other cases it's folded into a nop.
+ return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
+ : Builder.CreateSExt(Call, ResultTy);
+}
+
+Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ auto *SrcDataTy = getSVEType(TypeFlags);
+ auto *OverloadedTy =
+ llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
+
+ // In ACLE the source data is passed in the last argument, whereas in LLVM IR
+ // it's the first argument. Move it accordingly.
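+ // E.g. svst1_scatter_u64base_f64(pg, bases, data) arrives here as
+ // (pg, bases, data) and is reordered to (data, pg, bases); a zero offset is
+ // appended below.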
+ Ops.insert(Ops.begin(), Ops.pop_back_val());
+
+ Function *F = nullptr;
+ if (Ops[2]->getType()->isVectorTy())
+ // This is the "vector base, scalar offset" case. In order to uniquely
+ // map this built-in to an LLVM IR intrinsic, we need both the return type
+ // and the type of the vector base.
+ F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
+ else
+ // This is the "scalar base, vector offset case". The type of the offset
+ // is encoded in the name of the intrinsic. We only need to specify the
+ // return type in order to uniquely map this built-in to an LLVM IR
+ // intrinsic.
+ F = CGM.getIntrinsic(IntID, OverloadedTy);
+
+ // Pass 0 when the offset is missing. This can only be applied when using
+ // the "vector base" addressing mode for which ACLE allows no offset. The
+ // corresponding LLVM IR always requires an offset.
+ if (Ops.size() == 3) {
+ assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
+ Ops.push_back(ConstantInt::get(Int64Ty, 0));
+ }
+
+ // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
+ // folded into a nop.
+ Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
+
+ // At the ACLE level there's only one predicate type, svbool_t, which is
+ // mapped to <n x 16 x i1>. However, this might be incompatible with the
+ // actual type being stored. For example, when storing doubles (64-bit
+ // elements) the predicate should be <n x 2 x i1> instead. At the IR level
+ // the types of the predicate and the data being stored must match. Cast
+ // accordingly.
+ Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+
+ // For "vector base, scalar index" scale the index so that it becomes a
+ // scalar offset.
+ if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
+ unsigned BytesPerElt =
+ OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
+ Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+ Ops[3] = Builder.CreateMul(Ops[3], Scale);
+ }
+
+ return Builder.CreateCall(F, Ops);
+}
+
+Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ // The gather prefetches are overloaded on the vector input - this can either
+ // be the vector of base addresses or the vector of offsets.
+ auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
+ if (!OverloadedTy)
+ OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
+
+ // Cast the predicate from svbool_t to the right number of elements.
+ Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
+
+ // Handle the "vector base" addressing modes.
+ if (Ops[1]->getType()->isVectorTy()) {
+ if (Ops.size() == 3) {
+ // Pass 0 for 'vector+imm' when the index is omitted.
+ Ops.push_back(ConstantInt::get(Int64Ty, 0));
+
+ // The sv_prfop is the last operand in the builtin and IR intrinsic.
+ std::swap(Ops[2], Ops[3]);
+ } else {
+ // Index needs to be passed as scaled offset.
+ llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
+ unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
+ Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+ Ops[2] = Builder.CreateMul(Ops[2], Scale);
+ }
+ }
+
+ Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
+ return Builder.CreateCall(F, Ops);
+}
+
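+// Emit an ld2/ld3/ld4 structured load. The parts are returned as one wide
+// tuple vector; e.g. an ld2 of 32-bit elements produces a <vscale x 8 x i32>
+// holding two <vscale x 4 x i32> parts.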
+Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value*> &Ops,
+ unsigned IntID) {
+ llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
+ auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
+ auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
+
+ unsigned N;
+ switch (IntID) {
+ case Intrinsic::aarch64_sve_ld2:
+ N = 2;
+ break;
+ case Intrinsic::aarch64_sve_ld3:
+ N = 3;
+ break;
+ case Intrinsic::aarch64_sve_ld4:
+ N = 4;
+ break;
+ default:
+ llvm_unreachable("unknown intrinsic!");
+ }
+ auto RetTy = llvm::VectorType::get(VTy->getElementType(),
+ VTy->getElementCount() * N);
+
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
+ Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
+ Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
+ BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
+ BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
+
+ Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
+ return Builder.CreateCall(F, { Predicate, BasePtr });
+}
+
+Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value*> &Ops,
+ unsigned IntID) {
+ llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
+ auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
+ auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
+
+ unsigned N;
+ switch (IntID) {
+ case Intrinsic::aarch64_sve_st2:
+ N = 2;
+ break;
+ case Intrinsic::aarch64_sve_st3:
+ N = 3;
+ break;
+ case Intrinsic::aarch64_sve_st4:
+ N = 4;
+ break;
+ default:
+ llvm_unreachable("unknown intrinsic!");
+ }
+ auto TupleTy =
+ llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
+
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
+ Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
+ Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
+ Value *Val = Ops.back();
+ BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
+ BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
+
+ // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
+ // need to break up the tuple vector.
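+ // E.g. a two-vector tuple of 32-bit elements (<vscale x 8 x i32>) is split
+ // into two <vscale x 4 x i32> parts for st2.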
+ SmallVector<llvm::Value*, 5> Operands;
+ Function *FExtr =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
+ for (unsigned I = 0; I < N; ++I)
+ Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
+ Operands.append({Predicate, BasePtr});
+
+ Function *F = CGM.getIntrinsic(IntID, { VTy });
+ return Builder.CreateCall(F, Operands);
+}
+
+// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
+// svpmullt_pair intrinsics, with the exception that their results are bitcast
+// to a wider type.
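+// E.g. svpmullb_u64 multiplies the even-numbered u32 lanes via
+// aarch64.sve.pmullb.pair and bitcasts the <n x 4 x i32> result to
+// <n x 2 x i64>.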
+Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned BuiltinID) {
+ // Splat scalar operand to vector (intrinsics with _n infix)
+ if (TypeFlags.hasSplatOperand()) {
+ unsigned OpNo = TypeFlags.getSplatOperand();
+ Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
+ }
+
+ // The pair-wise function has a narrower overloaded type.
+ Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
+ Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
+
+ // Now bitcast to the wider result type.
+ llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
+ return EmitSVEReinterpret(Call, Ty);
+}
+
+Value *CodeGenFunction::EmitSVEMovl(SVETypeFlags TypeFlags,
+ ArrayRef<Value *> Ops, unsigned BuiltinID) {
+ llvm::Type *OverloadedTy = getSVEType(TypeFlags);
+ Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
+}
+
+Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned BuiltinID) {
+ auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
+ auto *VectorTy = getSVEVectorForElementType(MemEltTy);
+ auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+ Value *BasePtr = Ops[1];
+
+ // Apply the index operand if it hasn't been omitted.
+ if (Ops.size() > 3) {
+ BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
+ BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
+ }
+
+ // Prefetch intrinsics always expect an i8*.
+ BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
+ Value *PrfOp = Ops.back();
+
+ Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
+ return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
+}
+
+Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
+ llvm::Type *ReturnTy,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned BuiltinID,
+ bool IsZExtReturn) {
+ QualType LangPTy = E->getArg(1)->getType();
+ llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+ LangPTy->getAs<PointerType>()->getPointeeType());
+
+ // The vector type that is returned may be different from the
+ // eventual type loaded from memory.
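+ // E.g. svld1sb_s32 loads <vscale x 4 x i8> from memory and sign-extends it
+ // to the <vscale x 4 x i32> result; for svld1_s32 the extension is a no-op.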
+ auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
+ auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+ Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
+ Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
+ BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+ BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
+ Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+ Value *Load = Builder.CreateCall(F, {Predicate, BasePtr});
+
+ return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
+ : Builder.CreateSExt(Load, VectorTy);
+}
+
+Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned BuiltinID) {
+ QualType LangPTy = E->getArg(1)->getType();
+ llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+ LangPTy->getAs<PointerType>()->getPointeeType());
+
+ // The vector type that is stored may be different from the
+ // eventual type stored to memory.
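+ // E.g. svst1b_s32 truncates the <vscale x 4 x i32> data to
+ // <vscale x 4 x i8> before storing; for svst1_s32 the truncation is a no-op.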
+ auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
+ auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+ Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
+ Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
+ BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+ // The last value is always the data.
+ llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
+
+ BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
+ Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+ return Builder.CreateCall(F, {Val, Predicate, BasePtr});
+}
+
+// Limit the amount of scalable-vector LLVM IR generated for ACLE builtins by
+// using the SVE dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
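+// E.g. splatting an i32 scalar yields a <vscale x 4 x i32> with every lane
+// equal to the scalar.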
+Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
+ auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty);
+ return Builder.CreateCall(F, Scalar);
+}
+
+Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
+ return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
+}
+
+Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
+ // FIXME: For big endian this needs an additional REV, or needs a separate
+ // intrinsic that is code-generated as a no-op, because the LLVM bitcast
+ // instruction is defined as 'bitwise' equivalent from memory point of
+ // view (when storing/reloading), whereas the svreinterpret builtin
+ // implements bitwise equivalent cast from register point of view.
+ // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
+ return Builder.CreateBitCast(Val, Ty);
+}
+
+static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
+ SmallVectorImpl<Value *> &Ops) {
+ auto *SplatZero = Constant::getNullValue(Ty);
+ Ops.insert(Ops.begin(), SplatZero);
+}
+
+static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
+ SmallVectorImpl<Value *> &Ops) {
+ auto *SplatUndef = UndefValue::get(Ty);
+ Ops.insert(Ops.begin(), SplatUndef);
+}
+
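+// Compute the list of overloaded types that uniquely identifies the LLVM
+// intrinsic for this builtin; e.g. the "while" compares are overloaded on
+// both the returned predicate type and their scalar operand type.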
+SmallVector<llvm::Type *, 2> CodeGenFunction::getSVEOverloadTypes(
+ SVETypeFlags TypeFlags, llvm::Type *ResultType, ArrayRef<Value *> Ops) {
+ if (TypeFlags.isOverloadNone())
+ return {};
+
+ llvm::Type *DefaultType = getSVEType(TypeFlags);
+
+ if (TypeFlags.isOverloadWhile())
+ return {DefaultType, Ops[1]->getType()};
+
+ if (TypeFlags.isOverloadWhileRW())
+ return {getSVEPredType(TypeFlags), Ops[0]->getType()};
+
+ if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet())
+ return {Ops[0]->getType(), Ops.back()->getType()};
+
+ if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet())
+ return {ResultType, Ops[0]->getType()};
+
+ assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
+ return {DefaultType};
+}
+
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ // Find out if any arguments are required to be integer constant expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
+ llvm::Type *Ty = ConvertType(E->getType());
+ if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+ BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+ Value *Val = EmitScalarExpr(E->getArg(0));
+ return EmitSVEReinterpret(Val, Ty);
+ }
+
+ llvm::SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ if ((ICEArguments & (1 << i)) == 0)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ else {
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ llvm::APSInt Result;
+ if (!E->getArg(i)->isIntegerConstantExpr(Result, getContext()))
+ llvm_unreachable("Expected argument to be a constant");
+
+ // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
+ // truncate because the immediate has been range checked and no valid
+ // immediate requires more than a handful of bits.
+ Result = Result.extOrTrunc(32);
+ Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+ }
+ }
+
+ auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
+ AArch64SVEIntrinsicsProvenSorted);
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ if (TypeFlags.isLoad())
+ return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
+ TypeFlags.isZExtReturn());
+ else if (TypeFlags.isStore())
+ return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isGatherLoad())
+ return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isScatterStore())
+ return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isPrefetch())
+ return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isGatherPrefetch())
+ return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isStructLoad())
+ return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isStructStore())
+ return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isUndef())
+ return UndefValue::get(Ty);
+ else if (Builtin->LLVMIntrinsic != 0) {
+ if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
+ InsertExplicitZeroOperand(Builder, Ty, Ops);
+
+ if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
+ InsertExplicitUndefOperand(Builder, Ty, Ops);
+
+ // Some ACLE builtins leave out the argument to specify the predicate
+ // pattern, which is expected to be expanded to an SV_ALL pattern.
+ if (TypeFlags.isAppendSVALL())
+ Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
+ if (TypeFlags.isInsertOp1SVALL())
+ Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
+
+ // Predicates must match the main datatype.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
+ if (PredTy->getElementType()->isIntegerTy(1))
+ Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
+
+ // Splat scalar operand to vector (intrinsics with _n infix)
+ if (TypeFlags.hasSplatOperand()) {
+ unsigned OpNo = TypeFlags.getSplatOperand();
+ Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
+ }
+
+ if (TypeFlags.isReverseCompare())
+ std::swap(Ops[1], Ops[2]);
+
+ if (TypeFlags.isReverseUSDOT())
+ std::swap(Ops[1], Ops[2]);
+
+ // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
+ if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
+ llvm::Type *OpndTy = Ops[1]->getType();
+ auto *SplatZero = Constant::getNullValue(OpndTy);
+ Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy);
+ Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero});
+ }
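+ // E.g. for an _z arithmetic builtin the first data operand has its
+ // inactive lanes zeroed by the sel, so the merging intrinsic then yields
+ // zero in those lanes.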
+
+ Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
+ getSVEOverloadTypes(TypeFlags, Ty, Ops));
+ Value *Call = Builder.CreateCall(F, Ops);
+
+ // Predicate results must be converted to svbool_t.
+ if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
+ if (PredTy->getScalarType()->isIntegerTy(1))
+ Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
+
+ return Call;
+ }
+
+ switch (BuiltinID) {
+ default:
+ return nullptr;
+
+ case SVE::BI__builtin_sve_svmov_b_z: {
+ // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ llvm::Type* OverloadedTy = getSVEType(TypeFlags);
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
+ return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
+ }
+
+ case SVE::BI__builtin_sve_svnot_b_z: {
+ // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ llvm::Type* OverloadedTy = getSVEType(TypeFlags);
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
+ return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
+ }
+
+ case SVE::BI__builtin_sve_svmovlb_u16:
+ case SVE::BI__builtin_sve_svmovlb_u32:
+ case SVE::BI__builtin_sve_svmovlb_u64:
+ return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
+
+ case SVE::BI__builtin_sve_svmovlb_s16:
+ case SVE::BI__builtin_sve_svmovlb_s32:
+ case SVE::BI__builtin_sve_svmovlb_s64:
+ return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
+
+ case SVE::BI__builtin_sve_svmovlt_u16:
+ case SVE::BI__builtin_sve_svmovlt_u32:
+ case SVE::BI__builtin_sve_svmovlt_u64:
+ return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
+
+ case SVE::BI__builtin_sve_svmovlt_s16:
+ case SVE::BI__builtin_sve_svmovlt_s32:
+ case SVE::BI__builtin_sve_svmovlt_s64:
+ return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
+
+ case SVE::BI__builtin_sve_svpmullt_u16:
+ case SVE::BI__builtin_sve_svpmullt_u64:
+ case SVE::BI__builtin_sve_svpmullt_n_u16:
+ case SVE::BI__builtin_sve_svpmullt_n_u64:
+ return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
+
+ case SVE::BI__builtin_sve_svpmullb_u16:
+ case SVE::BI__builtin_sve_svpmullb_u64:
+ case SVE::BI__builtin_sve_svpmullb_n_u16:
+ case SVE::BI__builtin_sve_svpmullb_n_u64:
+ return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
+
+ case SVE::BI__builtin_sve_svdup_n_b8:
+ case SVE::BI__builtin_sve_svdup_n_b16:
+ case SVE::BI__builtin_sve_svdup_n_b32:
+ case SVE::BI__builtin_sve_svdup_n_b64: {
+ Value *CmpNE =
+ Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
+ llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
+ Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
+ return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
+ }
+
+ case SVE::BI__builtin_sve_svdupq_n_b8:
+ case SVE::BI__builtin_sve_svdupq_n_b16:
+ case SVE::BI__builtin_sve_svdupq_n_b32:
+ case SVE::BI__builtin_sve_svdupq_n_b64:
+ case SVE::BI__builtin_sve_svdupq_n_u8:
+ case SVE::BI__builtin_sve_svdupq_n_s8:
+ case SVE::BI__builtin_sve_svdupq_n_u64:
+ case SVE::BI__builtin_sve_svdupq_n_f64:
+ case SVE::BI__builtin_sve_svdupq_n_s64:
+ case SVE::BI__builtin_sve_svdupq_n_u16:
+ case SVE::BI__builtin_sve_svdupq_n_f16:
+ case SVE::BI__builtin_sve_svdupq_n_bf16:
+ case SVE::BI__builtin_sve_svdupq_n_s16:
+ case SVE::BI__builtin_sve_svdupq_n_u32:
+ case SVE::BI__builtin_sve_svdupq_n_f32:
+ case SVE::BI__builtin_sve_svdupq_n_s32: {
+ // These builtins are implemented by storing each element to an array and
+ // using ld1rq to materialize a vector.
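+ // E.g. svdupq_n_b8 stores its 16 boolean arguments as i8 values, splats
+ // the 128-bit quadword with ld1rq, and compares the result against zero to
+ // form the predicate.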
+ unsigned NumOpnds = Ops.size();
+
+ bool IsBoolTy =
+ cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
+
+ // For svdupq_n_b* the element type is an integer of width 128/numelts,
+ // so that the compare can use the width that is natural for the expected
+ // number of predicate lanes.
+ llvm::Type *EltTy = Ops[0]->getType();
+ if (IsBoolTy)
+ EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
+
+ Address Alloca = CreateTempAlloca(llvm::ArrayType::get(EltTy, NumOpnds),
+ CharUnits::fromQuantity(16));
+ for (unsigned I = 0; I < NumOpnds; ++I)
+ Builder.CreateDefaultAlignedStore(
+ IsBoolTy ? Builder.CreateZExt(Ops[I], EltTy) : Ops[I],
+ Builder.CreateGEP(Alloca.getPointer(),
+ {Builder.getInt64(0), Builder.getInt64(I)}));
+
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ Value *Pred = EmitSVEAllTruePred(TypeFlags);
+
+ llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_ld1rq, OverloadedTy);
+ Value *Alloca0 = Builder.CreateGEP(
+ Alloca.getPointer(), {Builder.getInt64(0), Builder.getInt64(0)});
+ Value *LD1RQ = Builder.CreateCall(F, {Pred, Alloca0});
+
+ if (!IsBoolTy)
+ return LD1RQ;
+
+ // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
+ F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
+ : Intrinsic::aarch64_sve_cmpne_wide,
+ OverloadedTy);
+ Value *Call =
+ Builder.CreateCall(F, {Pred, LD1RQ, EmitSVEDupX(Builder.getInt64(0))});
+ return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
+ }
+
+ case SVE::BI__builtin_sve_svpfalse_b:
+ return ConstantInt::getFalse(Ty);
+
+ case SVE::BI__builtin_sve_svlen_bf16:
+ case SVE::BI__builtin_sve_svlen_f16:
+ case SVE::BI__builtin_sve_svlen_f32:
+ case SVE::BI__builtin_sve_svlen_f64:
+ case SVE::BI__builtin_sve_svlen_s8:
+ case SVE::BI__builtin_sve_svlen_s16:
+ case SVE::BI__builtin_sve_svlen_s32:
+ case SVE::BI__builtin_sve_svlen_s64:
+ case SVE::BI__builtin_sve_svlen_u8:
+ case SVE::BI__builtin_sve_svlen_u16:
+ case SVE::BI__builtin_sve_svlen_u32:
+ case SVE::BI__builtin_sve_svlen_u64: {
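+ // svlen returns the element count of a full vector: the per-granule count
+ // times vscale; e.g. svlen_f64 yields 2 * vscale.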
+ SVETypeFlags TF(Builtin->TypeModifier);
+ auto VTy = cast<llvm::VectorType>(getSVEType(TF));
+ auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min);
+
+ Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
+ return Builder.CreateMul(NumEls, Builder.CreateCall(F));
+ }
+
+ case SVE::BI__builtin_sve_svtbl2_u8:
+ case SVE::BI__builtin_sve_svtbl2_s8:
+ case SVE::BI__builtin_sve_svtbl2_u16:
+ case SVE::BI__builtin_sve_svtbl2_s16:
+ case SVE::BI__builtin_sve_svtbl2_u32:
+ case SVE::BI__builtin_sve_svtbl2_s32:
+ case SVE::BI__builtin_sve_svtbl2_u64:
+ case SVE::BI__builtin_sve_svtbl2_s64:
+ case SVE::BI__builtin_sve_svtbl2_f16:
+ case SVE::BI__builtin_sve_svtbl2_bf16:
+ case SVE::BI__builtin_sve_svtbl2_f32:
+ case SVE::BI__builtin_sve_svtbl2_f64: {
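+ // svtbl2 takes a two-vector table tuple: extract both parts with
+ // aarch64.sve.tuple.get and pass them, together with the indices, to tbl2.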
+ SVETypeFlags TF(Builtin->TypeModifier);
+ auto VTy = cast<llvm::VectorType>(getSVEType(TF));
+ auto TupleTy = llvm::VectorType::get(VTy->getElementType(),
+ VTy->getElementCount() * 2);
+ Function *FExtr =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
+ Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
+ Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)});
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
+ return Builder.CreateCall(F, {V0, V1, Ops[1]});
+ }
+ }
+
+ // Should not happen: every case in the switch above returns.
+ return nullptr;
+}
+
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
llvm::Triple::ArchType Arch) {
+ if (BuiltinID >= AArch64::FirstSVEBuiltin &&
+ BuiltinID <= AArch64::LastSVEBuiltin)
+ return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
+
unsigned HintID = static_cast<unsigned>(-1);
switch (BuiltinID) {
default: break;
@@ -7589,9 +8847,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
BuiltinID == AArch64::BI__builtin_arm_wsrp) {
- bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
- BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
- BuiltinID == AArch64::BI__builtin_arm_rsrp;
+ SpecialRegisterAccessKind AccessKind = Write;
+ if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
+ BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_rsrp)
+ AccessKind = VolatileRead;
bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
BuiltinID == AArch64::BI__builtin_arm_wsrp;
@@ -7609,7 +8869,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
ValueType = Int32Ty;
}
- return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+ return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
+ AccessKind);
}
if (BuiltinID == AArch64::BI_ReadStatusReg ||
@@ -7665,7 +8926,27 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
assert(Error == ASTContext::GE_None && "Should not codegen an error");
llvm::SmallVector<Value*, 4> Ops;
+ Address PtrOp0 = Address::invalid();
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ if (i == 0) {
+ switch (BuiltinID) {
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v:
+ case NEON::BI__builtin_neon_vld1_lane_v:
+ case NEON::BI__builtin_neon_vld1q_lane_v:
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ case NEON::BI__builtin_neon_vst1_lane_v:
+ case NEON::BI__builtin_neon_vst1q_lane_v:
+ // Get the alignment for the argument in addition to the value;
+ // we'll use it later.
+ PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
+ Ops.push_back(PtrOp0.getPointer());
+ continue;
+ }
+ }
if ((ICEArguments & (1 << i)) == 0) {
Ops.push_back(EmitScalarExpr(E->getArg(i)));
} else {
@@ -7680,7 +8961,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
- const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
if (Builtin) {
@@ -7896,7 +9177,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
}
case NEON::BI__builtin_neon_vpaddd_s64: {
- llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
+ auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
Value *Vec = EmitScalarExpr(E->getArg(0));
// The vector is v2f64, so make sure it's bitcast to that.
Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
@@ -7908,8 +9189,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateAdd(Op0, Op1, "vpaddd");
}
case NEON::BI__builtin_neon_vpaddd_f64: {
- llvm::Type *Ty =
- llvm::VectorType::get(DoubleTy, 2);
+ auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
Value *Vec = EmitScalarExpr(E->getArg(0));
// The vector is v2f64, so make sure it's bitcast to that.
Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
@@ -7921,8 +9201,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
}
case NEON::BI__builtin_neon_vpadds_f32: {
- llvm::Type *Ty =
- llvm::VectorType::get(FloatTy, 2);
+ auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
Value *Vec = EmitScalarExpr(E->getArg(0));
// The vector is v2f32, so make sure it's bitcast to that.
Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
@@ -8085,97 +9364,107 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vset_lane_i16:
case NEON::BI__builtin_neon_vset_lane_i32:
case NEON::BI__builtin_neon_vset_lane_i64:
+ case NEON::BI__builtin_neon_vset_lane_bf16:
case NEON::BI__builtin_neon_vset_lane_f32:
case NEON::BI__builtin_neon_vsetq_lane_i8:
case NEON::BI__builtin_neon_vsetq_lane_i16:
case NEON::BI__builtin_neon_vsetq_lane_i32:
case NEON::BI__builtin_neon_vsetq_lane_i64:
+ case NEON::BI__builtin_neon_vsetq_lane_bf16:
case NEON::BI__builtin_neon_vsetq_lane_f32:
Ops.push_back(EmitScalarExpr(E->getArg(2)));
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
case NEON::BI__builtin_neon_vset_lane_f64:
// The vector type needs a cast for the v1f64 variant.
- Ops[1] = Builder.CreateBitCast(Ops[1],
- llvm::VectorType::get(DoubleTy, 1));
+ Ops[1] =
+ Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
Ops.push_back(EmitScalarExpr(E->getArg(2)));
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
case NEON::BI__builtin_neon_vsetq_lane_f64:
// The vector type needs a cast for the v2f64 variant.
- Ops[1] = Builder.CreateBitCast(Ops[1],
- llvm::VectorType::get(DoubleTy, 2));
+ Ops[1] =
+ Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
Ops.push_back(EmitScalarExpr(E->getArg(2)));
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
case NEON::BI__builtin_neon_vget_lane_i8:
case NEON::BI__builtin_neon_vdupb_lane_i8:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vgetq_lane_i8:
case NEON::BI__builtin_neon_vdupb_laneq_i8:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vget_lane_i16:
case NEON::BI__builtin_neon_vduph_lane_i16:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vgetq_lane_i16:
case NEON::BI__builtin_neon_vduph_laneq_i16:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vget_lane_i32:
case NEON::BI__builtin_neon_vdups_lane_i32:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vdups_lane_f32:
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::VectorType::get(FloatTy, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vdups_lane");
case NEON::BI__builtin_neon_vgetq_lane_i32:
case NEON::BI__builtin_neon_vdups_laneq_i32:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vget_lane_i64:
case NEON::BI__builtin_neon_vdupd_lane_i64:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vdupd_lane_f64:
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::VectorType::get(DoubleTy, 1));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vdupd_lane");
case NEON::BI__builtin_neon_vgetq_lane_i64:
case NEON::BI__builtin_neon_vdupd_laneq_i64:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vget_lane_f32:
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::VectorType::get(FloatTy, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vget_lane_f64:
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::VectorType::get(DoubleTy, 1));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vgetq_lane_f32:
case NEON::BI__builtin_neon_vdups_laneq_f32:
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::VectorType::get(FloatTy, 4));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vgetq_lane_f64:
case NEON::BI__builtin_neon_vdupd_laneq_f64:
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::VectorType::get(DoubleTy, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vaddh_f16:
@@ -8190,18 +9479,20 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vdivh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(1)));
return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
- case NEON::BI__builtin_neon_vfmah_f16: {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+ case NEON::BI__builtin_neon_vfmah_f16:
// NEON intrinsic puts accumulator first, unlike the LLVM fma.
- return Builder.CreateCall(F,
- {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
- }
+ return emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
+ {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
case NEON::BI__builtin_neon_vfmsh_f16: {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+ // FIXME: This should be an fneg instruction:
Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+
// NEON intrinsic puts accumulator first, unlike the LLVM fma.
- return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+ return emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
+ {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
}
case NEON::BI__builtin_neon_vaddd_s64:
case NEON::BI__builtin_neon_vaddd_u64:
@@ -8214,7 +9505,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
SmallVector<Value *, 2> ProductOps;
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
- llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
+ auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
ProductOps, "vqdmlXl");
Constant *CI = ConstantInt::get(SizeTy, 0);
@@ -8311,7 +9602,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
SmallVector<Value *, 2> ProductOps;
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
ProductOps.push_back(vectorWrapScalar16(Ops[2]));
- llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
+ auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
ProductOps, "vqdmlXl");
Constant *CI = ConstantInt::get(SizeTy, 0);
@@ -8358,10 +9649,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
: Intrinsic::aarch64_neon_sqsub;
return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
}
+ case NEON::BI__builtin_neon_vget_lane_bf16:
+ case NEON::BI__builtin_neon_vduph_lane_bf16:
case NEON::BI__builtin_neon_vduph_lane_f16: {
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
}
+ case NEON::BI__builtin_neon_vgetq_lane_bf16:
+ case NEON::BI__builtin_neon_vduph_laneq_bf16:
case NEON::BI__builtin_neon_vduph_laneq_f16: {
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
@@ -8520,8 +9815,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// Not all intrinsics handled by the common case work for AArch64 yet, so only
// defer to common code if it's been added to our special map.
- Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
- AArch64SIMDIntrinsicsProvenSorted);
+ Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
+ AArch64SIMDIntrinsicsProvenSorted);
if (Builtin)
return EmitCommonNeonBuiltinExpr(
@@ -8559,16 +9854,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = Addend;
// Now adjust things to handle the lane access.
- llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
- llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
- VTy;
+ auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
+ ? llvm::FixedVectorType::get(VTy->getElementType(),
+ VTy->getNumElements() / 2)
+ : VTy;
llvm::Constant *cst = cast<Constant>(Ops[3]);
- Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
+ Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
Ops.pop_back();
- Int = Intrinsic::fma;
+ Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
+ : Intrinsic::fma;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
}
case NEON::BI__builtin_neon_vfma_laneq_v: {
@@ -8581,31 +9878,35 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
NeonTypeFlags(NeonTypeFlags::Float64, false, true));
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
- Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
+ Value *Result = emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
+ DoubleTy, {Ops[1], Ops[2], Ops[0]});
return Builder.CreateBitCast(Result, Ty);
}
- Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
- VTy->getNumElements() * 2);
+ auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
+ VTy->getNumElements() * 2);
Ops[2] = Builder.CreateBitCast(Ops[2], STy);
- Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
+ Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
cast<ConstantInt>(Ops[3]));
Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
- return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
+ return emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
+ {Ops[2], Ops[1], Ops[0]});
}
case NEON::BI__builtin_neon_vfmaq_laneq_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
- return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
+ return emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
+ {Ops[2], Ops[1], Ops[0]});
}
case NEON::BI__builtin_neon_vfmah_lane_f16:
case NEON::BI__builtin_neon_vfmas_lane_f32:
@@ -8615,9 +9916,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vfmad_laneq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(3)));
llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
- Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
+ return emitCallMaybeConstrainedFPBuiltin(
+ *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
+ {Ops[1], Ops[2], Ops[0]});
}
case NEON::BI__builtin_neon_vmull_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
@@ -8657,8 +9959,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
unsigned ArgElts = VTy->getNumElements();
llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
unsigned BitWidth = EltTy->getBitWidth();
- llvm::Type *ArgTy = llvm::VectorType::get(
- llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
+ auto *ArgTy = llvm::FixedVectorType::get(
+ llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
llvm::Type* Tys[2] = { VTy, ArgTy };
Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
SmallVector<llvm::Value*, 1> TmpOps;
@@ -8726,27 +10028,37 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
case NEON::BI__builtin_neon_vrndah_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::round;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_round
+ : Intrinsic::round;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
}
case NEON::BI__builtin_neon_vrnda_v:
case NEON::BI__builtin_neon_vrndaq_v: {
- Int = Intrinsic::round;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_round
+ : Intrinsic::round;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
}
case NEON::BI__builtin_neon_vrndih_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::nearbyint;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_nearbyint
+ : Intrinsic::nearbyint;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
}
case NEON::BI__builtin_neon_vrndmh_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::floor;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_floor
+ : Intrinsic::floor;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
}
case NEON::BI__builtin_neon_vrndm_v:
case NEON::BI__builtin_neon_vrndmq_v: {
- Int = Intrinsic::floor;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_floor
+ : Intrinsic::floor;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
}
case NEON::BI__builtin_neon_vrndnh_f16: {
@@ -8766,32 +10078,44 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vrndph_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::ceil;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_ceil
+ : Intrinsic::ceil;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
}
case NEON::BI__builtin_neon_vrndp_v:
case NEON::BI__builtin_neon_vrndpq_v: {
- Int = Intrinsic::ceil;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_ceil
+ : Intrinsic::ceil;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
}
case NEON::BI__builtin_neon_vrndxh_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::rint;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_rint
+ : Intrinsic::rint;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
}
case NEON::BI__builtin_neon_vrndx_v:
case NEON::BI__builtin_neon_vrndxq_v: {
- Int = Intrinsic::rint;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_rint
+ : Intrinsic::rint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
}
case NEON::BI__builtin_neon_vrndh_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::trunc;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_trunc
+ : Intrinsic::trunc;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
}
case NEON::BI__builtin_neon_vrnd_v:
case NEON::BI__builtin_neon_vrndq_v: {
- Int = Intrinsic::trunc;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_trunc
+ : Intrinsic::trunc;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
}
case NEON::BI__builtin_neon_vcvt_f64_v:
@@ -8942,12 +10266,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vsqrth_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::sqrt;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_sqrt
+ : Intrinsic::sqrt;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
}
case NEON::BI__builtin_neon_vsqrt_v:
case NEON::BI__builtin_neon_vsqrtq_v: {
- Int = Intrinsic::sqrt;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_sqrt
+ : Intrinsic::sqrt;
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
}
@@ -8963,7 +10291,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddv_s8: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
@@ -8975,7 +10303,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddv_s16: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
@@ -8987,7 +10315,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddvq_s8: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
@@ -8999,7 +10327,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddvq_s16: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
@@ -9008,7 +10336,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxv_u8: {
Int = Intrinsic::aarch64_neon_umaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9017,7 +10345,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxv_u16: {
Int = Intrinsic::aarch64_neon_umaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9026,7 +10354,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxvq_u8: {
Int = Intrinsic::aarch64_neon_umaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9035,7 +10363,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxvq_u16: {
Int = Intrinsic::aarch64_neon_umaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9044,7 +10372,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxv_s8: {
Int = Intrinsic::aarch64_neon_smaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9053,7 +10381,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxv_s16: {
Int = Intrinsic::aarch64_neon_smaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9062,7 +10390,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxvq_s8: {
Int = Intrinsic::aarch64_neon_smaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9071,7 +10399,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxvq_s16: {
Int = Intrinsic::aarch64_neon_smaxv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9080,7 +10408,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxv_f16: {
Int = Intrinsic::aarch64_neon_fmaxv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 4);
+ VTy = llvm::FixedVectorType::get(HalfTy, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9089,7 +10417,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxvq_f16: {
Int = Intrinsic::aarch64_neon_fmaxv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 8);
+ VTy = llvm::FixedVectorType::get(HalfTy, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
@@ -9098,7 +10426,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminv_u8: {
Int = Intrinsic::aarch64_neon_uminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9107,7 +10435,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminv_u16: {
Int = Intrinsic::aarch64_neon_uminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9116,7 +10444,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminvq_u8: {
Int = Intrinsic::aarch64_neon_uminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9125,7 +10453,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminvq_u16: {
Int = Intrinsic::aarch64_neon_uminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9134,7 +10462,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminv_s8: {
Int = Intrinsic::aarch64_neon_sminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9143,7 +10471,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminv_s16: {
Int = Intrinsic::aarch64_neon_sminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9152,7 +10480,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminvq_s8: {
Int = Intrinsic::aarch64_neon_sminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9161,7 +10489,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminvq_s16: {
Int = Intrinsic::aarch64_neon_sminv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9170,7 +10498,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminv_f16: {
Int = Intrinsic::aarch64_neon_fminv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 4);
+ VTy = llvm::FixedVectorType::get(HalfTy, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9179,7 +10507,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminvq_f16: {
Int = Intrinsic::aarch64_neon_fminv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 8);
+ VTy = llvm::FixedVectorType::get(HalfTy, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
@@ -9188,7 +10516,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxnmv_f16: {
Int = Intrinsic::aarch64_neon_fmaxnmv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 4);
+ VTy = llvm::FixedVectorType::get(HalfTy, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
@@ -9197,7 +10525,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vmaxnmvq_f16: {
Int = Intrinsic::aarch64_neon_fmaxnmv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 8);
+ VTy = llvm::FixedVectorType::get(HalfTy, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
@@ -9206,7 +10534,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminnmv_f16: {
Int = Intrinsic::aarch64_neon_fminnmv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 4);
+ VTy = llvm::FixedVectorType::get(HalfTy, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
@@ -9215,7 +10543,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vminnmvq_f16: {
Int = Intrinsic::aarch64_neon_fminnmv;
Ty = HalfTy;
- VTy = llvm::VectorType::get(HalfTy, 8);
+ VTy = llvm::FixedVectorType::get(HalfTy, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
@@ -9229,7 +10557,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlv_u8: {
Int = Intrinsic::aarch64_neon_uaddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9238,7 +10566,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlv_u16: {
Int = Intrinsic::aarch64_neon_uaddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9246,7 +10574,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlvq_u8: {
Int = Intrinsic::aarch64_neon_uaddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9255,7 +10583,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlvq_u16: {
Int = Intrinsic::aarch64_neon_uaddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9263,7 +10591,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlv_s8: {
Int = Intrinsic::aarch64_neon_saddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9272,7 +10600,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlv_s16: {
Int = Intrinsic::aarch64_neon_saddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 4);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 4);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9280,7 +10608,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlvq_s8: {
Int = Intrinsic::aarch64_neon_saddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int8Ty, 16);
+ VTy = llvm::FixedVectorType::get(Int8Ty, 16);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9289,7 +10617,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddlvq_s16: {
Int = Intrinsic::aarch64_neon_saddlv;
Ty = Int32Ty;
- VTy = llvm::VectorType::get(Int16Ty, 8);
+ VTy = llvm::FixedVectorType::get(Int16Ty, 8);
llvm::Type *Tys[2] = { Ty, VTy };
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
@@ -9325,24 +10653,20 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
- auto Alignment = CharUnits::fromQuantity(
- BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
- return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
+ return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
}
case NEON::BI__builtin_neon_vst1_v:
case NEON::BI__builtin_neon_vst1q_v:
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
+ return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
case NEON::BI__builtin_neon_vld1_lane_v:
case NEON::BI__builtin_neon_vld1q_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- auto Alignment = CharUnits::fromQuantity(
- BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
- Ops[0] =
- Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
+ Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
+ PtrOp0.getAlignment());
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
}
case NEON::BI__builtin_neon_vld1_dup_v:
@@ -9350,10 +10674,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *V = UndefValue::get(Ty);
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- auto Alignment = CharUnits::fromQuantity(
- BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
- Ops[0] =
- Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
+ Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
+ PtrOp0.getAlignment());
llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
return EmitNeonSplat(Ops[0], CI);
@@ -9363,8 +10685,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- return Builder.CreateDefaultAlignedStore(Ops[1],
- Builder.CreateBitCast(Ops[0], Ty));
+ return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
+ PtrOp0.getAlignment());
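The vld1/vst1 family above now takes its load/store alignment from the pointer operand (PtrOp0) rather than a hardcoded 8- or 16-byte value. A minimal sketch of the user-visible effect, assuming a standard arm_neon.h toolchain:

    #include <arm_neon.h>

    /* With the change above, the alignment on the emitted vector load
       is whatever Clang can prove about 'p' (the element alignment, 4,
       in the worst case), not an assumed 8/16-byte quantum. */
    float32x4_t load_four(const float *p) {
      return vld1q_f32(p);
    }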
case NEON::BI__builtin_neon_vld2_v:
case NEON::BI__builtin_neon_vld2q_v: {
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
@@ -9538,7 +10860,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back(i+vi);
Indices.push_back(i+e+vi);
@@ -9557,7 +10879,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(2*i+vi);
@@ -9575,7 +10897,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<uint32_t, 16> Indices;
+ SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back((i + vi*e) >> 1);
Indices.push_back(((i + vi*e) >> 1)+e);
@@ -9633,33 +10955,103 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
- assert(BuiltinID == BPF::BI__builtin_preserve_field_info &&
- "unexpected ARM builtin");
+ assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
+ BuiltinID == BPF::BI__builtin_btf_type_id) &&
+ "unexpected BPF builtin");
- const Expr *Arg = E->getArg(0);
- bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
+ switch (BuiltinID) {
+ default:
+ llvm_unreachable("Unexpected BPF builtin");
+ case BPF::BI__builtin_preserve_field_info: {
+ const Expr *Arg = E->getArg(0);
+ bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
- if (!getDebugInfo()) {
- CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g");
- return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
- : EmitLValue(Arg).getPointer(*this);
- }
+ if (!getDebugInfo()) {
+ CGM.Error(E->getExprLoc(),
+ "using __builtin_preserve_field_info() without -g");
+ return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
+ : EmitLValue(Arg).getPointer(*this);
+ }
- // Enable underlying preserve_*_access_index() generation.
- bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
- IsInPreservedAIRegion = true;
- Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
- : EmitLValue(Arg).getPointer(*this);
- IsInPreservedAIRegion = OldIsInPreservedAIRegion;
+ // Enable underlying preserve_*_access_index() generation.
+ bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
+ IsInPreservedAIRegion = true;
+ Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
+ : EmitLValue(Arg).getPointer(*this);
+ IsInPreservedAIRegion = OldIsInPreservedAIRegion;
+
+ ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
+
+  // Build the IR for the preserve_field_info intrinsic.
+ llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
+ {FieldAddr->getType()});
+ return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
+ }
+ case BPF::BI__builtin_btf_type_id: {
+ Value *FieldVal = nullptr;
+
+  // An LValue cannot be converted to a Value directly for use as the
+  // function parameter. If it is a structure, the "alloca" result of the
+  // LValue (a pointer) is used as the parameter. If it is a simple type,
+  // the value will be loaded from its corresponding "alloca" and used as
+  // the parameter. In our case, let us just take a pointer to the LValue,
+  // since we do not really use the parameter. The purpose of the parameter
+  // is to prevent the generated IR llvm.bpf.btf.type.id intrinsic call,
+  // which carries metadata, from being changed.
+ bool IsLValue = E->getArg(0)->isLValue();
+ if (IsLValue)
+ FieldVal = EmitLValue(E->getArg(0)).getPointer(*this);
+ else
+ FieldVal = EmitScalarExpr(E->getArg(0));
- ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
+ if (!getDebugInfo()) {
+ CGM.Error(E->getExprLoc(), "using __builtin_btf_type_id() without -g");
+ return nullptr;
+ }
- // Built the IR for the preserve_field_info intrinsic.
- llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
- {FieldAddr->getType()});
- return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
+ // Generate debuginfo type for the first argument.
+ llvm::DIType *DbgInfo =
+ getDebugInfo()->getOrCreateStandaloneType(E->getArg(0)->getType(),
+ E->getArg(0)->getExprLoc());
+
+ ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
+
+  // Build the IR for the btf_type_id intrinsic.
+ //
+ // In the above, we converted LValue argument to a pointer to LValue.
+ // For example, the following
+ // int v;
+ // C1: __builtin_btf_type_id(v, flag);
+ // will be converted to
+ // L1: llvm.bpf.btf.type.id(&v, flag)
+ // This makes it hard to differentiate from
+ // C2: __builtin_btf_type_id(&v, flag);
+ // to
+ // L2: llvm.bpf.btf.type.id(&v, flag)
+ //
+  // If both C1 and C2 are present in the code, LLVM may later
+  // do CSE on L1 and L2, which will result in incorrectly tagged types.
+  //
+  // The C1->L1 transformation only happens if the argument of
+  // __builtin_btf_type_id() is an LValue. So let us record whether
+  // the argument is an LValue or not in the generated IR. This should
+  // prevent potential CSE from causing debuginfo type loss.
+ //
+ // The generated IR intrinsics will hence look like
+ // L1: llvm.bpf.btf.type.id(&v, 1, flag) !di_type_for_{v};
+ // L2: llvm.bpf.btf.type.id(&v, 0, flag) !di_type_for_{&v};
+ Constant *CV = ConstantInt::get(IntTy, IsLValue);
+ llvm::Function *FnBtfTypeId = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id,
+ {FieldVal->getType(), CV->getType()});
+ CallInst *Fn = Builder.CreateCall(FnBtfTypeId, {FieldVal, CV, FlagValue});
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+ return Fn;
+ }
+ }
}
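A hedged usage sketch for the two BPF builtins handled above; the struct and the kind/flag constants are illustrative placeholders, and both builtins require -g, as the diagnostics in this function enforce:

    /* Compile with: clang -g -O2 -target bpf -c probe.c */
    struct task { int pid; };

    unsigned probe(struct task *t) {
      /* Relocatable field info; kind 0 is illustrative. */
      unsigned off = __builtin_preserve_field_info(t->pid, 0);
      /* BTF type id of the argument's type; flag 0 is illustrative.
         '*t' is an LValue, so the intrinsic is tagged accordingly. */
      unsigned id = __builtin_btf_type_id(*t, 0);
      return off + id;
    }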
llvm::Value *CodeGenFunction::
@@ -9679,8 +11071,8 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
}
// Otherwise, insertelement the values to build the vector.
- Value *Result =
- llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
+ Value *Result = llvm::UndefValue::get(
+ llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
@@ -9692,14 +11084,15 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
unsigned NumElts) {
- llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
- cast<IntegerType>(Mask->getType())->getBitWidth());
+ auto *MaskTy = llvm::FixedVectorType::get(
+ CGF.Builder.getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
// If we have less than 8 elements, then the starting mask was an i8 and
// we need to extract down to the right number of elements.
if (NumElts < 8) {
- uint32_t Indices[4];
+ int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
@@ -9709,42 +11102,40 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
return MaskVec;
}
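getMaskVecValue bitcasts an iN mask to <N x i1> and, for vectors shorter than 8 lanes, shuffles out just the low lanes. A hedged sketch of a call that exercises the narrow path, assuming AVX-512F/VL headers:

    #include <immintrin.h>

    /* A 2-element masked move: the __mmask8 is bitcast to <8 x i1>
       and only the low 2 lanes are extracted for the select. */
    __m128d keep(__m128d v, __mmask8 m, __m128d w) {
      return _mm_mask_mov_pd(v, m, w); /* requires -mavx512f -mavx512vl */
    }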
-static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops,
- unsigned Align) {
+static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ Align Alignment) {
// Cast the pointer to right type.
Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Ops[1]->getType()));
- Value *MaskVec = getMaskVecValue(CGF, Ops[2],
- Ops[1]->getType()->getVectorNumElements());
+ Value *MaskVec = getMaskVecValue(
+ CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements());
- return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
+ return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
}
-static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops, unsigned Align) {
+static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ Align Alignment) {
// Cast the pointer to right type.
Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Ops[1]->getType()));
- Value *MaskVec = getMaskVecValue(CGF, Ops[2],
- Ops[1]->getType()->getVectorNumElements());
+ Value *MaskVec = getMaskVecValue(
+ CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements());
- return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
+ return CGF.Builder.CreateMaskedLoad(Ptr, Alignment, MaskVec, Ops[1]);
}
static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
ArrayRef<Value *> Ops) {
- llvm::Type *ResultTy = Ops[1]->getType();
- llvm::Type *PtrTy = ResultTy->getVectorElementType();
+ auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
+ llvm::Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getMaskVecValue(CGF, Ops[2],
- ResultTy->getVectorNumElements());
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
ResultTy);
@@ -9754,10 +11145,9 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
ArrayRef<Value *> Ops,
bool IsCompress) {
- llvm::Type *ResultTy = Ops[1]->getType();
+ auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
- Value *MaskVec = getMaskVecValue(CGF, Ops[2],
- ResultTy->getVectorNumElements());
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
: Intrinsic::x86_avx512_mask_expand;
@@ -9767,15 +11157,14 @@ static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
static Value *EmitX86CompressStore(CodeGenFunction &CGF,
ArrayRef<Value *> Ops) {
- llvm::Type *ResultTy = Ops[1]->getType();
- llvm::Type *PtrTy = ResultTy->getVectorElementType();
+ auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
+ llvm::Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getMaskVecValue(CGF, Ops[2],
- ResultTy->getVectorNumElements());
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
ResultTy);
@@ -9804,7 +11193,7 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
// Funnel shift amounts are treated as modulo and types are all power-of-2 so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
- unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumElts = cast<llvm::VectorType>(Ty)->getNumElements();
Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
}
@@ -9862,7 +11251,8 @@ static Value *EmitX86Select(CodeGenFunction &CGF,
if (C->isAllOnesValue())
return Op0;
- Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
+ Mask = getMaskVecValue(
+ CGF, Mask, cast<llvm::VectorType>(Op0->getType())->getNumElements());
return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}
@@ -9874,9 +11264,8 @@ static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
if (C->isAllOnesValue())
return Op0;
- llvm::VectorType *MaskTy =
- llvm::VectorType::get(CGF.Builder.getInt1Ty(),
- Mask->getType()->getIntegerBitWidth());
+ auto *MaskTy = llvm::FixedVectorType::get(
+ CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
return CGF.Builder.CreateSelect(Mask, Op0, Op1);
@@ -9891,7 +11280,7 @@ static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
}
if (NumElts < 8) {
- uint32_t Indices[8];
+ int Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
@@ -9909,15 +11298,16 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
bool Signed, ArrayRef<Value *> Ops) {
assert((Ops.size() == 2 || Ops.size() == 4) &&
"Unexpected number of arguments");
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
Cmp = Constant::getNullValue(
- llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
+ llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
} else if (CC == 7) {
Cmp = Constant::getAllOnesValue(
- llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
+ llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
} else {
ICmpInst::Predicate Pred;
switch (CC) {
@@ -10033,24 +11423,19 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
// Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
if (IID != Intrinsic::not_intrinsic &&
- cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
+ (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
+ IsAddSub)) {
Function *Intr = CGF.CGM.getIntrinsic(IID);
Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
} else {
llvm::Type *Ty = A->getType();
- Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
-
- if (IsAddSub) {
- // Negate even elts in C using a mask.
- unsigned NumElts = Ty->getVectorNumElements();
- SmallVector<uint32_t, 16> Indices(NumElts);
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i + (i % 2) * NumElts;
-
- Value *NegC = CGF.Builder.CreateFNeg(C);
- Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
- Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
+ Function *FMA;
+ if (CGF.Builder.getIsFPConstrained()) {
+ FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
+ Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
+ } else {
+ FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Res = CGF.Builder.CreateCall(FMA, {A, B, C});
}
}
@@ -10108,6 +11493,10 @@ EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
Intrinsic::x86_avx512_vfmadd_f64;
Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
{Ops[0], Ops[1], Ops[2], Ops[4]});
+ } else if (CGF.Builder.getIsFPConstrained()) {
+ Function *FMA = CGF.CGM.getIntrinsic(
+ Intrinsic::experimental_constrained_fma, Ops[0]->getType());
+ Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
} else {
Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
@@ -10132,8 +11521,8 @@ static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
ArrayRef<Value *> Ops) {
llvm::Type *Ty = Ops[0]->getType();
// Arguments have a vXi32 type so cast to vXi64.
- Ty = llvm::VectorType::get(CGF.Int64Ty,
- Ty->getPrimitiveSizeInBits() / 64);
+ Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
+ Ty->getPrimitiveSizeInBits() / 64);
Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
@@ -10187,7 +11576,7 @@ static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
llvm::Type *DstTy) {
- unsigned NumberOfElements = DstTy->getVectorNumElements();
+ unsigned NumberOfElements = cast<llvm::VectorType>(DstTy)->getNumElements();
Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
@@ -10209,6 +11598,43 @@ Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
return EmitX86CpuIs(CPUStr);
}
+// Convert F16 halves to floats.
+static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops,
+ llvm::Type *DstTy) {
+ assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
+ "Unknown cvtph2ps intrinsic");
+
+  // If the SAE intrinsic doesn't use default rounding, we can't use generic IR.
+ if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
+ Function *F =
+ CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
+ return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
+ }
+
+ unsigned NumDstElts = cast<llvm::VectorType>(DstTy)->getNumElements();
+ Value *Src = Ops[0];
+
+ // Extract the subvector.
+ if (NumDstElts != cast<llvm::VectorType>(Src->getType())->getNumElements()) {
+ assert(NumDstElts == 4 && "Unexpected vector size");
+ Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()),
+ ArrayRef<int>{0, 1, 2, 3});
+ }
+
+ // Bitcast from vXi16 to vXf16.
+ auto *HalfTy = llvm::FixedVectorType::get(
+ llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
+ Src = CGF.Builder.CreateBitCast(Src, HalfTy);
+
+ // Perform the fp-extension.
+ Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
+
+ if (Ops.size() >= 3)
+ Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+ return Res;
+}
+
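EmitX86CvtF16ToFloatExpr lets the cvtph2ps builtins lower to a subvector shuffle, a bitcast to vXf16, and an fpext instead of a target intrinsic (except when an SAE operand forces the 512-bit intrinsic). A sketch of source that reaches this path, assuming F16C headers:

    #include <immintrin.h>

    /* _mm_cvtph_ps wraps __builtin_ia32_vcvtph2ps: the low 4 x i16
       lanes are extracted, bitcast to <4 x half>, then widened with
       fpext by the helper above. Requires -mf16c. */
    __m128 widen(__m128i halves) {
      return _mm_cvtph_ps(halves);
    }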
// Convert a BF16 to a float.
static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
const CallExpr *E,
@@ -10245,11 +11671,11 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING) \
.Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
-#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
- .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
-#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \
+#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
+ .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_TYPE(ENUM, STR) \
.Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
-#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \
+#define X86_CPU_SUBTYPE(ENUM, STR) \
.Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/Support/X86TargetParser.def"
.Default({0, 0});
@@ -10279,7 +11705,7 @@ CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
for (const StringRef &FeatureStr : FeatureStrs) {
unsigned Feature =
StringSwitch<unsigned>(FeatureStr)
-#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
+#define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, llvm::X86::FEATURE_##ENUM)
#include "llvm/Support/X86TargetParser.def"
;
FeaturesMask |= (1ULL << Feature);
@@ -10404,8 +11830,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// TODO: The builtins could be removed if the SSE header files used vector
// extension comparisons directly (vector ordered/unordered may need
// additional support via __builtin_isnan()).
- auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
- Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
+ bool IsSignaling) {
+ Value *Cmp;
+ if (IsSignaling)
+ Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
+ else
+ Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
@@ -10484,7 +11915,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vec_ext_v16hi:
case X86::BI__builtin_ia32_vec_ext_v8si:
case X86::BI__builtin_ia32_vec_ext_v4di: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
Index &= NumElts - 1;
// These builtins exist so we can ensure the index is an ICE and in range.
@@ -10499,7 +11931,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vec_set_v16hi:
case X86::BI__builtin_ia32_vec_set_v8si:
case X86::BI__builtin_ia32_vec_set_v4di: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
Index &= NumElts - 1;
// These builtins exist so we can ensure the index is an ICE and in range.
@@ -10587,12 +12020,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storedquqi512_mask:
case X86::BI__builtin_ia32_storeupd512_mask:
case X86::BI__builtin_ia32_storeups512_mask:
- return EmitX86MaskedStore(*this, Ops, 1);
+ return EmitX86MaskedStore(*this, Ops, Align(1));
case X86::BI__builtin_ia32_storess128_mask:
- case X86::BI__builtin_ia32_storesd128_mask: {
- return EmitX86MaskedStore(*this, Ops, 1);
- }
+ case X86::BI__builtin_ia32_storesd128_mask:
+ return EmitX86MaskedStore(*this, Ops, Align(1));
+
case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
case X86::BI__builtin_ia32_vpopcntq_128:
@@ -10678,10 +12111,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
- case X86::BI__builtin_ia32_vfmaddsubps:
- case X86::BI__builtin_ia32_vfmaddsubpd:
- case X86::BI__builtin_ia32_vfmaddsubps256:
- case X86::BI__builtin_ia32_vfmaddsubpd256:
case X86::BI__builtin_ia32_vfmaddsubps512_mask:
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -10703,11 +12132,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_movdqa32store512_mask:
case X86::BI__builtin_ia32_movdqa64store512_mask:
case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask: {
- unsigned Align =
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
- return EmitX86MaskedStore(*this, Ops, Align);
- }
+ case X86::BI__builtin_ia32_storeapd512_mask:
+ return EmitX86MaskedStore(
+ *this, Ops,
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
+
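The unaligned masked stores now pass an explicit Align(1), while the aligned variants take the alignment of the argument's type via getAsAlign(). At the source level the pairing looks roughly like this, assuming AVX-512F:

    #include <immintrin.h>

    void spill(float *p, __mmask16 m, __m512 v) {
      _mm512_mask_storeu_ps(p, m, v); /* llvm.masked.store ... align 1  */
      _mm512_mask_store_ps(p, m, v);  /* llvm.masked.store ... align 64 */
    }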
case X86::BI__builtin_ia32_loadups128_mask:
case X86::BI__builtin_ia32_loadups256_mask:
case X86::BI__builtin_ia32_loadups512_mask:
@@ -10726,11 +12155,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi128_mask:
case X86::BI__builtin_ia32_loaddqudi256_mask:
case X86::BI__builtin_ia32_loaddqudi512_mask:
- return EmitX86MaskedLoad(*this, Ops, 1);
+ return EmitX86MaskedLoad(*this, Ops, Align(1));
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, 1);
+ return EmitX86MaskedLoad(*this, Ops, Align(1));
case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
@@ -10743,11 +12172,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_movdqa32load512_mask:
case X86::BI__builtin_ia32_movdqa64load128_mask:
case X86::BI__builtin_ia32_movdqa64load256_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask: {
- unsigned Align =
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
- return EmitX86MaskedLoad(*this, Ops, Align);
- }
+ case X86::BI__builtin_ia32_movdqa64load512_mask:
+ return EmitX86MaskedLoad(
+ *this, Ops,
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
case X86::BI__builtin_ia32_expandloaddf128_mask:
case X86::BI__builtin_ia32_expandloaddf256_mask:
@@ -10930,8 +12358,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
break;
}
- unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(),
- Ops[2]->getType()->getVectorNumElements());
+ unsigned MinElts =
+ std::min(cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(),
+ cast<llvm::VectorType>(Ops[2]->getType())->getNumElements());
Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
Function *Intr = CGM.getIntrinsic(IID);
return Builder.CreateCall(Intr, Ops);
@@ -11038,8 +12467,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
break;
}
- unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(),
- Ops[3]->getType()->getVectorNumElements());
+ unsigned MinElts =
+ std::min(cast<llvm::VectorType>(Ops[2]->getType())->getNumElements(),
+ cast<llvm::VectorType>(Ops[3]->getType())->getNumElements());
Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
Function *Intr = CGM.getIntrinsic(IID);
return Builder.CreateCall(Intr, Ops);
@@ -11061,16 +12491,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_extracti64x2_256_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask: {
- llvm::Type *DstTy = ConvertType(E->getType());
- unsigned NumElts = DstTy->getVectorNumElements();
- unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
+ auto *DstTy = cast<llvm::VectorType>(ConvertType(E->getType()));
+ unsigned NumElts = DstTy->getNumElements();
+ unsigned SrcNumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
unsigned SubVectors = SrcNumElts / NumElts;
unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
Index &= SubVectors - 1; // Remove any extra bits.
Index *= NumElts;
- uint32_t Indices[16];
+ int Indices[16];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + Index;
@@ -11100,15 +12531,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_inserti64x2_256:
case X86::BI__builtin_ia32_insertf64x2_512:
case X86::BI__builtin_ia32_inserti64x2_512: {
- unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
- unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
+ unsigned DstNumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ unsigned SrcNumElts =
+ cast<llvm::VectorType>(Ops[1]->getType())->getNumElements();
unsigned SubVectors = DstNumElts / SrcNumElts;
unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
Index &= SubVectors - 1; // Remove any extra bits.
Index *= SrcNumElts;
- uint32_t Indices[16];
+ int Indices[16];
for (unsigned i = 0; i != DstNumElts; ++i)
Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
@@ -11165,10 +12598,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pblendw256:
case X86::BI__builtin_ia32_pblendd128:
case X86::BI__builtin_ia32_pblendd256: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- uint32_t Indices[16];
+ int Indices[16];
// If there are more than 8 elements, the immediate is used twice so make
// sure we handle that.
for (unsigned i = 0; i != NumElts; ++i)
@@ -11182,13 +12616,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- llvm::Type *Ty = Ops[0]->getType();
- unsigned NumElts = Ty->getVectorNumElements();
+ auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ unsigned NumElts = Ty->getNumElements();
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101;
- uint32_t Indices[32];
+ int Indices[32];
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i) {
Indices[l + i] = l + (Imm & 3);
@@ -11206,13 +12640,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pshufhw256:
case X86::BI__builtin_ia32_pshufhw512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- llvm::Type *Ty = Ops[0]->getType();
- unsigned NumElts = Ty->getVectorNumElements();
+ auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ unsigned NumElts = Ty->getNumElements();
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101;
- uint32_t Indices[32];
+ int Indices[32];
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + i;
@@ -11236,15 +12670,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vpermilpd512:
case X86::BI__builtin_ia32_vpermilps512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- llvm::Type *Ty = Ops[0]->getType();
- unsigned NumElts = Ty->getVectorNumElements();
+ auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ unsigned NumElts = Ty->getNumElements();
unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101;
- uint32_t Indices[16];
+ int Indices[16];
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
Indices[i + l] = (Imm % NumLaneElts) + l;
@@ -11263,15 +12697,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- llvm::Type *Ty = Ops[0]->getType();
- unsigned NumElts = Ty->getVectorNumElements();
+ auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ unsigned NumElts = Ty->getNumElements();
unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101;
- uint32_t Indices[16];
+ int Indices[16];
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
unsigned Index = Imm % NumLaneElts;
@@ -11291,11 +12725,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_permdi512:
case X86::BI__builtin_ia32_permdf512: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- llvm::Type *Ty = Ops[0]->getType();
- unsigned NumElts = Ty->getVectorNumElements();
+ auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ unsigned NumElts = Ty->getNumElements();
// These intrinsics operate on 256-bit lanes of four 64-bit elements.
- uint32_t Indices[8];
+ int Indices[8];
for (unsigned l = 0; l != NumElts; l += 4)
for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
@@ -11309,7 +12743,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_palignr512: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
assert(NumElts % 16 == 0);
// If palignr is shifting the pair of vectors more than the size of two
@@ -11325,7 +12760,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
}
- uint32_t Indices[64];
+ int Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l != NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
@@ -11346,13 +12781,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
// Mask the shift amount to width of two vectors.
ShiftVal &= (2 * NumElts) - 1;
- uint32_t Indices[16];
+ int Indices[16];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + ShiftVal;
@@ -11369,12 +12805,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_shuf_i32x4:
case X86::BI__builtin_ia32_shuf_i64x2: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- llvm::Type *Ty = Ops[0]->getType();
- unsigned NumElts = Ty->getVectorNumElements();
+ auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ unsigned NumElts = Ty->getNumElements();
unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
unsigned NumLaneElts = NumElts / NumLanes;
- uint32_t Indices[16];
+ int Indices[16];
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
unsigned Index = (Imm % NumLanes) * NumLaneElts;
Imm /= NumLanes; // Discard the bits we just used.
@@ -11395,7 +12831,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vperm2f128_si256:
case X86::BI__builtin_ia32_permti256: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
// This takes a very simple approach since there are two lanes and a
// shuffle can have 2 inputs. So we reserve the first input for the first
@@ -11403,7 +12840,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// duplicate sources, but this can be dealt with in the backend.
Value *OutOps[2];
- uint32_t Indices[8];
+ int Indices[8];
for (unsigned l = 0; l != 2; ++l) {
// Determine the source for this lane.
if (Imm & (1 << ((l * 4) + 3)))
@@ -11433,15 +12870,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- llvm::Type *ResultType = Ops[0]->getType();
+ auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType());
// Builtin type is vXi64 so multiply by 8 to get bytes.
- unsigned NumElts = ResultType->getVectorNumElements() * 8;
+ unsigned NumElts = ResultType->getNumElements() * 8;
// If pslldq is shifting the vector more than 15 bytes, emit zero.
if (ShiftVal >= 16)
return llvm::Constant::getNullValue(ResultType);
- uint32_t Indices[64];
+ int Indices[64];
// 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l != NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
@@ -11451,7 +12888,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
Value *Zero = llvm::Constant::getNullValue(VecTy);
Value *SV = Builder.CreateShuffleVector(Zero, Cast,
@@ -11463,15 +12900,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_psrldqi256_byteshift:
case X86::BI__builtin_ia32_psrldqi512_byteshift: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- llvm::Type *ResultType = Ops[0]->getType();
+ auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType());
// Builtin type is vXi64 so multiply by 8 to get bytes.
- unsigned NumElts = ResultType->getVectorNumElements() * 8;
+ unsigned NumElts = ResultType->getNumElements() * 8;
// If psrldq is shifting the vector more than 15 bytes, emit zero.
if (ShiftVal >= 16)
return llvm::Constant::getNullValue(ResultType);
- uint32_t Indices[64];
+ int Indices[64];
// 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l != NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
@@ -11481,7 +12918,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
Value *Zero = llvm::Constant::getNullValue(VecTy);
Value *SV = Builder.CreateShuffleVector(Cast, Zero,
@@ -11501,7 +12938,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *In = getMaskVecValue(*this, Ops[0], NumElts);
- uint32_t Indices[64];
+ int Indices[64];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = NumElts + i - ShiftVal;
@@ -11523,7 +12960,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *In = getMaskVecValue(*this, Ops[0], NumElts);
- uint32_t Indices[64];
+ int Indices[64];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + ShiftVal;
@@ -11555,7 +12992,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Unaligned nontemporal store of the scalar value.
StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
- SI->setAlignment(llvm::Align::None());
+ SI->setAlignment(llvm::Align(1));
return SI;
}
// Rotate is a special case of funnel shift - 1st 2 args are the same.
@@ -11803,7 +13240,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
- uint32_t Indices[64];
+ int Indices[64];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
@@ -11832,8 +13269,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_sqrtss:
case X86::BI__builtin_ia32_sqrtsd: {
Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
- A = Builder.CreateCall(F, {A});
+ Function *F;
+ if (Builder.getIsFPConstrained()) {
+ F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
+ A->getType());
+ A = Builder.CreateConstrainedFPCall(F, {A});
+ } else {
+ F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, {A});
+ }
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
}
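When strict floating-point semantics are in effect (e.g. under -ffp-exception-behavior=strict), the scalar-sqrt path above switches to the constrained intrinsic so the operation cannot be reordered across FP-environment accesses. A small sketch, assuming SSE2:

    #include <immintrin.h>

    /* Lowers via __builtin_ia32_sqrtsd: lane 0 is extracted, passed to
       llvm.sqrt.f64 (or llvm.experimental.constrained.sqrt.f64 in
       strict mode), and reinserted. */
    __m128d sqrt_low(__m128d x) {
      return _mm_sqrt_sd(x, x);
    }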
case X86::BI__builtin_ia32_sqrtsd_round_mask:
@@ -11848,8 +13292,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
- A = Builder.CreateCall(F, A);
+ Function *F;
+ if (Builder.getIsFPConstrained()) {
+ F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
+ A->getType());
+ A = Builder.CreateConstrainedFPCall(F, A);
+ } else {
+ F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
+ A = Builder.CreateCall(F, A);
+ }
Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
@@ -11871,8 +13322,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
}
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
- return Builder.CreateCall(F, Ops[0]);
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
+ Ops[0]->getType());
+ return Builder.CreateConstrainedFPCall(F, Ops[0]);
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
+ return Builder.CreateCall(F, Ops[0]);
+ }
}
case X86::BI__builtin_ia32_pabsb128:
case X86::BI__builtin_ia32_pabsw128:
@@ -12089,7 +13546,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclasspd128_mask:
case X86::BI__builtin_ia32_fpclasspd256_mask:
case X86::BI__builtin_ia32_fpclasspd512_mask: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
Value *MaskIn = Ops[2];
Ops.erase(&Ops[2]);
@@ -12126,7 +13584,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vp2intersect_d_512:
case X86::BI__builtin_ia32_vp2intersect_d_256:
case X86::BI__builtin_ia32_vp2intersect_d_128: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
Intrinsic::ID ID;
switch (BuiltinID) {
@@ -12184,7 +13643,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
Value *MaskIn = Ops[2];
Ops.erase(&Ops[2]);
@@ -12209,28 +13669,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
- return getVectorFCmpIR(CmpInst::FCMP_OEQ);
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
case X86::BI__builtin_ia32_cmpltps:
case X86::BI__builtin_ia32_cmpltpd:
- return getVectorFCmpIR(CmpInst::FCMP_OLT);
+ return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
case X86::BI__builtin_ia32_cmpleps:
case X86::BI__builtin_ia32_cmplepd:
- return getVectorFCmpIR(CmpInst::FCMP_OLE);
+ return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
case X86::BI__builtin_ia32_cmpunordps:
case X86::BI__builtin_ia32_cmpunordpd:
- return getVectorFCmpIR(CmpInst::FCMP_UNO);
+ return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
case X86::BI__builtin_ia32_cmpneqps:
case X86::BI__builtin_ia32_cmpneqpd:
- return getVectorFCmpIR(CmpInst::FCMP_UNE);
+ return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
case X86::BI__builtin_ia32_cmpnltps:
case X86::BI__builtin_ia32_cmpnltpd:
- return getVectorFCmpIR(CmpInst::FCMP_UGE);
+ return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
case X86::BI__builtin_ia32_cmpnleps:
case X86::BI__builtin_ia32_cmpnlepd:
- return getVectorFCmpIR(CmpInst::FCMP_UGT);
+ return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
case X86::BI__builtin_ia32_cmpordps:
case X86::BI__builtin_ia32_cmpordpd:
- return getVectorFCmpIR(CmpInst::FCMP_ORD);
+ return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd:
@@ -12255,42 +13715,90 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Ignoring requested signaling behaviour,
// e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
FCmpInst::Predicate Pred;
- switch (CC) {
- case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
- case 0x01: Pred = FCmpInst::FCMP_OLT; break;
- case 0x02: Pred = FCmpInst::FCMP_OLE; break;
- case 0x03: Pred = FCmpInst::FCMP_UNO; break;
- case 0x04: Pred = FCmpInst::FCMP_UNE; break;
- case 0x05: Pred = FCmpInst::FCMP_UGE; break;
- case 0x06: Pred = FCmpInst::FCMP_UGT; break;
- case 0x07: Pred = FCmpInst::FCMP_ORD; break;
- case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
- case 0x09: Pred = FCmpInst::FCMP_ULT; break;
- case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
- case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
- case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
- case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
- case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
- case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
- case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
- case 0x11: Pred = FCmpInst::FCMP_OLT; break;
- case 0x12: Pred = FCmpInst::FCMP_OLE; break;
- case 0x13: Pred = FCmpInst::FCMP_UNO; break;
- case 0x14: Pred = FCmpInst::FCMP_UNE; break;
- case 0x15: Pred = FCmpInst::FCMP_UGE; break;
- case 0x16: Pred = FCmpInst::FCMP_UGT; break;
- case 0x17: Pred = FCmpInst::FCMP_ORD; break;
- case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
- case 0x19: Pred = FCmpInst::FCMP_ULT; break;
- case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
- case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
- case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
- case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
- case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
- case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
+ bool IsSignaling;
+  // Predicates for 16-31 repeat the 0-15 predicates. Only the signaling
+ // behavior is inverted. We'll handle that after the switch.
+ switch (CC & 0xf) {
+ case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
+ case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
+ case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
+ case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
+ case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
+ case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
+ case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
+ case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
+ case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
+ case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
+ case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
+ case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
+ case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
+ case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
+ case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
+ case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
default: llvm_unreachable("Unhandled CC");
}
+  // Invert the signaling behavior for 16-31.
+ if (CC & 0x10)
+ IsSignaling = !IsSignaling;
+
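Only the low four bits pick the predicate; bit 0x10 flips quiet versus signaling. For instance, _CMP_LT_OS (0x01) and _CMP_LT_OQ (0x11) both map to FCMP_OLT, but the OS form is emitted with the signaling CreateFCmpS. A user-level sketch, assuming AVX:

    #include <immintrin.h>

    __m256 cmp_lt(__m256 a, __m256 b) {
      __m256 s = _mm256_cmp_ps(a, b, _CMP_LT_OS); /* signaling: fcmps olt */
      __m256 q = _mm256_cmp_ps(a, b, _CMP_LT_OQ); /* quiet:     fcmp  olt */
      return _mm256_and_ps(s, q);
    }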
+ // If the predicate is true or false and we're using constrained intrinsics,
+ // we don't have a compare intrinsic we can use. Just use the legacy X86
+ // specific intrinsic.
+ if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) &&
+ Builder.getIsFPConstrained()) {
+
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_cmpps:
+ IID = Intrinsic::x86_sse_cmp_ps;
+ break;
+ case X86::BI__builtin_ia32_cmpps256:
+ IID = Intrinsic::x86_avx_cmp_ps_256;
+ break;
+ case X86::BI__builtin_ia32_cmppd:
+ IID = Intrinsic::x86_sse2_cmp_pd;
+ break;
+ case X86::BI__builtin_ia32_cmppd256:
+ IID = Intrinsic::x86_avx_cmp_pd_256;
+ break;
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ IID = Intrinsic::x86_avx512_cmp_ps_512;
+ break;
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ IID = Intrinsic::x86_avx512_cmp_pd_512;
+ break;
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ IID = Intrinsic::x86_avx512_cmp_ps_128;
+ break;
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ IID = Intrinsic::x86_avx512_cmp_ps_256;
+ break;
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ IID = Intrinsic::x86_avx512_cmp_pd_128;
+ break;
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ IID = Intrinsic::x86_avx512_cmp_pd_256;
+ break;
+ }
+
+ Function *Intr = CGM.getIntrinsic(IID);
+ if (cast<llvm::VectorType>(Intr->getReturnType())
+ ->getElementType()
+ ->isIntegerTy(1)) {
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ Value *MaskIn = Ops[3];
+ Ops.erase(&Ops[3]);
+
+ Value *Cmp = Builder.CreateCall(Intr, Ops);
+ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+ }
+
+ return Builder.CreateCall(Intr, Ops);
+ }
+
// Builtins without the _mask suffix return a vector of integers
// of the same width as the input vectors
switch (BuiltinID) {
@@ -12300,12 +13808,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpps256_mask:
case X86::BI__builtin_ia32_cmppd128_mask:
case X86::BI__builtin_ia32_cmppd256_mask: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
- Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ // FIXME: Support SAE.
+ unsigned NumElts =
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ Value *Cmp;
+ if (IsSignaling)
+ Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
+ else
+ Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
}
default:
- return getVectorFCmpIR(Pred);
+ return getVectorFCmpIR(Pred, IsSignaling);
}
}
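[Editor's note: the sketch below is an illustration, not part of the patch.]
The CC immediate follows the Intel _CMP_* encoding: bits 0-3 select the
predicate and bit 4 flips quiet vs. signaling, so _CMP_LT_OS (0x01) and
_CMP_LT_OQ (0x11) both lower to FCMP_OLT and differ only in whether
CreateFCmpS is used. A minimal C++ sketch of the decode; PredTable and
SignalingTable are hypothetical stand-ins for the switch above:

    #include "llvm/IR/Instructions.h"
    #include <utility>

    using llvm::FCmpInst;

    // Tables mirroring the switch over CC & 0xf above.
    static const FCmpInst::Predicate PredTable[16] = {
        FCmpInst::FCMP_OEQ, FCmpInst::FCMP_OLT, FCmpInst::FCMP_OLE,
        FCmpInst::FCMP_UNO, FCmpInst::FCMP_UNE, FCmpInst::FCMP_UGE,
        FCmpInst::FCMP_UGT, FCmpInst::FCMP_ORD, FCmpInst::FCMP_UEQ,
        FCmpInst::FCMP_ULT, FCmpInst::FCMP_ULE, FCmpInst::FCMP_FALSE,
        FCmpInst::FCMP_ONE, FCmpInst::FCMP_OGE, FCmpInst::FCMP_OGT,
        FCmpInst::FCMP_TRUE};
    static const bool SignalingTable[16] = {
        false, true, true, false, false, true, true, false,
        false, true, true, false, false, true, true, false};

    static std::pair<FCmpInst::Predicate, bool> decodeCmpCC(unsigned CC) {
      FCmpInst::Predicate Pred = PredTable[CC & 0xf];
      bool IsSignaling = SignalingTable[CC & 0xf];
      if (CC & 0x10) // Predicates 16-31 repeat 0-15 with signaling inverted.
        IsSignaling = !IsSignaling;
      return {Pred, IsSignaling};
    }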
@@ -12343,10 +13857,19 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpordsd:
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
+ // f16c half2float intrinsics
+ case X86::BI__builtin_ia32_vcvtph2ps:
+ case X86::BI__builtin_ia32_vcvtph2ps256:
+ case X86::BI__builtin_ia32_vcvtph2ps_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
+
// AVX512 bf16 intrinsics
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
- Ops[2] = getMaskVecValue(*this, Ops[2],
- Ops[0]->getType()->getVectorNumElements());
+ Ops[2] = getMaskVecValue(
+ *this, Ops[2],
+ cast<llvm::VectorType>(Ops[0]->getType())->getNumElements());
Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
@@ -12506,7 +14029,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__stosb: {
// We treat __stosb as a volatile memset - it may not generate a "rep stosb"
// instruction, but it will create a memset that won't be optimized away.
- return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align::None(), true);
+ return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
}
case X86::BI__ud2:
// llvm.trap makes a ud2a instruction on x86.
@@ -12731,9 +14254,14 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
case PPC::BI__builtin_vsx_xvsqrtdp: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
- ID = Intrinsic::sqrt;
- llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, X);
+ if (Builder.getIsFPConstrained()) {
+ llvm::Function *F = CGM.getIntrinsic(
+ Intrinsic::experimental_constrained_sqrt, ResultType);
+ return Builder.CreateConstrainedFPCall(F, X);
+ } else {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
+ return Builder.CreateCall(F, X);
+ }
}
// Count leading zeros
case PPC::BI__builtin_altivec_vclzb:
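[Editor's note: a minimal sketch, not part of the patch.] This hunk and the
PPC/SystemZ hunks that follow all repeat the same pattern by hand: choose the
experimental constrained intrinsic when the builder is in FP-constrained
mode, otherwise the plain one. A hypothetical helper showing the shape (not
an existing CodeGen API):

    static llvm::Value *emitMaybeConstrainedUnaryFP(
        clang::CodeGen::CodeGenFunction &CGF, llvm::Intrinsic::ID PlainID,
        llvm::Intrinsic::ID ConstrainedID, llvm::Type *Ty, llvm::Value *X) {
      auto &B = CGF.Builder;
      if (B.getIsFPConstrained()) {
        // Constrained intrinsics carry rounding/exception metadata operands;
        // CreateConstrainedFPCall fills them from the builder's FP state.
        llvm::Function *F = CGF.CGM.getIntrinsic(ConstrainedID, Ty);
        return B.CreateConstrainedFPCall(F, X);
      }
      llvm::Function *F = CGF.CGM.getIntrinsic(PlainID, Ty);
      return B.CreateCall(F, X);
    }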
@@ -12790,21 +14318,32 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
BuiltinID == PPC::BI__builtin_vsx_xvrspim)
- ID = Intrinsic::floor;
+ ID = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_floor
+ : Intrinsic::floor;
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
BuiltinID == PPC::BI__builtin_vsx_xvrspi)
- ID = Intrinsic::round;
+ ID = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_round
+ : Intrinsic::round;
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
BuiltinID == PPC::BI__builtin_vsx_xvrspic)
- ID = Intrinsic::nearbyint;
+ ID = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_nearbyint
+ : Intrinsic::nearbyint;
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
BuiltinID == PPC::BI__builtin_vsx_xvrspip)
- ID = Intrinsic::ceil;
+ ID = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_ceil
+ : Intrinsic::ceil;
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
- ID = Intrinsic::trunc;
+ ID = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_trunc
+ : Intrinsic::trunc;
llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, X);
+ return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
+ : Builder.CreateCall(F, X);
}
// Absolute value
@@ -12829,25 +14368,43 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ llvm::Function *F;
+ if (Builder.getIsFPConstrained())
+ F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
+ else
+ F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
switch (BuiltinID) {
case PPC::BI__builtin_vsx_xvmaddadp:
case PPC::BI__builtin_vsx_xvmaddasp:
- return Builder.CreateCall(F, {X, Y, Z});
+ if (Builder.getIsFPConstrained())
+ return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
+ else
+ return Builder.CreateCall(F, {X, Y, Z});
case PPC::BI__builtin_vsx_xvnmaddadp:
case PPC::BI__builtin_vsx_xvnmaddasp:
- return Builder.CreateFSub(Zero,
- Builder.CreateCall(F, {X, Y, Z}), "sub");
+ if (Builder.getIsFPConstrained())
+ return Builder.CreateFNeg(
+ Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
+ else
+ return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
case PPC::BI__builtin_vsx_xvmsubadp:
case PPC::BI__builtin_vsx_xvmsubasp:
- return Builder.CreateCall(F,
- {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+ if (Builder.getIsFPConstrained())
+ return Builder.CreateConstrainedFPCall(
+ F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+ else
+ return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
case PPC::BI__builtin_vsx_xvnmsubadp:
case PPC::BI__builtin_vsx_xvnmsubasp:
- Value *FsubRes =
- Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
- return Builder.CreateFSub(Zero, FsubRes, "sub");
+ if (Builder.getIsFPConstrained())
+ return Builder.CreateFNeg(
+ Builder.CreateConstrainedFPCall(
+ F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
+ "neg");
+ else
+ return Builder.CreateFNeg(
+ Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
+ "neg");
}
llvm_unreachable("Unknown FMA operation");
return nullptr; // Suppress no-return warning
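[Editor's note: illustration, not part of the patch.] The four VSX FMA forms
above differ only in where negations sit around one fused multiply-add; the
fneg rewrite preserves these identities while staying compatible with the
constrained path (it replaces the old getZeroValueForNegation/FSub idiom).
A scalar model:

    #include <cmath>

    // Elementwise models of the four builtins (names illustrative only).
    double xvmadd(double x, double y, double z)  { return  std::fma(x, y,  z); }
    double xvnmadd(double x, double y, double z) { return -std::fma(x, y,  z); }
    double xvmsub(double x, double y, double z)  { return  std::fma(x, y, -z); }
    double xvnmsub(double x, double y, double z) { return -std::fma(x, y, -z); }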
@@ -12873,25 +14430,22 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// Need to cast the second argument from a vector of unsigned int to a
// vector of long long.
- Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+ Ops[1] =
+ Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
if (getTarget().isLittleEndian()) {
- // Create a shuffle mask of (1, 0)
- Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
- ConstantInt::get(Int32Ty, 0)
- };
- Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
-
// Reverse the double words in the vector we will extract from.
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
- Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
+ Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0});
// Reverse the index.
Index = MaxIndex - Index;
}
// Intrinsic expects the first arg to be a vector of int.
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
return Builder.CreateCall(F, Ops);
}
@@ -12900,7 +14454,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
// Intrinsic expects the first argument to be a vector of doublewords.
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
// The second argument is a compile time constant int that needs to
// be clamped to the range [0, 12].
@@ -12918,13 +14473,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// Emit the call, then reverse the double words of the results vector.
Value *Call = Builder.CreateCall(F, Ops);
- // Create a shuffle mask of (1, 0)
- Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
- ConstantInt::get(Int32Ty, 0)
- };
- Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
-
- Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
+ Value *ShuffleCall =
+ Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
return ShuffleCall;
} else {
Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
@@ -12937,21 +14487,20 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
assert(ArgCI && "Third arg must be constant integer!");
unsigned Index = ArgCI->getZExtValue();
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
- Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
+ Ops[1] =
+ Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
// Account for endianness by treating this as just a shuffle. So we use the
// same indices for both LE and BE in order to produce expected results in
// both cases.
- unsigned ElemIdx0 = (Index & 2) >> 1;
- unsigned ElemIdx1 = 2 + (Index & 1);
-
- Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
- ConstantInt::get(Int32Ty, ElemIdx1)};
- Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+ int ElemIdx0 = (Index & 2) >> 1;
+ int ElemIdx1 = 2 + (Index & 1);
+ int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
Value *ShuffleCall =
- Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+ Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
QualType BIRetType = E->getType();
auto RetTy = ConvertType(BIRetType);
return Builder.CreateBitCast(ShuffleCall, RetTy);
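[Editor's note: illustration, not part of the patch.] The xxpermdi immediate
decodes into two lanes of a shuffle over the concatenated <4 x i64>: bit 1
picks the doubleword of the first source, bit 0 the doubleword of the second
(offset by 2). All four values, worked out:

    // Elements 0-1 come from Ops[0], elements 2-3 from Ops[1].
    static void decodeXXPermdi(unsigned Index, int &Idx0, int &Idx1) {
      Idx0 = (Index & 2) >> 1;
      Idx1 = 2 + (Index & 1);
    }
    // Index 0 -> {0,2}, 1 -> {0,3}, 2 -> {1,2}, 3 -> {1,3}.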
@@ -12961,14 +14510,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
assert(ArgCI && "Third argument must be a compile time constant");
unsigned Index = ArgCI->getZExtValue() & 0x3;
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
- Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
+ Ops[0] =
+ Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
+ Ops[1] =
+ Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4));
// Create a shuffle mask
- unsigned ElemIdx0;
- unsigned ElemIdx1;
- unsigned ElemIdx2;
- unsigned ElemIdx3;
+ int ElemIdx0;
+ int ElemIdx1;
+ int ElemIdx2;
+ int ElemIdx3;
if (getTarget().isLittleEndian()) {
// Little endian element N comes from element 8+N-Index of the
// concatenated wide vector (of course, using modulo arithmetic on
@@ -12985,14 +14536,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
ElemIdx3 = Index + 3;
}
- Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
- ConstantInt::get(Int32Ty, ElemIdx1),
- ConstantInt::get(Int32Ty, ElemIdx2),
- ConstantInt::get(Int32Ty, ElemIdx3)};
-
- Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+ int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
Value *ShuffleCall =
- Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+ Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
QualType BIRetType = E->getType();
auto RetTy = ConvertType(BIRetType);
return Builder.CreateBitCast(ShuffleCall, RetTy);
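[Editor's note: illustration, not part of the patch.] For xxsldwi the mask is
a four-word window into the 8-word concatenation of the sources; on big
endian it starts at Index, and on little endian word N comes from
(8 + N - Index) % 8, per the comments above. A sketch with a worked value:

    // Shuffle indices for xxsldwi over the concatenated sources.
    static void xxsldwiMask(int Index, bool IsLittleEndian, int Mask[4]) {
      for (int N = 0; N != 4; ++N)
        Mask[N] = IsLittleEndian ? (8 + N - Index) % 8 : Index + N;
    }
    // Index = 1: big endian -> {1, 2, 3, 4}; little endian -> {7, 0, 1, 2}.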
@@ -13001,7 +14547,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
case PPC::BI__builtin_pack_vector_int128: {
bool isLittleEndian = getTarget().isLittleEndian();
Value *UndefValue =
- llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
+ llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2));
Value *Res = Builder.CreateInsertElement(
UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
Res = Builder.CreateInsertElement(Res, Ops[1],
@@ -13012,7 +14558,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
case PPC::BI__builtin_unpack_vector_int128: {
ConstantInt *Index = cast<ConstantInt>(Ops[1]);
Value *Unpacked = Builder.CreateBitCast(
- Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
+ Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
if (getTarget().isLittleEndian())
Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
@@ -13022,8 +14568,91 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
}
+namespace {
+// If \p E is not a null pointer, insert an address space cast to match the
+// return type of \p E if necessary.
+Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
+ const CallExpr *E = nullptr) {
+ auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
+ auto *Call = CGF.Builder.CreateCall(F);
+ Call->addAttribute(
+ AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
+ Call->addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithAlignment(Call->getContext(), Align(4)));
+ if (!E)
+ return Call;
+ QualType BuiltinRetType = E->getType();
+ auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
+ if (RetTy == Call->getType())
+ return Call;
+ return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
+}
+
+// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
+Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
+ const unsigned XOffset = 4;
+ auto *DP = EmitAMDGPUDispatchPtr(CGF);
+ // Indexing the HSA kernel_dispatch_packet struct.
+ auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2);
+ auto *GEP = CGF.Builder.CreateGEP(DP, Offset);
+ auto *DstTy =
+ CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
+ auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
+ auto *LD = CGF.Builder.CreateLoad(Address(Cast, CharUnits::fromQuantity(2)));
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
+ APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
+ LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
+ LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
+ llvm::MDNode::get(CGF.getLLVMContext(), None));
+ return LD;
+}
+} // namespace
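[Editor's note: illustration, not part of the patch.] The offsets used by
EmitAMDGPUWorkGroupSize index into the start of the HSA kernel dispatch
packet; a sketch of the prefix being read (field names follow the HSA
specification and are shown for orientation only):

    #include <cstdint>

    struct KernelDispatchPacketPrefix { // leading fields of
      uint16_t header;                  // hsa_kernel_dispatch_packet_t
      uint16_t setup;                   // byte offset 2
      uint16_t workgroup_size_x;        // byte offset 4 = XOffset + 0*2
      uint16_t workgroup_size_y;        // byte offset 6 = XOffset + 1*2
      uint16_t workgroup_size_z;        // byte offset 8 = XOffset + 2*2
    };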
+
+// Processes the memory ordering and memory scope arguments of various
+// amdgcn builtins.
+// \p Order takes a C++11-compatible memory-ordering specifier and converts
+// it into LLVM's memory ordering specifier using the atomic C ABI, and
+// writes it to \p AO. \p Scope takes a const char * and converts it into an
+// AMDGCN-specific SyncScopeID and writes it to \p SSID.
+bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
+ llvm::AtomicOrdering &AO,
+ llvm::SyncScope::ID &SSID) {
+ if (isa<llvm::ConstantInt>(Order)) {
+ int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+
+ // Map C11/C++11 memory ordering to LLVM memory ordering
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ AO = llvm::AtomicOrdering::Acquire;
+ break;
+ case llvm::AtomicOrderingCABI::release:
+ AO = llvm::AtomicOrdering::Release;
+ break;
+ case llvm::AtomicOrderingCABI::acq_rel:
+ AO = llvm::AtomicOrdering::AcquireRelease;
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ case llvm::AtomicOrderingCABI::consume:
+ case llvm::AtomicOrderingCABI::relaxed:
+ break;
+ }
+
+ StringRef scp;
+ llvm::getConstantStringInfo(Scope, scp);
+ SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+ return true;
+ }
+ return false;
+}
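[Editor's note: illustration, not part of the patch.] A source-level use of
this helper via the fence builtin handled below; both arguments must be
compile-time constants for the conversion to succeed:

    // Compiled for amdgcn, this lowers to:
    //   fence syncscope("workgroup") release
    void flush_lds_writes() {
      __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
    }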
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+ llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ llvm::SyncScope::ID SSID;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
@@ -13091,6 +14720,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_rcpf:
case AMDGPU::BI__builtin_amdgcn_rcph:
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
+ case AMDGPU::BI__builtin_amdgcn_sqrt:
+ case AMDGPU::BI__builtin_amdgcn_sqrtf:
+ case AMDGPU::BI__builtin_amdgcn_sqrth:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
case AMDGPU::BI__builtin_amdgcn_rsq:
case AMDGPU::BI__builtin_amdgcn_rsqf:
case AMDGPU::BI__builtin_amdgcn_rsqh:
@@ -13104,6 +14737,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_cosf:
case AMDGPU::BI__builtin_amdgcn_cosh:
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
+ case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
+ return EmitAMDGPUDispatchPtr(*this, E);
case AMDGPU::BI__builtin_amdgcn_log_clampf:
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
case AMDGPU::BI__builtin_amdgcn_ldexp:
@@ -13146,7 +14781,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
// FIXME-GFX10: How should 32 bit mask be handled?
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
{ Builder.getInt64Ty(), Src0->getType() });
return Builder.CreateCall(F, { Src0, Src1, Src2 });
}
@@ -13157,7 +14792,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
// FIXME-GFX10: How should 32 bit mask be handled?
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
{ Builder.getInt64Ty(), Src0->getType() });
return Builder.CreateCall(F, { Src0, Src1, Src2 });
}
@@ -13178,7 +14813,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_read_exec: {
CallInst *CI = cast<CallInst>(
- EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
+ EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
CI->setConvergent();
return CI;
}
@@ -13187,7 +14822,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
"exec_lo" : "exec_hi";
CallInst *CI = cast<CallInst>(
- EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
+ EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
CI->setConvergent();
return CI;
}
@@ -13199,6 +14834,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
+ // amdgcn workgroup size
+ case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
+ return EmitAMDGPUWorkGroupSize(*this, 0);
+ case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
+ return EmitAMDGPUWorkGroupSize(*this, 1);
+ case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
+ return EmitAMDGPUWorkGroupSize(*this, 2);
+
// r600 intrinsics
case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
@@ -13209,6 +14852,61 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
case AMDGPU::BI__builtin_r600_read_tidig_z:
return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
+ case AMDGPU::BI__builtin_amdgcn_alignbit: {
+ llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+ llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
+ Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
+ return Builder.CreateCall(F, { Src0, Src1, Src2 });
+ }
+
+ case AMDGPU::BI__builtin_amdgcn_fence: {
+ if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1)), AO, SSID))
+ return Builder.CreateFence(AO, SSID);
+ LLVM_FALLTHROUGH;
+ }
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+ unsigned BuiltinAtomicOp;
+ llvm::Type *ResultType = ConvertType(E->getType());
+
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
+ break;
+ }
+
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+
+ llvm::Function *F =
+ CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
+
+ if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)), AO, SSID)) {
+
+ // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
+ // scope as unsigned values
+ Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
+ Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
+
+ QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
+ bool Volatile =
+ PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+ Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
+
+ return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
+ }
+ LLVM_FALLTHROUGH;
+ }
default:
return nullptr;
}
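[Editor's note: illustration, not part of the patch.] A sketched source-level
use of the atomic inc case above; the exact builtin prototype is an
assumption here, and the pointee's volatile qualification becomes the
trailing i1 operand of the intrinsic:

    // Wrapping increment: returns old, stores old < limit ? old + 1 : 0,
    // at "agent" scope. Prototype is assumed for illustration.
    unsigned bump(unsigned *counter, unsigned limit) {
      return __builtin_amdgcn_atomic_inc32(counter, limit,
                                           __ATOMIC_SEQ_CST, "agent");
    }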
@@ -13306,8 +15004,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
case SystemZ::BI__builtin_s390_vfsqdb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
- return Builder.CreateCall(F, X);
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
+ return Builder.CreateConstrainedFPCall(F, { X });
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
+ return Builder.CreateCall(F, X);
+ }
}
case SystemZ::BI__builtin_s390_vfmasb:
case SystemZ::BI__builtin_s390_vfmadb: {
@@ -13315,8 +15018,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateCall(F, {X, Y, Z});
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
+ return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ return Builder.CreateCall(F, {X, Y, Z});
+ }
}
case SystemZ::BI__builtin_s390_vfmssb:
case SystemZ::BI__builtin_s390_vfmsdb: {
@@ -13324,8 +15032,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
+ return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+ }
}
case SystemZ::BI__builtin_s390_vfnmasb:
case SystemZ::BI__builtin_s390_vfnmadb: {
@@ -13333,8 +15046,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
+ return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
+ }
}
case SystemZ::BI__builtin_s390_vfnmssb:
case SystemZ::BI__builtin_s390_vfnmsdb: {
@@ -13342,9 +15060,15 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- Value *NegZ = Builder.CreateFNeg(Z, "neg");
- return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
+ Value *NegZ = Builder.CreateFNeg(Z, "neg");
+ return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
+ } else {
+ Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ Value *NegZ = Builder.CreateFNeg(Z, "neg");
+ return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
+ }
}
case SystemZ::BI__builtin_s390_vflpsb:
case SystemZ::BI__builtin_s390_vflpdb: {
@@ -13373,30 +15097,42 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
// Check whether this instance can be represented via a LLVM standard
// intrinsic. We only support some combinations of M4 and M5.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ Intrinsic::ID CI;
switch (M4.getZExtValue()) {
default: break;
case 0: // IEEE-inexact exception allowed
switch (M5.getZExtValue()) {
default: break;
- case 0: ID = Intrinsic::rint; break;
+ case 0: ID = Intrinsic::rint;
+ CI = Intrinsic::experimental_constrained_rint; break;
}
break;
case 4: // IEEE-inexact exception suppressed
switch (M5.getZExtValue()) {
default: break;
- case 0: ID = Intrinsic::nearbyint; break;
- case 1: ID = Intrinsic::round; break;
- case 5: ID = Intrinsic::trunc; break;
- case 6: ID = Intrinsic::ceil; break;
- case 7: ID = Intrinsic::floor; break;
+ case 0: ID = Intrinsic::nearbyint;
+ CI = Intrinsic::experimental_constrained_nearbyint; break;
+ case 1: ID = Intrinsic::round;
+ CI = Intrinsic::experimental_constrained_round; break;
+ case 5: ID = Intrinsic::trunc;
+ CI = Intrinsic::experimental_constrained_trunc; break;
+ case 6: ID = Intrinsic::ceil;
+ CI = Intrinsic::experimental_constrained_ceil; break;
+ case 7: ID = Intrinsic::floor;
+ CI = Intrinsic::experimental_constrained_floor; break;
}
break;
}
if (ID != Intrinsic::not_intrinsic) {
- Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, X);
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(CI, ResultType);
+ return Builder.CreateConstrainedFPCall(F, X);
+ } else {
+ Function *F = CGM.getIntrinsic(ID, ResultType);
+ return Builder.CreateCall(F, X);
+ }
}
- switch (BuiltinID) {
+ switch (BuiltinID) { // FIXME: constrained version?
case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
default: llvm_unreachable("Unknown BuiltinID");
@@ -13419,13 +15155,20 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
// Check whether this instance can be represented via a LLVM standard
// intrinsic. We only support some values of M4.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ Intrinsic::ID CI;
switch (M4.getZExtValue()) {
default: break;
- case 4: ID = Intrinsic::maxnum; break;
+ case 4: ID = Intrinsic::maxnum;
+ CI = Intrinsic::experimental_constrained_maxnum; break;
}
if (ID != Intrinsic::not_intrinsic) {
- Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, {X, Y});
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(CI, ResultType);
+ return Builder.CreateConstrainedFPCall(F, {X, Y});
+ } else {
+ Function *F = CGM.getIntrinsic(ID, ResultType);
+ return Builder.CreateCall(F, {X, Y});
+ }
}
switch (BuiltinID) {
case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
@@ -13449,13 +15192,20 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
// Check whether this instance can be represented via a LLVM standard
// intrinsic. We only support some values of M4.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ Intrinsic::ID CI;
switch (M4.getZExtValue()) {
default: break;
- case 4: ID = Intrinsic::minnum; break;
+ case 4: ID = Intrinsic::minnum;
+ CI = Intrinsic::experimental_constrained_minnum; break;
}
if (ID != Intrinsic::not_intrinsic) {
- Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, {X, Y});
+ if (Builder.getIsFPConstrained()) {
+ Function *F = CGM.getIntrinsic(CI, ResultType);
+ return Builder.CreateConstrainedFPCall(F, {X, Y});
+ } else {
+ Function *F = CGM.getIntrinsic(ID, ResultType);
+ return Builder.CreateCall(F, {X, Y});
+ }
}
switch (BuiltinID) {
case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
@@ -13815,7 +15565,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
auto MakeLdg = [&](unsigned IntrinsicID) {
Value *Ptr = EmitScalarExpr(E->getArg(0));
clang::CharUnits Align =
- getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
return Builder.CreateCall(
CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
Ptr->getType()}),
@@ -14344,7 +16094,7 @@ RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
Result = Builder.CreatePointerCast(Result, Args.SrcType);
// Emit an alignment assumption to ensure that the new alignment is
// propagated to loads/stores, etc.
- EmitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
+ emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
}
assert(Result->getType() == Args.SrcType);
return RValue::get(Result);
@@ -14368,30 +16118,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
return Builder.CreateCall(Callee, Args);
}
- case WebAssembly::BI__builtin_wasm_memory_init: {
- llvm::APSInt SegConst;
- if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
- llvm_unreachable("Constant arg isn't actually constant?");
- llvm::APSInt MemConst;
- if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext()))
- llvm_unreachable("Constant arg isn't actually constant?");
- if (!MemConst.isNullValue())
- ErrorUnsupported(E, "non-zero memory index");
- Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst),
- llvm::ConstantInt::get(getLLVMContext(), MemConst),
- EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
- EmitScalarExpr(E->getArg(4))};
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init);
- return Builder.CreateCall(Callee, Args);
- }
- case WebAssembly::BI__builtin_wasm_data_drop: {
- llvm::APSInt SegConst;
- if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
- llvm_unreachable("Constant arg isn't actually constant?");
- Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst);
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop);
- return Builder.CreateCall(Callee, {Arg});
- }
case WebAssembly::BI__builtin_wasm_tls_size: {
llvm::Type *ResultType = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
@@ -14460,8 +16186,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
@@ -14472,8 +16197,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
@@ -14500,6 +16224,55 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_pmin_f32x4:
+ case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_pmax_f32x4:
+ case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_ceil_f32x4:
+ case WebAssembly::BI__builtin_wasm_floor_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_f32x4:
+ case WebAssembly::BI__builtin_wasm_nearest_f32x4:
+ case WebAssembly::BI__builtin_wasm_ceil_f64x2:
+ case WebAssembly::BI__builtin_wasm_floor_f64x2:
+ case WebAssembly::BI__builtin_wasm_trunc_f64x2:
+ case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_ceil_f32x4:
+ case WebAssembly::BI__builtin_wasm_ceil_f64x2:
+ IntNo = Intrinsic::wasm_ceil;
+ break;
+ case WebAssembly::BI__builtin_wasm_floor_f32x4:
+ case WebAssembly::BI__builtin_wasm_floor_f64x2:
+ IntNo = Intrinsic::wasm_floor;
+ break;
+ case WebAssembly::BI__builtin_wasm_trunc_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_f64x2:
+ IntNo = Intrinsic::wasm_trunc;
+ break;
+ case WebAssembly::BI__builtin_wasm_nearest_f32x4:
+ case WebAssembly::BI__builtin_wasm_nearest_f64x2:
+ IntNo = Intrinsic::wasm_nearest;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *Value = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, Value);
+ }
case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
Value *Src = EmitScalarExpr(E->getArg(0));
Value *Indices = EmitScalarExpr(E->getArg(1));
@@ -14551,7 +16324,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
switch (BuiltinID) {
case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
- llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
+ llvm::Type *ElemType =
+ cast<llvm::VectorType>(ConvertType(E->getType()))->getElementType();
Value *Trunc = Builder.CreateTrunc(Val, ElemType);
return Builder.CreateInsertElement(Vec, Trunc, Lane);
}
@@ -14598,6 +16372,56 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_abs_i8x16:
+ case WebAssembly::BI__builtin_wasm_abs_i16x8:
+ case WebAssembly::BI__builtin_wasm_abs_i32x4: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Neg = Builder.CreateNeg(Vec, "neg");
+ Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
+ Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
+ return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
+ }
+ case WebAssembly::BI__builtin_wasm_min_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_min_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_max_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_max_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_min_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_min_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_max_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_max_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_min_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_min_u_i32x4:
+ case WebAssembly::BI__builtin_wasm_max_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *ICmp;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_min_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_min_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_min_s_i32x4:
+ ICmp = Builder.CreateICmpSLT(LHS, RHS);
+ break;
+ case WebAssembly::BI__builtin_wasm_min_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_min_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_min_u_i32x4:
+ ICmp = Builder.CreateICmpULT(LHS, RHS);
+ break;
+ case WebAssembly::BI__builtin_wasm_max_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_max_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_max_s_i32x4:
+ ICmp = Builder.CreateICmpSGT(LHS, RHS);
+ break;
+ case WebAssembly::BI__builtin_wasm_max_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_max_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_max_u_i32x4:
+ ICmp = Builder.CreateICmpUGT(LHS, RHS);
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ return Builder.CreateSelect(ICmp, LHS, RHS);
+ }
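    // [Editor's note: illustration, not part of the patch.] These integer
    // min/max builtins deliberately emit generic compare+select instead of a
    // target intrinsic; e.g. __builtin_wasm_min_s_i8x16(a, b) becomes, in
    // essence:
    //   %cmp = icmp slt <16 x i8> %a, %b
    //   %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
    // leaving instruction selection to recognize the idiom as i8x16.min_s.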
case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -14649,6 +16473,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
return Builder.CreateCall(Callee, {Vec});
}
+ case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
+ case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
+ case WebAssembly::BI__builtin_wasm_bitmask_i32x4: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
case WebAssembly::BI__builtin_wasm_abs_f32x4:
case WebAssembly::BI__builtin_wasm_abs_f64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
@@ -14741,68 +16573,124 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
return Builder.CreateCall(Callee, Vec);
}
+ case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
+ Value *Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
+ Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
+ while (OpIdx < 18) {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(OpIdx)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ }
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
+ return Builder.CreateCall(Callee, Ops);
+ }
default:
return nullptr;
}
}
+static std::pair<Intrinsic::ID, unsigned>
+getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) {
+ struct Info {
+ unsigned BuiltinID;
+ Intrinsic::ID IntrinsicID;
+ unsigned VecLen;
+ };
+ Info Infos[] = {
+#define CUSTOM_BUILTIN_MAPPING(x,s) \
+ { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
+ CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
+#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
+#undef CUSTOM_BUILTIN_MAPPING
+ };
+
+ auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
+ static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
+ (void)SortOnce;
+
+ const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
+ Info{BuiltinID, 0, 0}, CmpInfo);
+ if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
+ return {Intrinsic::not_intrinsic, 0};
+
+ return {F->IntrinsicID, F->VecLen};
+}
+
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
- SmallVector<llvm::Value *, 4> Ops;
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ Intrinsic::ID ID;
+ unsigned VecLen;
+ std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID);
- auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
+ auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
// The base pointer is passed by address, so it needs to be loaded.
- Address BP = EmitPointerWithAlignment(E->getArg(0));
- BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
- BP.getAlignment());
+ Address A = EmitPointerWithAlignment(E->getArg(0));
+ Address BP = Address(
+ Builder.CreateBitCast(A.getPointer(), Int8PtrPtrTy), A.getAlignment());
llvm::Value *Base = Builder.CreateLoad(BP);
- // Operands are Base, Increment, Modifier, Start.
- if (HasImm)
- Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(3)) };
- else
- Ops = { Base, EmitScalarExpr(E->getArg(1)),
- EmitScalarExpr(E->getArg(2)) };
+ // The treatment of both loads and stores is the same: the arguments for
+ // the builtin are the same as the arguments for the intrinsic.
+ // Load:
+ // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
+ // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
+ // Store:
+ // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
+ // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
+ SmallVector<llvm::Value*,5> Ops = { Base };
+ for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
- llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
- llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
- NewBase->getType()->getPointerTo());
+ // The load intrinsics generate two results (Value, NewBase), stores
+ // generate one (NewBase). The new base address needs to be stored.
+ llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
+ : Result;
+ llvm::Value *LV = Builder.CreateBitCast(
+ EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo());
Address Dest = EmitPointerWithAlignment(E->getArg(0));
- // The intrinsic generates two results. The new value for the base pointer
- // needs to be stored.
- Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
- return Builder.CreateExtractValue(Result, 0);
- };
-
- auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
- // The base pointer is passed by address, so it needs to be loaded.
- Address BP = EmitPointerWithAlignment(E->getArg(0));
- BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
- BP.getAlignment());
- llvm::Value *Base = Builder.CreateLoad(BP);
- // Operands are Base, Increment, Modifier, Value, Start.
- if (HasImm)
- Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
- else
- Ops = { Base, EmitScalarExpr(E->getArg(1)),
- EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
-
- llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
- llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
- NewBase->getType()->getPointerTo());
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- // The intrinsic generates one result, which is the new value for the base
- // pointer. It needs to be stored.
- return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
+ llvm::Value *RetVal =
+ Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
+ if (IsLoad)
+ RetVal = Builder.CreateExtractValue(Result, 0);
+ return RetVal;
};
// Handle the conversion of bit-reverse load intrinsics to bit code.
// The intrinsic call after this function only reads from memory and the
// write to memory is dealt by the store instruction.
- auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
+ auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
// The intrinsic generates one result, which is the new value for the base
// pointer. It needs to be returned. The result of the load instruction is
// passed to intrinsic by address, so the value needs to be stored.
@@ -14820,9 +16708,9 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
// Operands are Base, Dest, Modifier.
// The intrinsic format in LLVM IR is defined as
// { ValueType, i8* } (i8*, i32).
- Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
+ llvm::Value *Result = Builder.CreateCall(
+ CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
// The value needs to be stored as the variable is passed by reference.
llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
@@ -14838,95 +16726,65 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Result, 1);
};
+ auto V2Q = [this, VecLen] (llvm::Value *Vec) {
+ Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
+ : Intrinsic::hexagon_V6_vandvrt;
+ return Builder.CreateCall(CGM.getIntrinsic(ID),
+ {Vec, Builder.getInt32(-1)});
+ };
+ auto Q2V = [this, VecLen] (llvm::Value *Pred) {
+ Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
+ : Intrinsic::hexagon_V6_vandqrt;
+ return Builder.CreateCall(CGM.getIntrinsic(ID),
+ {Pred, Builder.getInt32(-1)});
+ };
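    // [Editor's note: illustration, not part of the patch.] HVX predicates
    // are <N x i1> values in LLVM IR but plain byte vectors at the builtin
    // level; V2Q and Q2V bridge the two with vandvrt/vandqrt against an
    // all-ones mask. In 64-byte mode (VecLen != 128), roughly:
    //   %q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)
    //   %v = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)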
+
switch (BuiltinID) {
+ // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
+ // and the corresponding C/C++ builtins use loads/stores to update
+ // the predicate.
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
- case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
- Address Dest = EmitPointerWithAlignment(E->getArg(2));
- unsigned Size;
- if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
- Size = 512;
- ID = Intrinsic::hexagon_V6_vaddcarry;
- } else {
- Size = 1024;
- ID = Intrinsic::hexagon_V6_vaddcarry_128B;
- }
- Dest = Builder.CreateBitCast(Dest,
- llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
- LoadInst *QLd = Builder.CreateLoad(Dest);
- Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
- llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
- Vprd->getType()->getPointerTo(0));
- Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
- return Builder.CreateExtractValue(Result, 0);
- }
+ case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
- Address Dest = EmitPointerWithAlignment(E->getArg(2));
- unsigned Size;
- if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
- Size = 512;
- ID = Intrinsic::hexagon_V6_vsubcarry;
- } else {
- Size = 1024;
- ID = Intrinsic::hexagon_V6_vsubcarry_128B;
- }
- Dest = Builder.CreateBitCast(Dest,
- llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
- LoadInst *QLd = Builder.CreateLoad(Dest);
- Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
- llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
- Vprd->getType()->getPointerTo(0));
- Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+ // Get the type from the 0-th argument.
+ llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
+ Address PredAddr = Builder.CreateBitCast(
+ EmitPointerWithAlignment(E->getArg(2)), VecType->getPointerTo(0));
+ llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
+ {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
+
+ llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
+ Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
+ PredAddr.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
+
case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
- return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
- return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
- return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
- return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
- return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
+ return MakeCircOp(ID, /*IsLoad=*/true);
case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
- return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
- return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
- return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
- return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
- return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
- return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
- return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
- return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
- return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
- return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
+ return MakeCircOp(ID, /*IsLoad=*/false);
case Hexagon::BI__builtin_brev_ldub:
return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
case Hexagon::BI__builtin_brev_ldb:
@@ -14939,8 +16797,40 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
case Hexagon::BI__builtin_brev_ldd:
return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
- default:
- break;
+
+ default: {
+ if (ID == Intrinsic::not_intrinsic)
+ return nullptr;
+
+ auto IsVectorPredTy = [](llvm::Type *T) {
+ return T->isVectorTy() &&
+ cast<llvm::VectorType>(T)->getElementType()->isIntegerTy(1);
+ };
+
+ llvm::Function *IntrFn = CGM.getIntrinsic(ID);
+ llvm::FunctionType *IntrTy = IntrFn->getFunctionType();
+ SmallVector<llvm::Value*,4> Ops;
+ for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) {
+ llvm::Type *T = IntrTy->getParamType(i);
+ const Expr *A = E->getArg(i);
+ if (IsVectorPredTy(T)) {
+ // There will be an implicit cast to a boolean vector. Strip it.
+ if (auto *Cast = dyn_cast<ImplicitCastExpr>(A)) {
+ if (Cast->getCastKind() == CK_BitCast)
+ A = Cast->getSubExpr();
+ }
+ Ops.push_back(V2Q(EmitScalarExpr(A)));
+ } else {
+ Ops.push_back(EmitScalarExpr(A));
+ }
+ }
+
+ llvm::Value *Call = Builder.CreateCall(IntrFn, Ops);
+ if (IsVectorPredTy(IntrTy->getReturnType()))
+ Call = Q2V(Call);
+
+ return Call;
+ } // default
} // switch
return nullptr;
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 5c5cbaff0252..baf2c79cc2b6 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -50,7 +50,7 @@ private:
struct VarInfo {
llvm::GlobalVariable *Var;
const VarDecl *D;
- unsigned Flag;
+ DeviceVarFlags Flags;
};
llvm::SmallVector<VarInfo, 16> DeviceVars;
/// Keeps track of variable containing handle of GPU binary. Populated by
@@ -117,23 +117,38 @@ private:
void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
- std::string getDeviceSideName(const Decl *ND);
+ std::string getDeviceSideName(const NamedDecl *ND) override;
public:
CGNVCUDARuntime(CodeGenModule &CGM);
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
- unsigned Flags) override {
- DeviceVars.push_back({&Var, VD, Flags});
+ bool Extern, bool Constant) override {
+ DeviceVars.push_back({&Var,
+ VD,
+ {DeviceVarFlags::Variable, Extern, Constant,
+ /*Normalized*/ false, /*Type*/ 0}});
+ }
+ void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
+ bool Extern, int Type) override {
+ DeviceVars.push_back({&Var,
+ VD,
+ {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
+ /*Normalized*/ false, Type}});
+ }
+ void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
+ bool Extern, int Type, bool Normalized) override {
+ DeviceVars.push_back({&Var,
+ VD,
+ {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
+ Normalized, Type}});
}
/// Creates module constructor function
llvm::Function *makeModuleCtorFunction() override;
/// Creates module destructor function
llvm::Function *makeModuleDtorFunction() override;
- /// Construct and return the stub name of a kernel.
- std::string getDeviceStubName(llvm::StringRef Name) const override;
};
}
@@ -204,40 +219,30 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
return llvm::FunctionType::get(VoidTy, Params, false);
}
-std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) {
- auto *ND = cast<const NamedDecl>(D);
+std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
+ GlobalDecl GD;
+  // ND could be either a kernel or a variable.
+ if (auto *FD = dyn_cast<FunctionDecl>(ND))
+ GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
+ else
+ GD = GlobalDecl(ND);
std::string DeviceSideName;
if (DeviceMC->shouldMangleDeclName(ND)) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
- DeviceMC->mangleName(ND, Out);
- DeviceSideName = Out.str();
+ DeviceMC->mangleName(GD, Out);
+ DeviceSideName = std::string(Out.str());
} else
- DeviceSideName = ND->getIdentifier()->getName();
+ DeviceSideName = std::string(ND->getIdentifier()->getName());
return DeviceSideName;
}
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
- // Ensure either we have different ABIs between host and device compilations,
- // says host compilation following MSVC ABI but device compilation follows
- // Itanium C++ ABI or, if they follow the same ABI, kernel names after
- // mangling should be the same after name stubbing. The later checking is
- // very important as the device kernel name being mangled in host-compilation
- // is used to resolve the device binaries to be executed. Inconsistent naming
- // result in undefined behavior. Even though we cannot check that naming
- // directly between host- and device-compilations, the host- and
- // device-mangling in host compilation could help catching certain ones.
- assert((CGF.CGM.getContext().getAuxTargetInfo() &&
- (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
- CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
- getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
- CGF.CurFn->getName());
-
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
- CGF.getLangOpts().HIPUseNewLaunchAPI)
+ (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
@@ -418,7 +423,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// each emitted kernel.
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
for (auto &&I : EmittedKernels) {
- llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D));
+ llvm::Constant *KernelName =
+ makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
@@ -434,30 +440,70 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
Builder.CreateCall(RegisterFunc, Args);
}
+ llvm::Type *VarSizeTy = IntTy;
+  // For HIP or CUDA 9.0+, the device variable size is of type `size_t`.
+ if (CGM.getLangOpts().HIP ||
+ ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90)
+ VarSizeTy = SizeTy;
+
// void __cudaRegisterVar(void **, char *, char *, const char *,
// int, int, int, int)
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
- CharPtrTy, IntTy, IntTy,
+ CharPtrTy, IntTy, VarSizeTy,
IntTy, IntTy};
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, RegisterVarParams, false),
+ llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
addUnderscoredPrefixToName("RegisterVar"));
+ // void __cudaRegisterSurface(void **, const struct surfaceReference *,
+ // const void **, const char *, int, int);
+ llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(
+ VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
+ false),
+ addUnderscoredPrefixToName("RegisterSurface"));
+ // void __cudaRegisterTexture(void **, const struct textureReference *,
+ // const void **, const char *, int, int, int)
+ llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(
+ VoidTy,
+ {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
+ false),
+ addUnderscoredPrefixToName("RegisterTexture"));
for (auto &&Info : DeviceVars) {
llvm::GlobalVariable *Var = Info.Var;
- unsigned Flags = Info.Flag;
llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
- uint64_t VarSize =
- CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
- llvm::Value *Args[] = {
- &GpuBinaryHandlePtr,
- Builder.CreateBitCast(Var, VoidPtrTy),
- VarName,
- VarName,
- llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
- llvm::ConstantInt::get(IntTy, VarSize),
- llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
- llvm::ConstantInt::get(IntTy, 0)};
- Builder.CreateCall(RegisterVar, Args);
+ switch (Info.Flags.getKind()) {
+ case DeviceVarFlags::Variable: {
+ uint64_t VarSize =
+ CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
+ llvm::Value *Args[] = {
+ &GpuBinaryHandlePtr,
+ Builder.CreateBitCast(Var, VoidPtrTy),
+ VarName,
+ VarName,
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
+ llvm::ConstantInt::get(VarSizeTy, VarSize),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
+ llvm::ConstantInt::get(IntTy, 0)};
+ Builder.CreateCall(RegisterVar, Args);
+ break;
+ }
+ case DeviceVarFlags::Surface:
+ Builder.CreateCall(
+ RegisterSurf,
+ {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
+ VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
+ break;
+ case DeviceVarFlags::Texture:
+ Builder.CreateCall(
+ RegisterTex,
+ {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
+ VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
+ break;
+ }
}
Builder.CreateRetVoid();
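To make the argument order above concrete, here is a host-side mock of the three registration calls the function emits, one per DeviceVarFlags kind. The signatures follow the comments in the hunk; the stub bodies and sample globals are placeholders, not the real CUDA/HIP runtime (and note that the size parameter of RegisterVar is int on pre-9.0 CUDA, size_t otherwise):

    #include <cstddef>
    #include <cstdio>

    static void *FatbinHandle; // stands in for the GPU binary handle

    // void __cudaRegisterVar(void **, char *, char *, const char *, int,
    //                        int/size_t, int, int)
    static void RegisterVar(void **H, void *V, const char *Name,
                            const char *Mangled, int Ext, std::size_t Size,
                            int Const, int Global) {
      std::printf("var %s: extern=%d size=%zu constant=%d\n", Name, Ext, Size,
                  Const);
    }

    // void __cudaRegisterSurface(void **, surface ref, name, name, int, int)
    static void RegisterSurface(void **H, void *S, const char *Name,
                                const char *Mangled, int Type, int Ext) {
      std::printf("surface %s: type=%d extern=%d\n", Name, Type, Ext);
    }

    // void __cudaRegisterTexture(void **, texture ref, name, name, int, int, int)
    static void RegisterTexture(void **H, void *T, const char *Name,
                                const char *Mangled, int Type, int Norm,
                                int Ext) {
      std::printf("texture %s: type=%d normalized=%d extern=%d\n", Name, Type,
                  Norm, Ext);
    }

    int main() {
      int DVar = 0, Surf = 0, Tex = 0; // placeholders for device globals
      RegisterVar(&FatbinHandle, &DVar, "dvar", "dvar", /*Extern=*/0,
                  sizeof(int), /*Constant=*/1, /*Global=*/0);
      RegisterSurface(&FatbinHandle, &Surf, "surf", "surf", /*Type=*/2,
                      /*Extern=*/0);
      RegisterTexture(&FatbinHandle, &Tex, "tex", "tex", /*Type=*/2,
                      /*Normalized=*/1, /*Extern=*/0);
    }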
@@ -551,8 +597,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
if (CudaGpuBinary) {
// If fatbin is available from early finalization, create a string
// literal containing the fat binary loaded from the given file.
- FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
- FatbinConstantName, 8);
+ FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()),
+ "", FatbinConstantName, 8);
} else {
// If fatbin is not available, create an external symbol
// __hip_fatbin in section .hip_fatbin. The external symbol is supposed
@@ -586,7 +632,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// For CUDA, create a string literal containing the fat binary loaded from
// the given file.
- FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
FatbinConstantName, 8);
FatMagic = CudaFatMagic;
}
@@ -691,8 +737,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
- llvm::Constant *ModuleIDConstant =
- makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
+ llvm::Constant *ModuleIDConstant = makeConstantString(
+ std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
// Create an alias for the FatbinWrapper that nvcc will look for.
llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
@@ -797,12 +843,6 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
return ModuleDtorFunc;
}
-std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
- if (!CGM.getLangOpts().HIP)
- return Name;
- return (Name + ".stub").str();
-}
-
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
return new CGNVCUDARuntime(CGM);
}
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index e548a3a546d4..19e70a2022a5 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -25,6 +25,7 @@ class GlobalVariable;
namespace clang {
class CUDAKernelCallExpr;
+class NamedDecl;
class VarDecl;
namespace CodeGen {
@@ -41,9 +42,30 @@ protected:
public:
// Global variable properties that must be passed to CUDA runtime.
- enum DeviceVarFlags {
- ExternDeviceVar = 0x01, // extern
- ConstantDeviceVar = 0x02, // __constant__
+ class DeviceVarFlags {
+ public:
+ enum DeviceVarKind {
+ Variable, // Variable
+ Surface, // Builtin surface
+ Texture, // Builtin texture
+ };
+
+ private:
+ unsigned Kind : 2;
+ unsigned Extern : 1;
+ unsigned Constant : 1; // Constant variable.
+ unsigned Normalized : 1; // Normalized texture.
+    int SurfTexType; // Type of surface/texture.
+
+ public:
+ DeviceVarFlags(DeviceVarKind K, bool E, bool C, bool N, int T)
+ : Kind(K), Extern(E), Constant(C), Normalized(N), SurfTexType(T) {}
+
+ DeviceVarKind getKind() const { return static_cast<DeviceVarKind>(Kind); }
+ bool isExtern() const { return Extern; }
+ bool isConstant() const { return Constant; }
+ bool isNormalized() const { return Normalized; }
+ int getSurfTexType() const { return SurfTexType; }
};
CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}
@@ -56,7 +78,11 @@ public:
/// Emits a kernel launch stub.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
- unsigned Flags) = 0;
+ bool Extern, bool Constant) = 0;
+ virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
+ bool Extern, int Type) = 0;
+ virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
+ bool Extern, int Type, bool Normalized) = 0;
/// Constructs and returns a module initialization function or nullptr if it's
/// not needed. Must be called after all kernels have been emitted.
@@ -66,8 +92,9 @@ public:
/// Must be called after ModuleCtorFunction
virtual llvm::Function *makeModuleDtorFunction() = 0;
- /// Construct and return the stub name of a kernel.
- virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0;
+  /// Returns the device-side name of a function or variable, even if the
+  /// current compilation is for the host.
+ virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
};
/// Creates an instance of a CUDA runtime class.
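The new DeviceVarFlags class is plain C++, so its bit-field packing can be checked in isolation. A minimal sketch constructing one flag set per registerDevice* overload and reading it back; the concrete kind/type values are illustrative only:

    #include <cassert>

    class DeviceVarFlags {
    public:
      enum DeviceVarKind { Variable, Surface, Texture };

    private:
      unsigned Kind : 2;
      unsigned Extern : 1;
      unsigned Constant : 1;   // Constant variable.
      unsigned Normalized : 1; // Normalized texture.
      int SurfTexType;         // Type of surface/texture.

    public:
      DeviceVarFlags(DeviceVarKind K, bool E, bool C, bool N, int T)
          : Kind(K), Extern(E), Constant(C), Normalized(N), SurfTexType(T) {}

      DeviceVarKind getKind() const { return static_cast<DeviceVarKind>(Kind); }
      bool isExtern() const { return Extern; }
      bool isConstant() const { return Constant; }
      bool isNormalized() const { return Normalized; }
      int getSurfTexType() const { return SurfTexType; }
    };

    int main() {
      // registerDeviceVar: kind/extern/constant carry the information.
      DeviceVarFlags Var(DeviceVarFlags::Variable, /*E=*/false, /*C=*/true,
                         /*N=*/false, /*T=*/0);
      // registerDeviceTex: the type and normalization matter instead.
      DeviceVarFlags Tex(DeviceVarFlags::Texture, /*E=*/false, /*C=*/false,
                         /*N=*/true, /*T=*/2);
      assert(Var.getKind() == DeviceVarFlags::Variable && Var.isConstant());
      assert(Tex.getKind() == DeviceVarFlags::Texture && Tex.isNormalized());
      return 0;
    }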
diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp
index 1928e0df3809..a4bd2c6d5da0 100644
--- a/clang/lib/CodeGen/CGCXX.cpp
+++ b/clang/lib/CodeGen/CGCXX.cpp
@@ -263,8 +263,8 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
AddressPoint.AddressPointIndex;
llvm::Value *VFuncPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt");
- llvm::Value *VFunc =
- CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes);
+ llvm::Value *VFunc = CGF.Builder.CreateAlignedLoad(
+ VFuncPtr, llvm::Align(CGF.PointerAlignInBytes));
CGCallee Callee(GD, VFunc);
return Callee;
}
diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp
index 7ada4032b3ee..65327a2435b5 100644
--- a/clang/lib/CodeGen/CGCXXABI.cpp
+++ b/clang/lib/CodeGen/CGCXXABI.cpp
@@ -156,6 +156,8 @@ void CGCXXABI::setCXXABIThisValue(CodeGenFunction &CGF, llvm::Value *ThisPtr) {
void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
RValue RV, QualType ResultType) {
+ assert(!CGF.hasAggregateEvaluationKind(ResultType) &&
+ "cannot handle aggregates");
CGF.EmitReturnOfRValue(RV, ResultType);
}
@@ -313,3 +315,20 @@ CatchTypeInfo CGCXXABI::getCatchAllTypeInfo() {
std::vector<CharUnits> CGCXXABI::getVBPtrOffsets(const CXXRecordDecl *RD) {
return std::vector<CharUnits>();
}
+
+CGCXXABI::AddedStructorArgCounts CGCXXABI::addImplicitConstructorArgs(
+ CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
+ bool ForVirtualBase, bool Delegating, CallArgList &Args) {
+ AddedStructorArgs AddedArgs =
+ getImplicitConstructorArgs(CGF, D, Type, ForVirtualBase, Delegating);
+ for (size_t i = 0; i < AddedArgs.Prefix.size(); ++i) {
+ Args.insert(Args.begin() + 1 + i,
+ CallArg(RValue::get(AddedArgs.Prefix[i].Value),
+ AddedArgs.Prefix[i].Type));
+ }
+ for (const auto &arg : AddedArgs.Suffix) {
+ Args.add(RValue::get(arg.Value), arg.Type);
+ }
+ return AddedStructorArgCounts(AddedArgs.Prefix.size(),
+ AddedArgs.Suffix.size());
+}
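The insertion arithmetic in the new addImplicitConstructorArgs is easy to get wrong, so here is a self-contained sketch of just the positioning: prefix args land immediately after the first (`this`) argument, suffix args at the end. Strings stand in for CallArg values; the "vtt" and "most_derived" names are illustrative, loosely modeled on the Itanium VTT prefix arg and the MSVC most-derived suffix flag:

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Args = {"this", "x", "y"}; // existing call args
      std::vector<std::string> Prefix = {"vtt"};          // ABI-added prefix
      std::vector<std::string> Suffix = {"most_derived"}; // ABI-added suffix

      // Same positioning as the loop above: Args.begin() + 1 + i.
      for (size_t i = 0; i < Prefix.size(); ++i)
        Args.insert(Args.begin() + 1 + i, Prefix[i]); // after `this`
      for (const auto &A : Suffix)
        Args.push_back(A);

      std::vector<std::string> Expected = {"this", "vtt", "x", "y",
                                           "most_derived"};
      assert(Args == Expected);
      return 0;
    }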
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index bff49be7a3c4..f5b3fc13bbbd 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -16,6 +16,7 @@
#include "CodeGenFunction.h"
#include "clang/Basic/LLVM.h"
+#include "clang/CodeGen/CodeGenABITypes.h"
namespace llvm {
class Constant;
@@ -107,6 +108,8 @@ public:
virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; }
+ virtual bool useSinitAndSterm() const { return false; }
+
/// Returns true if the target allows calling a function through a pointer
/// with a different signature than the actual function (or equivalently,
/// bitcasting a function or function pointer to a different function type).
@@ -287,24 +290,44 @@ public:
/// Emit constructor variants required by this ABI.
virtual void EmitCXXConstructors(const CXXConstructorDecl *D) = 0;
- /// Notes how many arguments were added to the beginning (Prefix) and ending
- /// (Suffix) of an arg list.
+ /// Additional implicit arguments to add to the beginning (Prefix) and end
+ /// (Suffix) of a constructor / destructor arg list.
///
- /// Note that Prefix actually refers to the number of args *after* the first
- /// one: `this` arguments always come first.
+ /// Note that Prefix should actually be inserted *after* the first existing
+ /// arg; `this` arguments always come first.
struct AddedStructorArgs {
+ struct Arg {
+ llvm::Value *Value;
+ QualType Type;
+ };
+ SmallVector<Arg, 1> Prefix;
+ SmallVector<Arg, 1> Suffix;
+ AddedStructorArgs() = default;
+ AddedStructorArgs(SmallVector<Arg, 1> P, SmallVector<Arg, 1> S)
+ : Prefix(std::move(P)), Suffix(std::move(S)) {}
+ static AddedStructorArgs prefix(SmallVector<Arg, 1> Args) {
+ return {std::move(Args), {}};
+ }
+ static AddedStructorArgs suffix(SmallVector<Arg, 1> Args) {
+ return {{}, std::move(Args)};
+ }
+ };
+
+ /// Similar to AddedStructorArgs, but only notes the number of additional
+ /// arguments.
+ struct AddedStructorArgCounts {
unsigned Prefix = 0;
unsigned Suffix = 0;
- AddedStructorArgs() = default;
- AddedStructorArgs(unsigned P, unsigned S) : Prefix(P), Suffix(S) {}
- static AddedStructorArgs prefix(unsigned N) { return {N, 0}; }
- static AddedStructorArgs suffix(unsigned N) { return {0, N}; }
+ AddedStructorArgCounts() = default;
+ AddedStructorArgCounts(unsigned P, unsigned S) : Prefix(P), Suffix(S) {}
+ static AddedStructorArgCounts prefix(unsigned N) { return {N, 0}; }
+ static AddedStructorArgCounts suffix(unsigned N) { return {0, N}; }
};
/// Build the signature of the given constructor or destructor variant by
/// adding any required parameters. For convenience, ArgTys has been
/// initialized with the type of 'this'.
- virtual AddedStructorArgs
+ virtual AddedStructorArgCounts
buildStructorSignature(GlobalDecl GD,
SmallVectorImpl<CanQualType> &ArgTys) = 0;
@@ -365,14 +388,26 @@ public:
/// Emit the ABI-specific prolog for the function.
virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0;
+ virtual AddedStructorArgs
+ getImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating) = 0;
+
/// Add any ABI-specific implicit arguments needed to call a constructor.
///
/// \return The number of arguments added at the beginning and end of the
/// call, which is typically zero or one.
- virtual AddedStructorArgs
+ AddedStructorArgCounts
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
- bool Delegating, CallArgList &Args) = 0;
+ bool Delegating, CallArgList &Args);
+
+ /// Get the implicit (second) parameter that comes after the "this" pointer,
+  /// or nullptr if there isn't one.
+ virtual llvm::Value *
+ getCXXDestructorImplicitParam(CodeGenFunction &CGF,
+ const CXXDestructorDecl *DD, CXXDtorType Type,
+ bool ForVirtualBase, bool Delegating) = 0;
/// Emit the destructor call.
virtual void EmitDestructorCall(CodeGenFunction &CGF,
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e4803fde230f..e8235c775d8f 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -16,6 +16,7 @@
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
+#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
@@ -325,7 +326,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
if (PassParams)
appendParameterTypes(*this, argTypes, paramInfos, FTP);
- CGCXXABI::AddedStructorArgs AddedArgs =
+ CGCXXABI::AddedStructorArgCounts AddedArgs =
TheCXXABI.buildStructorSignature(GD, argTypes);
if (!paramInfos.empty()) {
// Note: prefix implies after the first param.
@@ -815,6 +816,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->ASTCallingConvention = info.getCC();
FI->InstanceMethod = instanceMethod;
FI->ChainCall = chainCall;
+ FI->CmseNSCall = info.getCmseNSCall();
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
FI->NoCallerSavedRegs = info.getNoCallerSavedRegs();
@@ -1014,8 +1016,8 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF,
}
}
-void CodeGenFunction::ExpandTypeFromArgs(
- QualType Ty, LValue LV, SmallVectorImpl<llvm::Value *>::iterator &AI) {
+void CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
+ llvm::Function::arg_iterator &AI) {
assert(LV.isSimple() &&
"Unexpected non-simple lvalue during struct expansion.");
@@ -1044,17 +1046,17 @@ void CodeGenFunction::ExpandTypeFromArgs(
ExpandTypeFromArgs(FD->getType(), SubLV, AI);
}
} else if (isa<ComplexExpansion>(Exp.get())) {
- auto realValue = *AI++;
- auto imagValue = *AI++;
+ auto realValue = &*AI++;
+ auto imagValue = &*AI++;
EmitStoreOfComplex(ComplexPairTy(realValue, imagValue), LV, /*init*/ true);
} else {
// Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a
// primitive store.
assert(isa<NoExpansion>(Exp.get()));
if (LV.isBitField())
- EmitStoreThroughLValue(RValue::get(*AI++), LV);
+ EmitStoreThroughLValue(RValue::get(&*AI++), LV);
else
- EmitStoreOfScalar(*AI++, LV);
+ EmitStoreOfScalar(&*AI++, LV);
}
}
@@ -1232,7 +1234,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
Src = EnterStructPointerForCoercedAccess(Src, SrcSTy, DstSize, CGF);
- SrcTy = Src.getType()->getElementType();
+ SrcTy = Src.getElementType();
}
uint64_t SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
@@ -1260,11 +1262,9 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// Otherwise do coercion through memory. This is stupid, but simple.
Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
- Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
- Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty);
- CGF.Builder.CreateMemCpy(Casted, SrcCasted,
- llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
- false);
+ CGF.Builder.CreateMemCpy(Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
+ Src.getPointer(), Src.getAlignment().getAsAlign(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize));
return CGF.Builder.CreateLoad(Tmp);
}
@@ -1272,18 +1272,17 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// store the elements rather than the aggregate to be more friendly to
// fast-isel.
// FIXME: Do we need to recurse here?
-static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val,
- Address Dest, bool DestIsVolatile) {
+void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
+ bool DestIsVolatile) {
// Prefer scalar stores to first-class aggregate stores.
- if (llvm::StructType *STy =
- dyn_cast<llvm::StructType>(Val->getType())) {
+ if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i);
- llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i);
- CGF.Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
+ Address EltPtr = Builder.CreateStructGEP(Dest, i);
+ llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
+ Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
}
} else {
- CGF.Builder.CreateStore(Val, Dest, DestIsVolatile);
+ Builder.CreateStore(Val, Dest, DestIsVolatile);
}
}
@@ -1298,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src,
bool DstIsVolatile,
CodeGenFunction &CGF) {
llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst.getType()->getElementType();
+ llvm::Type *DstTy = Dst.getElementType();
if (SrcTy == DstTy) {
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
@@ -1308,7 +1307,7 @@ static void CreateCoercedStore(llvm::Value *Src,
if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy, SrcSize, CGF);
- DstTy = Dst.getType()->getElementType();
+ DstTy = Dst.getElementType();
}
llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
@@ -1334,7 +1333,7 @@ static void CreateCoercedStore(llvm::Value *Src,
// If store is legal, just bitcast the src pointer.
if (SrcSize <= DstSize) {
Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
- BuildAggStore(CGF, Src, Dst, DstIsVolatile);
+ CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
@@ -1347,11 +1346,9 @@ static void CreateCoercedStore(llvm::Value *Src,
// to that information.
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
CGF.Builder.CreateStore(Src, Tmp);
- Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
- Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty);
- CGF.Builder.CreateMemCpy(DstCasted, Casted,
- llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
- false);
+ CGF.Builder.CreateMemCpy(Dst.getPointer(), Dst.getAlignment().getAsAlign(),
+ Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, DstSize));
}
}
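The rewritten CreateCoercedLoad/CreateCoercedStore still fall back to "coercion through memory": spill the value to a temporary, then copy bytes. At the value level this is just the memcpy-based bit cast sketched below, a plain C++ illustration of the idea rather than the IR these functions actually emit:

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int main() {
      float F = 1.0f;
      uint32_t Bits;
      // The temporary alloca + memcpy in CreateCoercedLoad corresponds to
      // copying the bytes of one type into storage for another.
      std::memcpy(&Bits, &F, sizeof(Bits));
      std::cout << std::hex << Bits << "\n"; // 3f800000
      return 0;
    }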
@@ -1702,8 +1699,9 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
-void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
- bool AttrOnCallSite,
+void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
+ bool HasOptnone,
+ bool AttrOnCallSite,
llvm::AttrBuilder &FuncAttrs) {
// OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
if (!HasOptnone) {
@@ -1746,13 +1744,20 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
if (CodeGenOpts.NullPointerIsValid)
- FuncAttrs.addAttribute("null-pointer-is-valid", "true");
- if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::Invalid)
+ FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid);
+
+ if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE())
FuncAttrs.addAttribute("denormal-fp-math",
- llvm::denormalModeName(CodeGenOpts.FPDenormalMode));
+ CodeGenOpts.FPDenormalMode.str());
+ if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) {
+ FuncAttrs.addAttribute(
+ "denormal-fp-math-f32",
+ CodeGenOpts.FP32DenormalMode.str());
+ }
FuncAttrs.addAttribute("no-trapping-math",
- llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+ llvm::toStringRef(LangOpts.getFPExceptionMode() ==
+ LangOptions::FPE_Ignore));
// Strict (compliant) code is the default, so only add this attribute to
// indicate that we are trying to workaround a problem case.
@@ -1762,25 +1767,21 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
// TODO: Are these all needed?
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
FuncAttrs.addAttribute("no-infs-fp-math",
- llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
+ llvm::toStringRef(LangOpts.NoHonorInfs));
FuncAttrs.addAttribute("no-nans-fp-math",
- llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
+ llvm::toStringRef(LangOpts.NoHonorNaNs));
FuncAttrs.addAttribute("unsafe-fp-math",
- llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
+ llvm::toStringRef(LangOpts.UnsafeFPMath));
FuncAttrs.addAttribute("use-soft-float",
llvm::toStringRef(CodeGenOpts.SoftFloat));
FuncAttrs.addAttribute("stack-protector-buffer-size",
llvm::utostr(CodeGenOpts.SSPBufferSize));
FuncAttrs.addAttribute("no-signed-zeros-fp-math",
- llvm::toStringRef(CodeGenOpts.NoSignedZeros));
+ llvm::toStringRef(LangOpts.NoSignedZero));
FuncAttrs.addAttribute(
"correctly-rounded-divide-sqrt-fp-math",
llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
- if (getLangOpts().OpenCL)
- FuncAttrs.addAttribute("denorms-are-zero",
- llvm::toStringRef(CodeGenOpts.FlushDenorm));
-
// TODO: Reciprocal estimate codegen options should apply to instructions?
const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals;
if (!Recips.empty())
@@ -1796,6 +1797,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute("stackrealign");
if (CodeGenOpts.Backchain)
FuncAttrs.addAttribute("backchain");
+ if (CodeGenOpts.EnableSegmentedStacks)
+ FuncAttrs.addAttribute("split-stack");
if (CodeGenOpts.SpeculativeLoadHardening)
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
@@ -1813,10 +1816,6 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
// Exceptions aren't supported in CUDA device code.
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
-
- // Respect -fcuda-flush-denormals-to-zero.
- if (CodeGenOpts.FlushDenorm)
- FuncAttrs.addAttribute("nvptx-f32ftz", "true");
}
for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) {
@@ -1826,31 +1825,100 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
}
}
-void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
+void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
llvm::AttrBuilder FuncAttrs;
- ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(),
- /* AttrOnCallSite = */ false, FuncAttrs);
+ getDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
+ /* AttrOnCallSite = */ false, FuncAttrs);
+ // TODO: call GetCPUAndFeaturesAttributes?
F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
}
+void CodeGenModule::addDefaultFunctionDefinitionAttributes(
+ llvm::AttrBuilder &attrs) {
+ getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false,
+ /*for call*/ false, attrs);
+ GetCPUAndFeaturesAttributes(GlobalDecl(), attrs);
+}
+
+static void addNoBuiltinAttributes(llvm::AttrBuilder &FuncAttrs,
+ const LangOptions &LangOpts,
+ const NoBuiltinAttr *NBA = nullptr) {
+ auto AddNoBuiltinAttr = [&FuncAttrs](StringRef BuiltinName) {
+ SmallString<32> AttributeName;
+ AttributeName += "no-builtin-";
+ AttributeName += BuiltinName;
+ FuncAttrs.addAttribute(AttributeName);
+ };
+
+ // First, handle the language options passed through -fno-builtin.
+ if (LangOpts.NoBuiltin) {
+ // -fno-builtin disables them all.
+ FuncAttrs.addAttribute("no-builtins");
+ return;
+ }
+
+ // Then, add attributes for builtins specified through -fno-builtin-<name>.
+ llvm::for_each(LangOpts.NoBuiltinFuncs, AddNoBuiltinAttr);
+
+ // Now, let's check the __attribute__((no_builtin("...")) attribute added to
+ // the source.
+ if (!NBA)
+ return;
+
+ // If there is a wildcard in the builtin names specified through the
+ // attribute, disable them all.
+ if (llvm::is_contained(NBA->builtinNames(), "*")) {
+ FuncAttrs.addAttribute("no-builtins");
+ return;
+ }
+
+ // And last, add the rest of the builtin names.
+ llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr);
+}
+
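The mapping addNoBuiltinAttributes implements can be summarized without any LLVM types: a global -fno-builtin wins outright, per-name options become "no-builtin-<name>" strings, and a "*" wildcard in __attribute__((no_builtin)) collapses to "no-builtins". A minimal sketch, with a simplified stand-in for LangOpts and for the attribute's name list:

    #include <algorithm>
    #include <string>
    #include <vector>

    struct FakeLangOpts {
      bool NoBuiltin = false;                  // -fno-builtin
      std::vector<std::string> NoBuiltinFuncs; // -fno-builtin-<name>
    };

    static std::vector<std::string>
    noBuiltinAttrs(const FakeLangOpts &LO, const std::vector<std::string> *NBA) {
      std::vector<std::string> Attrs;
      if (LO.NoBuiltin)
        return {"no-builtins"}; // -fno-builtin disables them all
      for (const auto &N : LO.NoBuiltinFuncs)
        Attrs.push_back("no-builtin-" + N);
      if (!NBA)
        return Attrs;
      if (std::find(NBA->begin(), NBA->end(), "*") != NBA->end()) {
        Attrs.push_back("no-builtins"); // wildcard disables them all
        return Attrs;
      }
      for (const auto &N : *NBA)
        Attrs.push_back("no-builtin-" + N);
      return Attrs;
    }

    int main() {
      FakeLangOpts LO;
      LO.NoBuiltinFuncs = {"memcpy"};
      std::vector<std::string> NBA = {"memset"};
      auto A = noBuiltinAttrs(LO, &NBA);
      // A == {"no-builtin-memcpy", "no-builtin-memset"}
      return A.size() == 2 ? 0 : 1;
    }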
+/// Construct the IR attribute list of a function or call.
+///
+/// When adding an attribute, please consider where it should be handled:
+///
+/// - getDefaultFunctionAttributes is for attributes that are essentially
+/// part of the global target configuration (but perhaps can be
+/// overridden on a per-function basis). Adding attributes there
+/// will cause them to also be set in frontends that build on Clang's
+/// target-configuration logic, as well as for code defined in library
+/// modules such as CUDA's libdevice.
+///
+/// - ConstructAttributeList builds on top of getDefaultFunctionAttributes
+/// and adds declaration-specific, convention-specific, and
+/// frontend-specific logic. The last is of particular importance:
+/// attributes that restrict how the frontend generates code must be
+/// added here rather than getDefaultFunctionAttributes.
+///
void CodeGenModule::ConstructAttributeList(
StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo,
llvm::AttributeList &AttrList, unsigned &CallingConv, bool AttrOnCallSite) {
llvm::AttrBuilder FuncAttrs;
llvm::AttrBuilder RetAttrs;
+ // Collect function IR attributes from the CC lowering.
+  // We'll collect the parameter and result attributes later.
CallingConv = FI.getEffectiveCallingConvention();
if (FI.isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
+ if (FI.isCmseNSCall())
+ FuncAttrs.addAttribute("cmse_nonsecure_call");
- // If we have information about the function prototype, we can learn
- // attributes from there.
+ // Collect function IR attributes from the callee prototype if we have one.
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
bool HasOptnone = false;
+ // The NoBuiltinAttr attached to the target FunctionDecl.
+ const NoBuiltinAttr *NBA = nullptr;
+
+ // Collect function IR attributes based on declaration-specific
+ // information.
// FIXME: handle sseregparm someday...
if (TargetDecl) {
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1869,6 +1937,13 @@ void CodeGenModule::ConstructAttributeList(
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
getContext(), FuncAttrs, Fn->getType()->getAs<FunctionProtoType>());
+ if (AttrOnCallSite && Fn->isReplaceableGlobalAllocationFunction()) {
+ // A sane operator new returns a non-aliasing pointer.
+ auto Kind = Fn->getDeclName().getCXXOverloadedOperator();
+ if (getCodeGenOpts().AssumeSaneOperatorNew &&
+ (Kind == OO_New || Kind == OO_Array_New))
+ RetAttrs.addAttribute(llvm::Attribute::NoAlias);
+ }
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn);
const bool IsVirtualCall = MD && MD->isVirtual();
// Don't use [[noreturn]], _Noreturn or [[no_builtin]] for a call to a
@@ -1876,22 +1951,7 @@ void CodeGenModule::ConstructAttributeList(
if (!(AttrOnCallSite && IsVirtualCall)) {
if (Fn->isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
-
- const auto *NBA = Fn->getAttr<NoBuiltinAttr>();
- bool HasWildcard = NBA && llvm::is_contained(NBA->builtinNames(), "*");
- if (getLangOpts().NoBuiltin || HasWildcard)
- FuncAttrs.addAttribute("no-builtins");
- else {
- auto AddNoBuiltinAttr = [&FuncAttrs](StringRef BuiltinName) {
- SmallString<32> AttributeName;
- AttributeName += "no-builtin-";
- AttributeName += BuiltinName;
- FuncAttrs.addAttribute(AttributeName);
- };
- llvm::for_each(getLangOpts().NoBuiltinFuncs, AddNoBuiltinAttr);
- if (NBA)
- llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr);
- }
+ NBA = Fn->getAttr<NoBuiltinAttr>();
}
}
@@ -1924,70 +1984,93 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),
NumElemsParam);
}
+
+ if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
+ if (getLangOpts().OpenCLVersion <= 120) {
+        // OpenCL v1.2: work groups are always uniform.
+ FuncAttrs.addAttribute("uniform-work-group-size", "true");
+ } else {
+        // OpenCL v2.0: work groups may or may not be uniform. The
+        // '-cl-uniform-work-group-size' compile option hints to the compiler
+        // that the global work-size is a multiple of the work-group size
+        // specified to clEnqueueNDRangeKernel (i.e. work groups are uniform).
+ FuncAttrs.addAttribute("uniform-work-group-size",
+ llvm::toStringRef(CodeGenOpts.UniformWGSize));
+ }
+ }
}
- ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
+ // Attach "no-builtins" attributes to:
+ // * call sites: both `nobuiltin` and "no-builtins" or "no-builtin-<name>".
+ // * definitions: "no-builtins" or "no-builtin-<name>" only.
+ // The attributes can come from:
+ // * LangOpts: -ffreestanding, -fno-builtin, -fno-builtin-<name>
+ // * FunctionDecl attributes: __attribute__((no_builtin(...)))
+ addNoBuiltinAttributes(FuncAttrs, getLangOpts(), NBA);
+
+  // Collect function IR attributes based on global settings.
+ getDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
- // This must run after constructing the default function attribute list
- // to ensure that the speculative load hardening attribute is removed
- // in the case where the -mspeculative-load-hardening flag was passed.
+ // Override some default IR attributes based on declaration-specific
+ // information.
if (TargetDecl) {
if (TargetDecl->hasAttr<NoSpeculativeLoadHardeningAttr>())
FuncAttrs.removeAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>())
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
- }
-
- if (CodeGenOpts.EnableSegmentedStacks &&
- !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
- FuncAttrs.addAttribute("split-stack");
-
- // Add NonLazyBind attribute to function declarations when -fno-plt
- // is used.
- if (TargetDecl && CodeGenOpts.NoPLT) {
- if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
- if (!Fn->isDefined() && !AttrOnCallSite) {
- FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind);
+ if (TargetDecl->hasAttr<NoSplitStackAttr>())
+ FuncAttrs.removeAttribute("split-stack");
+
+ // Add NonLazyBind attribute to function declarations when -fno-plt
+ // is used.
+ // FIXME: what if we just haven't processed the function definition
+ // yet, or if it's an external definition like C99 inline?
+ if (CodeGenOpts.NoPLT) {
+ if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
+ if (!Fn->isDefined() && !AttrOnCallSite) {
+ FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind);
+ }
}
}
}
- if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
- if (getLangOpts().OpenCLVersion <= 120) {
- // OpenCL v1.2 Work groups are always uniform
- FuncAttrs.addAttribute("uniform-work-group-size", "true");
- } else {
- // OpenCL v2.0 Work groups may be whether uniform or not.
- // '-cl-uniform-work-group-size' compile option gets a hint
- // to the compiler that the global work-size be a multiple of
- // the work-group size specified to clEnqueueNDRangeKernel
- // (i.e. work groups are uniform).
- FuncAttrs.addAttribute("uniform-work-group-size",
- llvm::toStringRef(CodeGenOpts.UniformWGSize));
- }
- }
-
+ // Collect non-call-site function IR attributes from declaration-specific
+ // information.
if (!AttrOnCallSite) {
- bool DisableTailCalls = false;
+ if (TargetDecl && TargetDecl->hasAttr<CmseNSEntryAttr>())
+ FuncAttrs.addAttribute("cmse_nonsecure_entry");
+
+    // Decide whether tail calls should be disabled.
+ auto shouldDisableTailCalls = [&] {
+ // Should this be honored in getDefaultFunctionAttributes?
+ if (CodeGenOpts.DisableTailCalls)
+ return true;
+
+ if (!TargetDecl)
+ return false;
- if (CodeGenOpts.DisableTailCalls)
- DisableTailCalls = true;
- else if (TargetDecl) {
if (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
TargetDecl->hasAttr<AnyX86InterruptAttr>())
- DisableTailCalls = true;
- else if (CodeGenOpts.NoEscapingBlockTailCalls) {
+ return true;
+
+ if (CodeGenOpts.NoEscapingBlockTailCalls) {
if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl))
if (!BD->doesNotEscape())
- DisableTailCalls = true;
+ return true;
}
- }
+ return false;
+ };
FuncAttrs.addAttribute("disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
+ llvm::toStringRef(shouldDisableTailCalls()));
+
+ // CPU/feature overrides. addDefaultFunctionDefinitionAttributes
+ // handles these separately to set them based on the global defaults.
GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs);
}
+ // Collect attributes from arguments and return values.
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
QualType RetTy = FI.getReturnType();
@@ -2024,11 +2107,16 @@ void CodeGenModule::ConstructAttributeList(
if (const auto *RefTy = RetTy->getAs<ReferenceType>()) {
QualType PTy = RefTy->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
- RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
- .getQuantity());
- else if (getContext().getTargetAddressSpace(PTy) == 0 &&
- !CodeGenOpts.NullPointerIsValid)
+ RetAttrs.addDereferenceableAttr(
+ getMinimumObjectSize(PTy).getQuantity());
+ if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ !CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
+ if (PTy->isObjectType()) {
+ llvm::Align Alignment =
+ getNaturalPointeeTypeAlignment(RetTy).getAsAlign();
+ RetAttrs.addAlignmentAttr(Alignment);
+ }
}
bool hasUsedSRet = false;
@@ -2041,6 +2129,7 @@ void CodeGenModule::ConstructAttributeList(
hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
+ SRETAttrs.addAlignmentAttr(RetAI.getIndirectAlign().getQuantity());
ArgAttrs[IRFunctionArgs.getSRetArgNo()] =
llvm::AttributeSet::get(getLLVMContext(), SRETAttrs);
}
@@ -2134,11 +2223,16 @@ void CodeGenModule::ConstructAttributeList(
if (const auto *RefTy = ParamType->getAs<ReferenceType>()) {
QualType PTy = RefTy->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
- Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
- .getQuantity());
- else if (getContext().getTargetAddressSpace(PTy) == 0 &&
- !CodeGenOpts.NullPointerIsValid)
+ Attrs.addDereferenceableAttr(
+ getMinimumObjectSize(PTy).getQuantity());
+ if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ !CodeGenOpts.NullPointerIsValid)
Attrs.addAttribute(llvm::Attribute::NonNull);
+ if (PTy->isObjectType()) {
+ llvm::Align Alignment =
+ getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
+ Attrs.addAlignmentAttr(Alignment);
+ }
}
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
@@ -2161,8 +2255,7 @@ void CodeGenModule::ConstructAttributeList(
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
auto info = getContext().getTypeInfoInChars(PTy);
Attrs.addDereferenceableAttr(info.first.getQuantity());
- Attrs.addAttribute(llvm::Attribute::getWithAlignment(
- getLLVMContext(), info.second.getAsAlign()));
+ Attrs.addAlignmentAttr(info.second.getAsAlign());
}
break;
}
@@ -2278,19 +2371,13 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// simplify.
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI);
- // Flattened function arguments.
- SmallVector<llvm::Value *, 16> FnArgs;
- FnArgs.reserve(IRFunctionArgs.totalIRArgs());
- for (auto &Arg : Fn->args()) {
- FnArgs.push_back(&Arg);
- }
- assert(FnArgs.size() == IRFunctionArgs.totalIRArgs());
+ assert(Fn->arg_size() == IRFunctionArgs.totalIRArgs());
// If we're using inalloca, all the memory arguments are GEPs off of the last
// parameter, which is a pointer to the complete memory area.
Address ArgStruct = Address::invalid();
if (IRFunctionArgs.hasInallocaArg()) {
- ArgStruct = Address(FnArgs[IRFunctionArgs.getInallocaArgNo()],
+ ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()),
FI.getArgStructAlignment());
assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo());
@@ -2298,7 +2385,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// Name the struct return parameter.
if (IRFunctionArgs.hasSRetArg()) {
- auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]);
+ auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo());
AI->setName("agg.result");
AI->addAttr(llvm::Attribute::NoAlias);
}
@@ -2340,13 +2427,17 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
auto FieldIndex = ArgI.getInAllocaFieldIndex();
Address V =
Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
+ if (ArgI.getInAllocaIndirect())
+ V = Address(Builder.CreateLoad(V),
+ getContext().getTypeAlignInChars(Ty));
ArgVals.push_back(ParamValue::forIndirect(V));
break;
}
case ABIArgInfo::Indirect: {
assert(NumIRArgs == 1);
- Address ParamAddr = Address(FnArgs[FirstIRArg], ArgI.getIndirectAlign());
+ Address ParamAddr =
+ Address(Fn->getArg(FirstIRArg), ArgI.getIndirectAlign());
if (!hasScalarEvaluationKind(Ty)) {
// Aggregates and complex variables are accessed by reference. All we
@@ -2361,10 +2452,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// FIXME: We should have a common utility for generating an aggregate
// copy.
CharUnits Size = getContext().getTypeSizeInChars(Ty);
- auto SizeVal = llvm::ConstantInt::get(IntPtrTy, Size.getQuantity());
- Address Dst = Builder.CreateBitCast(AlignedTemp, Int8PtrTy);
- Address Src = Builder.CreateBitCast(ParamAddr, Int8PtrTy);
- Builder.CreateMemCpy(Dst, Src, SizeVal, false);
+ Builder.CreateMemCpy(
+ AlignedTemp.getPointer(), AlignedTemp.getAlignment().getAsAlign(),
+ ParamAddr.getPointer(), ParamAddr.getAlignment().getAsAlign(),
+ llvm::ConstantInt::get(IntPtrTy, Size.getQuantity()));
V = AlignedTemp;
}
ArgVals.push_back(ParamValue::forIndirect(V));
@@ -2382,16 +2473,18 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
-
- // If we have the trivial case, handle it with no muss and fuss.
- if (!isa<llvm::StructType>(ArgI.getCoerceToType()) &&
- ArgI.getCoerceToType() == ConvertType(Ty) &&
- ArgI.getDirectOffset() == 0) {
+ auto AI = Fn->getArg(FirstIRArg);
+ llvm::Type *LTy = ConvertType(Arg->getType());
+
+ // Prepare parameter attributes. So far, only attributes for pointer
+ // parameters are prepared. See
+ // http://llvm.org/docs/LangRef.html#paramattrs.
+ if (ArgI.getDirectOffset() == 0 && LTy->isPointerTy() &&
+ ArgI.getCoerceToType()->isPointerTy()) {
assert(NumIRArgs == 1);
- llvm::Value *V = FnArgs[FirstIRArg];
- auto AI = cast<llvm::Argument>(V);
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
+ // Set `nonnull` attribute if any.
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
PVD->getFunctionScopeIndex()) &&
!CGM.getCodeGenOpts().NullPointerIsValid)
@@ -2411,9 +2504,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
ArrSize) {
llvm::AttrBuilder Attrs;
Attrs.addDereferenceableAttr(
- getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
+ getContext().getTypeSizeInChars(ETy).getQuantity() *
+ ArrSize);
AI->addAttrs(Attrs);
- } else if (getContext().getTargetAddressSpace(ETy) == 0 &&
+ } else if (getContext().getTargetInfo().getNullPointerValue(
+ ETy.getAddressSpace()) == 0 &&
!CGM.getCodeGenOpts().NullPointerIsValid) {
AI->addAttr(llvm::Attribute::NonNull);
}
@@ -2429,6 +2524,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
AI->addAttr(llvm::Attribute::NonNull);
}
+ // Set `align` attribute if any.
const auto *AVAttr = PVD->getAttr<AlignValueAttr>();
if (!AVAttr)
if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
@@ -2437,21 +2533,33 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// If alignment-assumption sanitizer is enabled, we do *not* add
// alignment attribute here, but emit normal alignment assumption,
// so the UBSAN check could function.
- llvm::Value *AlignmentValue =
- EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI =
- cast<llvm::ConstantInt>(AlignmentValue);
- unsigned Alignment = std::min((unsigned)AlignmentCI->getZExtValue(),
- +llvm::Value::MaximumAlignment);
- AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment));
+ cast<llvm::ConstantInt>(EmitScalarExpr(AVAttr->getAlignment()));
+ unsigned AlignmentInt =
+ AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment);
+ if (AI->getParamAlign().valueOrOne() < AlignmentInt) {
+ AI->removeAttr(llvm::Attribute::AttrKind::Alignment);
+ AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(
+ llvm::Align(AlignmentInt)));
+ }
}
}
+ // Set 'noalias' if an argument type has the `restrict` qualifier.
if (Arg->getType().isRestrictQualified())
AI->addAttr(llvm::Attribute::NoAlias);
+ }
+
+ // Prepare the argument value. If we have the trivial case, handle it
+ // with no muss and fuss.
+ if (!isa<llvm::StructType>(ArgI.getCoerceToType()) &&
+ ArgI.getCoerceToType() == ConvertType(Ty) &&
+ ArgI.getDirectOffset() == 0) {
+ assert(NumIRArgs == 1);
// LLVM expects swifterror parameters to be used in very restricted
// ways. Copy the value into a less-restricted temporary.
+ llvm::Value *V = AI;
if (FI.getExtParameterInfo(ArgNo).getABI()
== ParameterABI::SwiftErrorResult) {
QualType pointeeTy = Ty->getPointeeType();
@@ -2513,7 +2621,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(STy->getNumElements() == NumIRArgs);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- auto AI = FnArgs[FirstIRArg + i];
+ auto AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + ".coerce" + Twine(i));
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
Builder.CreateStore(AI, EltPtr);
@@ -2526,7 +2634,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
} else {
// Simple case, just do a coerced store of the argument into the alloca.
assert(NumIRArgs == 1);
- auto AI = FnArgs[FirstIRArg];
+ auto AI = Fn->getArg(FirstIRArg);
AI->setName(Arg->getName() + ".coerce");
CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
}
@@ -2559,7 +2667,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
continue;
auto eltAddr = Builder.CreateStructGEP(alloca, i);
- auto elt = FnArgs[argIndex++];
+ auto elt = Fn->getArg(argIndex++);
Builder.CreateStore(elt, eltAddr);
}
assert(argIndex == FirstIRArg + NumIRArgs);
@@ -2574,11 +2682,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
LValue LV = MakeAddrLValue(Alloca, Ty);
ArgVals.push_back(ParamValue::forIndirect(Alloca));
- auto FnArgIter = FnArgs.begin() + FirstIRArg;
+ auto FnArgIter = Fn->arg_begin() + FirstIRArg;
ExpandTypeFromArgs(Ty, LV, FnArgIter);
- assert(FnArgIter == FnArgs.begin() + FirstIRArg + NumIRArgs);
+ assert(FnArgIter == Fn->arg_begin() + FirstIRArg + NumIRArgs);
for (unsigned i = 0, e = NumIRArgs; i != e; ++i) {
- auto AI = FnArgs[FirstIRArg + i];
+ auto AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + "." + Twine(i));
}
break;
@@ -2655,10 +2763,10 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
bool doRetainAutorelease;
- if (call->getCalledValue() == CGF.CGM.getObjCEntrypoints().objc_retain) {
+ if (call->getCalledOperand() == CGF.CGM.getObjCEntrypoints().objc_retain) {
doRetainAutorelease = true;
- } else if (call->getCalledValue() == CGF.CGM.getObjCEntrypoints()
- .objc_retainAutoreleasedReturnValue) {
+ } else if (call->getCalledOperand() ==
+ CGF.CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue) {
doRetainAutorelease = false;
// If we emitted an assembly marker for this call (and the
@@ -2674,8 +2782,8 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
assert(prev);
}
assert(isa<llvm::CallInst>(prev));
- assert(cast<llvm::CallInst>(prev)->getCalledValue() ==
- CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker);
+ assert(cast<llvm::CallInst>(prev)->getCalledOperand() ==
+ CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker);
InstsToKill.push_back(prev);
}
} else {
@@ -2718,8 +2826,8 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
// Look for a retain call.
llvm::CallInst *retainCall =
dyn_cast<llvm::CallInst>(result->stripPointerCasts());
- if (!retainCall ||
- retainCall->getCalledValue() != CGF.CGM.getObjCEntrypoints().objc_retain)
+ if (!retainCall || retainCall->getCalledOperand() !=
+ CGF.CGM.getObjCEntrypoints().objc_retain)
return nullptr;
// Look for an ordinary load of 'self'.
@@ -2825,6 +2933,199 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
return store;
}
+// Helper functions for EmitCMSEClearRecord
+
+// Set the bits corresponding to a field having width `BitWidth` and located at
+// offset `BitOffset` (from the least significant bit) within a storage unit of
+// `Bits.size()` bytes. Each element of `Bits` corresponds to one target byte.
+// Use little-endian layout, i.e.`Bits[0]` is the LSB.
+static void setBitRange(SmallVectorImpl<uint64_t> &Bits, int BitOffset,
+ int BitWidth, int CharWidth) {
+ assert(CharWidth <= 64);
+ assert(static_cast<unsigned>(BitWidth) <= Bits.size() * CharWidth);
+
+ int Pos = 0;
+ if (BitOffset >= CharWidth) {
+ Pos += BitOffset / CharWidth;
+ BitOffset = BitOffset % CharWidth;
+ }
+
+ const uint64_t Used = (uint64_t(1) << CharWidth) - 1;
+ if (BitOffset + BitWidth >= CharWidth) {
+ Bits[Pos++] |= (Used << BitOffset) & Used;
+ BitWidth -= CharWidth - BitOffset;
+ BitOffset = 0;
+ }
+
+ while (BitWidth >= CharWidth) {
+ Bits[Pos++] = Used;
+ BitWidth -= CharWidth;
+ }
+
+ if (BitWidth > 0)
+ Bits[Pos++] |= (Used >> (CharWidth - BitWidth)) << BitOffset;
+}
+
+// Set the bits corresponding to a field having width `BitWidth` and located at
+// offset `BitOffset` (from the least significant bit) within a storage unit of
+// `StorageSize` bytes, located at `StorageOffset` in `Bits`. Each element of
+// `Bits` corresponds to one target byte. Use target endian layout.
+static void setBitRange(SmallVectorImpl<uint64_t> &Bits, int StorageOffset,
+ int StorageSize, int BitOffset, int BitWidth,
+ int CharWidth, bool BigEndian) {
+
+ SmallVector<uint64_t, 8> TmpBits(StorageSize);
+ setBitRange(TmpBits, BitOffset, BitWidth, CharWidth);
+
+ if (BigEndian)
+ std::reverse(TmpBits.begin(), TmpBits.end());
+
+ for (uint64_t V : TmpBits)
+ Bits[StorageOffset++] |= V;
+}
+
+static void setUsedBits(CodeGenModule &, QualType, int,
+ SmallVectorImpl<uint64_t> &);
+
+// Set the bits in `Bits`, which correspond to the value representations of
+// the actual members of the record type `RTy`. Note that this function does
+// not handle base classes, virtual tables, etc., since they cannot appear in
+// CMSE function arguments or return values. The bit mask corresponds to the target
+// memory layout, i.e. it's endian dependent.
+static void setUsedBits(CodeGenModule &CGM, const RecordType *RTy, int Offset,
+ SmallVectorImpl<uint64_t> &Bits) {
+ ASTContext &Context = CGM.getContext();
+ int CharWidth = Context.getCharWidth();
+ const RecordDecl *RD = RTy->getDecl()->getDefinition();
+ const ASTRecordLayout &ASTLayout = Context.getASTRecordLayout(RD);
+ const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(RD);
+
+ int Idx = 0;
+ for (auto I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++Idx) {
+ const FieldDecl *F = *I;
+
+ if (F->isUnnamedBitfield() || F->isZeroLengthBitField(Context) ||
+ F->getType()->isIncompleteArrayType())
+ continue;
+
+ if (F->isBitField()) {
+ const CGBitFieldInfo &BFI = Layout.getBitFieldInfo(F);
+ setBitRange(Bits, Offset + BFI.StorageOffset.getQuantity(),
+ BFI.StorageSize / CharWidth, BFI.Offset,
+ BFI.Size, CharWidth,
+ CGM.getDataLayout().isBigEndian());
+ continue;
+ }
+
+ setUsedBits(CGM, F->getType(),
+ Offset + ASTLayout.getFieldOffset(Idx) / CharWidth, Bits);
+ }
+}
+
+// Set the bits in `Bits`, which correspond to the value representations of
+// the elements of an array type `ATy`.
+static void setUsedBits(CodeGenModule &CGM, const ConstantArrayType *ATy,
+ int Offset, SmallVectorImpl<uint64_t> &Bits) {
+ const ASTContext &Context = CGM.getContext();
+
+ QualType ETy = Context.getBaseElementType(ATy);
+ int Size = Context.getTypeSizeInChars(ETy).getQuantity();
+ SmallVector<uint64_t, 4> TmpBits(Size);
+ setUsedBits(CGM, ETy, 0, TmpBits);
+
+ for (int I = 0, N = Context.getConstantArrayElementCount(ATy); I < N; ++I) {
+ auto Src = TmpBits.begin();
+ auto Dst = Bits.begin() + Offset + I * Size;
+ for (int J = 0; J < Size; ++J)
+ *Dst++ |= *Src++;
+ }
+}
+
+// Set the bits in `Bits`, which correspond to the value representations of
+// the type `QTy`.
+static void setUsedBits(CodeGenModule &CGM, QualType QTy, int Offset,
+ SmallVectorImpl<uint64_t> &Bits) {
+ if (const auto *RTy = QTy->getAs<RecordType>())
+ return setUsedBits(CGM, RTy, Offset, Bits);
+
+ ASTContext &Context = CGM.getContext();
+ if (const auto *ATy = Context.getAsConstantArrayType(QTy))
+ return setUsedBits(CGM, ATy, Offset, Bits);
+
+ int Size = Context.getTypeSizeInChars(QTy).getQuantity();
+ if (Size <= 0)
+ return;
+
+ std::fill_n(Bits.begin() + Offset, Size,
+ (uint64_t(1) << Context.getCharWidth()) - 1);
+}
+
+static uint64_t buildMultiCharMask(const SmallVectorImpl<uint64_t> &Bits,
+ int Pos, int Size, int CharWidth,
+ bool BigEndian) {
+ assert(Size > 0);
+ uint64_t Mask = 0;
+ if (BigEndian) {
+ for (auto P = Bits.begin() + Pos, E = Bits.begin() + Pos + Size; P != E;
+ ++P)
+ Mask = (Mask << CharWidth) | *P;
+ } else {
+ auto P = Bits.begin() + Pos + Size, End = Bits.begin() + Pos;
+ do
+ Mask = (Mask << CharWidth) | *--P;
+ while (P != End);
+ }
+ return Mask;
+}
+
+// Emit code to clear the bits in a record that aren't part of any
+// user-declared member, when the record is returned from a function.
+llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src,
+ llvm::IntegerType *ITy,
+ QualType QTy) {
+ assert(Src->getType() == ITy);
+ assert(ITy->getScalarSizeInBits() <= 64);
+
+ const llvm::DataLayout &DataLayout = CGM.getDataLayout();
+ int Size = DataLayout.getTypeStoreSize(ITy);
+ SmallVector<uint64_t, 4> Bits(Size);
+ setUsedBits(CGM, QTy->getAs<RecordType>(), 0, Bits);
+
+ int CharWidth = CGM.getContext().getCharWidth();
+ uint64_t Mask =
+ buildMultiCharMask(Bits, 0, Size, CharWidth, DataLayout.isBigEndian());
+
+ return Builder.CreateAnd(Src, Mask, "cmse.clear");
+}
+
+// Emit code to clear the bits in a record that aren't part of any
+// user-declared member, when the record is passed as a function argument.
+llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src,
+ llvm::ArrayType *ATy,
+ QualType QTy) {
+ const llvm::DataLayout &DataLayout = CGM.getDataLayout();
+ int Size = DataLayout.getTypeStoreSize(ATy);
+ SmallVector<uint64_t, 16> Bits(Size);
+ setUsedBits(CGM, QTy->getAs<RecordType>(), 0, Bits);
+
+ // Clear each element of the LLVM array.
+ int CharWidth = CGM.getContext().getCharWidth();
+ int CharsPerElt =
+ ATy->getArrayElementType()->getScalarSizeInBits() / CharWidth;
+ int MaskIndex = 0;
+ llvm::Value *R = llvm::UndefValue::get(ATy);
+ for (int I = 0, N = ATy->getArrayNumElements(); I != N; ++I) {
+ uint64_t Mask = buildMultiCharMask(Bits, MaskIndex, CharsPerElt, CharWidth,
+ DataLayout.isBigEndian());
+ MaskIndex += CharsPerElt;
+ llvm::Value *T0 = Builder.CreateExtractValue(Src, I);
+ llvm::Value *T1 = Builder.CreateAnd(T0, Mask, "cmse.clear");
+ R = Builder.CreateInsertValue(R, T1, I);
+ }
+
+ return R;
+}
+
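The CMSE helpers above compute a per-byte map of used bits and then collapse it into integer masks. This standalone sketch reimplements the two core pieces for a little-endian target with 8-bit chars (the real code also handles big-endian layouts and arbitrary char widths), and builds the mask for a storage unit holding a 3-bit bit-field, five padding bits, and an 8-bit bit-field:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Mark the bits of a field of width BitWidth at bit offset BitOffset;
    // Bits[i] holds the mask for byte i, least significant byte first.
    static void setBitRange(std::vector<uint64_t> &Bits, int BitOffset,
                            int BitWidth, int CharWidth) {
      int Pos = BitOffset / CharWidth;
      BitOffset %= CharWidth;
      const uint64_t Used = (uint64_t(1) << CharWidth) - 1;
      if (BitOffset + BitWidth >= CharWidth) {
        Bits[Pos++] |= (Used << BitOffset) & Used; // finish the first byte
        BitWidth -= CharWidth - BitOffset;
        BitOffset = 0;
      }
      while (BitWidth >= CharWidth) { // whole bytes in the middle
        Bits[Pos++] = Used;
        BitWidth -= CharWidth;
      }
      if (BitWidth > 0) // trailing partial byte
        Bits[Pos] |= (Used >> (CharWidth - BitWidth)) << BitOffset;
    }

    // Collapse the per-byte map into one integer (little-endian only here).
    static uint64_t buildMask(const std::vector<uint64_t> &Bits, int CharWidth) {
      uint64_t Mask = 0;
      for (auto P = Bits.rbegin(); P != Bits.rend(); ++P)
        Mask = (Mask << CharWidth) | *P;
      return Mask;
    }

    int main() {
      // struct S { unsigned a : 3; /* 5 bits padding */ unsigned b : 8; };
      std::vector<uint64_t> Bits(2); // two 8-bit storage bytes
      setBitRange(Bits, /*BitOffset=*/0, /*BitWidth=*/3, /*CharWidth=*/8);
      setBitRange(Bits, /*BitOffset=*/8, /*BitWidth=*/8, /*CharWidth=*/8);
      // Prints ff07: ANDing with this mask clears padding bits 3..7.
      std::cout << std::hex << buildMask(Bits, 8) << "\n";
      return 0;
    }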
void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
bool EmitRetDbgLoc,
SourceLocation EndLoc) {
@@ -2991,6 +3292,14 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
llvm::Instruction *Ret;
if (RV) {
+ if (CurFuncDecl && CurFuncDecl->hasAttr<CmseNSEntryAttr>()) {
+ // For certain return types, clear padding bits, as they may reveal
+ // sensitive information.
+ // Small struct/union types are passed as integers.
+ auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
+ if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
+ RV = EmitCMSEClearRecord(RV, ITy, RetTy);
+ }
EmitReturnValueCheck(RV);
Ret = Builder.CreateRet(RV);
} else {
@@ -3006,6 +3315,11 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) {
if (!CurCodeDecl)
return;
+ // If the return block isn't reachable, neither is this check, so don't emit
+ // it.
+ if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty())
+ return;
+
ReturnsNonNullAttr *RetNNAttr = nullptr;
if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute))
RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>();
@@ -3026,7 +3340,7 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) {
} else {
if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl))
if (auto *TSI = DD->getTypeSourceInfo())
- if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>())
+ if (auto FTL = TSI->getTypeLoc().getAsAdjusted<FunctionTypeLoc>())
AttrLoc = FTL.getReturnLoc().findNullabilityLoc();
CheckKind = SanitizerKind::NullabilityReturn;
Handler = SanitizerHandler::NullabilityReturn;
@@ -3811,6 +4125,110 @@ void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
DeferredReplacements.push_back(std::make_pair(Old, New));
}
+namespace {
+
+/// Specify the given \p NewAlign as the alignment of the return value
+/// attribute. If such an attribute already exists, raise it to the larger
+/// of the two.
+LLVM_NODISCARD llvm::AttributeList
+maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx,
+ const llvm::AttributeList &Attrs,
+ llvm::Align NewAlign) {
+ llvm::Align CurAlign = Attrs.getRetAlignment().valueOrOne();
+ if (CurAlign >= NewAlign)
+ return Attrs;
+ llvm::Attribute AlignAttr = llvm::Attribute::getWithAlignment(Ctx, NewAlign);
+ return Attrs
+ .removeAttribute(Ctx, llvm::AttributeList::ReturnIndex,
+ llvm::Attribute::AttrKind::Alignment)
+ .addAttribute(Ctx, llvm::AttributeList::ReturnIndex, AlignAttr);
+}
+
+template <typename AlignedAttrTy> class AbstractAssumeAlignedAttrEmitter {
+protected:
+ CodeGenFunction &CGF;
+
+ /// We do nothing if this is, or becomes, nullptr.
+ const AlignedAttrTy *AA = nullptr;
+
+ llvm::Value *Alignment = nullptr; // May or may not be a constant.
+ llvm::ConstantInt *OffsetCI = nullptr; // Constant, hopefully zero.
+
+ AbstractAssumeAlignedAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl)
+ : CGF(CGF_) {
+ if (!FuncDecl)
+ return;
+ AA = FuncDecl->getAttr<AlignedAttrTy>();
+ }
+
+public:
+ /// If we can, materialize the alignment as an attribute on return value.
+ LLVM_NODISCARD llvm::AttributeList
+ TryEmitAsCallSiteAttribute(const llvm::AttributeList &Attrs) {
+ if (!AA || OffsetCI || CGF.SanOpts.has(SanitizerKind::Alignment))
+ return Attrs;
+ const auto *AlignmentCI = dyn_cast<llvm::ConstantInt>(Alignment);
+ if (!AlignmentCI)
+ return Attrs;
+ // We may legitimately have non-power-of-2 alignment here.
+ // If so, this is UB land; emit it via `@llvm.assume` instead.
+ if (!AlignmentCI->getValue().isPowerOf2())
+ return Attrs;
+ llvm::AttributeList NewAttrs = maybeRaiseRetAlignmentAttribute(
+ CGF.getLLVMContext(), Attrs,
+ llvm::Align(
+ AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment)));
+ AA = nullptr; // We're done. Disallow doing anything else.
+ return NewAttrs;
+ }
+
+ /// Emit alignment assumption.
+ /// This is a general fallback that we take if either there is an offset,
+ /// or the alignment is variable or we are sanitizing for alignment.
+ void EmitAsAnAssumption(SourceLocation Loc, QualType RetTy, RValue &Ret) {
+ if (!AA)
+ return;
+ CGF.emitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc,
+ AA->getLocation(), Alignment, OffsetCI);
+ AA = nullptr; // We're done. Disallow doing anything else.
+ }
+};
+
+/// Helper data structure to emit `AssumeAlignedAttr`.
+class AssumeAlignedAttrEmitter final
+ : public AbstractAssumeAlignedAttrEmitter<AssumeAlignedAttr> {
+public:
+ AssumeAlignedAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl)
+ : AbstractAssumeAlignedAttrEmitter(CGF_, FuncDecl) {
+ if (!AA)
+ return;
+ // It is guaranteed that the alignment/offset are constants.
+ Alignment = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AA->getAlignment()));
+ if (Expr *Offset = AA->getOffset()) {
+ OffsetCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(Offset));
+ if (OffsetCI->isNullValue()) // Canonicalize zero offset to no offset.
+ OffsetCI = nullptr;
+ }
+ }
+};
+
+/// Helper data structure to emit `AllocAlignAttr`.
+class AllocAlignAttrEmitter final
+ : public AbstractAssumeAlignedAttrEmitter<AllocAlignAttr> {
+public:
+ AllocAlignAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl,
+ const CallArgList &CallArgs)
+ : AbstractAssumeAlignedAttrEmitter(CGF_, FuncDecl) {
+ if (!AA)
+ return;
+ // Alignment may or may not be a constant, and that is okay.
+ Alignment = CallArgs[AA->getParamIndex().getLLVMIndex()]
+ .getRValue(CGF)
+ .getScalarVal();
+ }
+};
+
+} // namespace
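
These emitters split alignment handling into a fast path (attach a known power-of-two alignment as a call-site return attribute) and a fallback (emit an @llvm.assume-based assumption when there is an offset, the alignment is a runtime value, or -fsanitize=alignment is enabled and must still see the assumption to check it). A hedged sketch of the two source forms involved (hypothetical declarations):

    #include <cstddef>

    // Constant alignment, no offset: eligible for an 'align 64' return
    // attribute on the call site.
    void *make_buffer() __attribute__((assume_aligned(64)));

    // Alignment is the runtime value of parameter 2: only the @llvm.assume
    // fallback applies.
    void *alloc_like(std::size_t n, std::size_t align)
        __attribute__((alloc_align(2)));
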
+
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
const CGCallee &Callee,
ReturnValueSlot ReturnValue,
@@ -3829,7 +4247,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo);
const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
// We can only guarantee that a function is called from the correct
// context/function based on the appropriate target attributes,
// so only check in the case where we have both always_inline and target
@@ -3840,6 +4258,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
TargetDecl->hasAttr<TargetAttr>())
checkTargetFeatures(Loc, FD);
+ // On some architectures (such as x86-64) the ABI can change based on
+ // target attributes/features. Give the target a chance to diagnose.
+ CGM.getTargetCodeGenInfo().checkFunctionCallABI(
+ CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), FD, CallArgs);
+ }
+
#ifndef NDEBUG
if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) {
// For an inalloca varargs function, we don't expect CallInfo to match the
@@ -3940,18 +4364,39 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(NumIRArgs == 0);
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
if (I->isAggregate()) {
- // Replace the placeholder with the appropriate argument slot GEP.
Address Addr = I->hasLValue()
? I->getKnownLValue().getAddress(*this)
: I->getKnownRValue().getAggregateAddress();
llvm::Instruction *Placeholder =
cast<llvm::Instruction>(Addr.getPointer());
- CGBuilderTy::InsertPoint IP = Builder.saveIP();
- Builder.SetInsertPoint(Placeholder);
- Addr =
- Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
- Builder.restoreIP(IP);
+
+ if (!ArgInfo.getInAllocaIndirect()) {
+ // Replace the placeholder with the appropriate argument slot GEP.
+ CGBuilderTy::InsertPoint IP = Builder.saveIP();
+ Builder.SetInsertPoint(Placeholder);
+ Addr = Builder.CreateStructGEP(ArgMemory,
+ ArgInfo.getInAllocaFieldIndex());
+ Builder.restoreIP(IP);
+ } else {
+ // For arguments passed indirectly, such as overaligned structs, replace the
+ // placeholder with a regular aggregate temporary alloca. Store the
+ // address of this alloca into the struct.
+ Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
+ Address ArgSlot = Builder.CreateStructGEP(
+ ArgMemory, ArgInfo.getInAllocaFieldIndex());
+ Builder.CreateStore(Addr.getPointer(), ArgSlot);
+ }
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
+ } else if (ArgInfo.getInAllocaIndirect()) {
+ // Make a temporary alloca and store the address of it into the argument
+ // struct.
+ Address Addr = CreateMemTempWithoutCast(
+ I->Ty, getContext().getTypeAlignInChars(I->Ty),
+ "indirect-arg-temp");
+ I->copyInto(*this, Addr);
+ Address ArgSlot =
+ Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
+ Builder.CreateStore(Addr.getPointer(), ArgSlot);
} else {
// Store the RValue into the argument struct.
Address Addr =
@@ -4001,8 +4446,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
bool NeedCopy = false;
if (Addr.getAlignment() < Align &&
- llvm::getOrEnforceKnownAlignment(V, Align.getQuantity(), *TD) <
- Align.getQuantity()) {
+ llvm::getOrEnforceKnownAlignment(V, Align.getAsAlign(), *TD) <
+ Align.getAsAlign()) {
NeedCopy = true;
} else if (I->hasLValue()) {
auto LV = I->getKnownLValue();
@@ -4128,7 +4573,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
- llvm::Type *SrcTy = Src.getType()->getElementType();
+ llvm::Type *SrcTy = Src.getElementType();
uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);
@@ -4156,8 +4601,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else {
// In the simple case, just pass the coerced loaded value.
assert(NumIRArgs == 1);
- IRCallArgs[FirstIRArg] =
- CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this);
+ llvm::Value *Load =
+ CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this);
+
+ if (CallInfo.isCmseNSCall()) {
+ // For certain parameter types, clear padding bits, as they may reveal
+ // sensitive information.
+ // Small struct/union types are passed as integer arrays.
+ auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
+ if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
+ Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
+ }
+ IRCallArgs[FirstIRArg] = Load;
}
break;
@@ -4328,8 +4783,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Update the largest vector width if any arguments have vector types.
for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
}
// Compute the calling convention and attributes.
@@ -4346,6 +4802,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::StrictFP);
+ // Add the call-site nomerge attribute if it is present.
+ if (InNoMergeAttributedStmt)
+ Attrs =
+ Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoMerge);
+
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
@@ -4378,8 +4840,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CannotThrow = true;
} else {
// Otherwise, nounwind call sites will never throw.
- CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoUnwind);
+ CannotThrow = Attrs.hasFnAttribute(llvm::Attribute::NoUnwind);
}
// If we made a temporary, be sure to clean up after ourselves. Note that we
@@ -4402,6 +4863,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::StrictFP);
+ AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl);
+ Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);
+
+ AllocAlignAttrEmitter AllocAlignAttrEmitter(*this, TargetDecl, CallArgs);
+ Attrs = AllocAlignAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);
+
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
@@ -4437,8 +4904,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Update largest vector width from the return type.
if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of
@@ -4461,7 +4929,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Add metadata for calls to MSAllocator functions
if (getDebugInfo() && TargetDecl &&
TargetDecl->hasAttr<MSAllocatorAttr>())
- getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy, Loc);
+ getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy->getPointeeType(), Loc);
// 4. Finish the call.
@@ -4581,7 +5049,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
DestIsVolatile = false;
}
- BuildAggStore(*this, CI, DestPtr, DestIsVolatile);
+ EmitAggregateStore(CI, DestPtr, DestIsVolatile);
return RValue::getAggregate(DestPtr);
}
case TEK_Scalar: {
@@ -4620,22 +5088,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit the assume_aligned check on the return value.
if (Ret.isScalar() && TargetDecl) {
- if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) {
- llvm::Value *OffsetValue = nullptr;
- if (const auto *Offset = AA->getOffset())
- OffsetValue = EmitScalarExpr(Offset);
-
- llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment());
- llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
- EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
- AlignmentCI, OffsetValue);
- } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
- llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()]
- .getRValue(*this)
- .getScalarVal();
- EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
- AlignmentVal);
- }
+ AssumeAlignedAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret);
+ AllocAlignAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret);
}
// Explicitly call CallLifetimeEnd::Emit just to re-use the code even though
@@ -4643,6 +5097,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall)
LifetimeEnd.Emit(*this, /*Flags=*/{});
+ if (!ReturnValue.isExternallyDestructed() &&
+ RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct)
+ pushDestroy(QualType::DK_nontrivial_c_struct, Ret.getAggregateAddress(),
+ RetTy);
+
return Ret;
}
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index 34558be5adb1..509ca43a9784 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -16,6 +16,7 @@
#include "CGValue.h"
#include "EHScopeStack.h"
+#include "clang/AST/ASTFwd.h"
#include "clang/AST/CanonicalType.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/AST/Type.h"
@@ -357,27 +358,26 @@ class FunctionArgList : public SmallVector<const VarDecl *, 16> {};
/// ReturnValueSlot - Contains the address where the return value of a
/// function can be stored, and whether the address is volatile or not.
class ReturnValueSlot {
- llvm::PointerIntPair<llvm::Value *, 2, unsigned int> Value;
- CharUnits Alignment;
+ Address Addr = Address::invalid();
// Return value slot flags
- enum Flags {
- IS_VOLATILE = 0x1,
- IS_UNUSED = 0x2,
- };
+ unsigned IsVolatile : 1;
+ unsigned IsUnused : 1;
+ unsigned IsExternallyDestructed : 1;
public:
- ReturnValueSlot() {}
- ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false)
- : Value(Addr.isValid() ? Addr.getPointer() : nullptr,
- (IsVolatile ? IS_VOLATILE : 0) | (IsUnused ? IS_UNUSED : 0)),
- Alignment(Addr.isValid() ? Addr.getAlignment() : CharUnits::Zero()) {}
-
- bool isNull() const { return !getValue().isValid(); }
-
- bool isVolatile() const { return Value.getInt() & IS_VOLATILE; }
- Address getValue() const { return Address(Value.getPointer(), Alignment); }
- bool isUnused() const { return Value.getInt() & IS_UNUSED; }
+ ReturnValueSlot()
+ : IsVolatile(false), IsUnused(false), IsExternallyDestructed(false) {}
+ ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false,
+ bool IsExternallyDestructed = false)
+ : Addr(Addr), IsVolatile(IsVolatile), IsUnused(IsUnused),
+ IsExternallyDestructed(IsExternallyDestructed) {}
+
+ bool isNull() const { return !Addr.isValid(); }
+ bool isVolatile() const { return IsVolatile; }
+ Address getValue() const { return Addr; }
+ bool isUnused() const { return IsUnused; }
+ bool isExternallyDestructed() const { return IsExternallyDestructed; }
};
} // end namespace CodeGen
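
ReturnValueSlot drops the PointerIntPair packing in favor of a plain Address plus bitfields, and gains an IsExternallyDestructed flag. A hedged caller-side sketch of the widened constructor (the same pattern as the lambda-forwarding call in CGClass.cpp below):

    // Tell EmitCall that someone else destroys the returned object, so it
    // must not push its own non-trivial-C-struct destructor cleanup.
    ReturnValueSlot Slot(ReturnValue, /*IsVolatile=*/false,
                         /*IsUnused=*/false, /*IsExternallyDestructed=*/true);
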
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 3f3825b76275..4d143e3e1bdf 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -35,20 +35,37 @@ using namespace CodeGen;
/// Return the best known alignment for an unknown pointer to a
/// particular class.
CharUnits CodeGenModule::getClassPointerAlignment(const CXXRecordDecl *RD) {
- if (!RD->isCompleteDefinition())
+ if (!RD->hasDefinition())
return CharUnits::One(); // Hopefully won't be used anywhere.
auto &layout = getContext().getASTRecordLayout(RD);
// If the class is final, then we know that the pointer points to an
// object of that type and can use the full alignment.
- if (RD->hasAttr<FinalAttr>()) {
+ if (RD->isEffectivelyFinal())
return layout.getAlignment();
// Otherwise, we have to assume it could be a subclass.
- } else {
- return layout.getNonVirtualAlignment();
- }
+ return layout.getNonVirtualAlignment();
+}
+
+/// Return the smallest possible amount of storage that might be allocated
+/// starting from the beginning of an object of a particular class.
+///
+/// This may be smaller than sizeof(RD) if RD has virtual base classes.
+CharUnits CodeGenModule::getMinimumClassObjectSize(const CXXRecordDecl *RD) {
+ if (!RD->hasDefinition())
+ return CharUnits::One();
+
+ auto &layout = getContext().getASTRecordLayout(RD);
+
+ // If the class is final, then we know that the pointer points to an
+ // object of that type and can use the full alignment.
+ if (RD->isEffectivelyFinal())
+ return layout.getSize();
+
+ // Otherwise, we have to assume it could be a subclass.
+ return std::max(layout.getNonVirtualSize(), CharUnits::One());
}
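
The distinction matters because a pointer of static type A may address an A subobject of a more-derived class, where A's virtual bases live elsewhere. A hedged illustration (layout numbers assume a typical 64-bit Itanium-ABI target):

    struct VB { int x; };
    struct A : virtual VB { int a; };   // sizeof(A) == 16: vptr + a + vbase
    struct B : A { int b; };            // B relocates A's virtual base

    // For A, getMinimumClassObjectSize returns the non-virtual size
    // (vptr + a == 12 bytes here), not sizeof(A), since only that much
    // storage is guaranteed at the start of every A subobject.
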
/// Return the best known alignment for a pointer to a virtual base,
@@ -138,8 +155,8 @@ CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
memberPtr, memberPtrType);
QualType memberType = memberPtrType->getPointeeType();
- CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo,
- TBAAInfo);
+ CharUnits memberAlign =
+ CGM.getNaturalTypeAlignment(memberType, BaseInfo, TBAAInfo);
memberAlign =
CGM.getDynamicOffsetAlignment(base.getAlignment(),
memberPtrType->getClass()->getAsCXXRecordDecl(),
@@ -236,8 +253,13 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, Address addr,
// Compute the offset from the static and dynamic components.
llvm::Value *baseOffset;
if (!nonVirtualOffset.isZero()) {
- baseOffset = llvm::ConstantInt::get(CGF.PtrDiffTy,
- nonVirtualOffset.getQuantity());
+ llvm::Type *OffsetType =
+ (CGF.CGM.getTarget().getCXXABI().isItaniumFamily() &&
+ CGF.CGM.getItaniumVTableContext().isRelativeLayout())
+ ? CGF.Int32Ty
+ : CGF.PtrDiffTy;
+ baseOffset =
+ llvm::ConstantInt::get(OffsetType, nonVirtualOffset.getQuantity());
if (virtualOffset) {
baseOffset = CGF.Builder.CreateAdd(virtualOffset, baseOffset);
}
@@ -730,7 +752,7 @@ bool CodeGenFunction::IsConstructorDelegationValid(
// parameters
// - etc.
// If we ever add any of the above cases, remember that:
- // - function-try-blocks will always blacklist this optimization
+ // - function-try-blocks will always exclude this optimization
// - we need to perform the constructor prologue and cleanup in
// EmitConstructorBody.
@@ -2128,7 +2150,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType();
Address Src(Args[1].getRValue(*this).getScalarVal(),
- getNaturalTypeAlignment(SrcTy));
+ CGM.getNaturalTypeAlignment(SrcTy));
LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
QualType DestTy = getContext().getTypeDeclType(ClassDecl);
LValue DestLVal = MakeAddrLValue(This, DestTy);
@@ -2148,7 +2170,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
}
// Insert any ABI-specific implicit constructor arguments.
- CGCXXABI::AddedStructorArgs ExtraArgs =
+ CGCXXABI::AddedStructorArgCounts ExtraArgs =
CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase,
Delegating, Args);
@@ -2157,7 +2179,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
- EmitCall(Info, Callee, ReturnValueSlot(), Args);
+ EmitCall(Info, Callee, ReturnValueSlot(), Args, nullptr, Loc);
// Generate vtable assumptions if we're constructing a complete object
// with a vtable. We don't do this for base subobjects for two reasons:
@@ -2641,7 +2663,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
if (SanOpts.has(SanitizerKind::CFIVCall))
EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc);
else if (CGM.getCodeGenOpts().WholeProgramVTables &&
- CGM.HasHiddenLTOVisibility(RD)) {
+ // Don't insert type test assumes if we are forcing public std
+ // visibility.
+ !CGM.HasLTOVisibilityPublicStd(RD)) {
llvm::Metadata *MD =
CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
llvm::Value *TypeId =
@@ -2850,7 +2874,9 @@ void CodeGenFunction::EmitForwardingCallToLambda(
if (!resultType->isVoidType() &&
calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect &&
!hasScalarEvaluationKind(calleeFnInfo.getReturnType()))
- returnSlot = ReturnValueSlot(ReturnValue, resultType.isVolatileQualified());
+ returnSlot =
+ ReturnValueSlot(ReturnValue, resultType.isVolatileQualified(),
+ /*IsUnused=*/false, /*IsExternallyDestructed=*/true);
// We don't need to separately arrange the call arguments because
// the call can't be variadic anyway --- it's impossible to forward
diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp
index c117dd5c25c1..ad543ef86c1a 100644
--- a/clang/lib/CodeGen/CGCleanup.cpp
+++ b/clang/lib/CodeGen/CGCleanup.cpp
@@ -179,12 +179,10 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) {
char *Buffer = allocate(EHCleanupScope::getSizeForCleanupSize(Size));
bool IsNormalCleanup = Kind & NormalCleanup;
bool IsEHCleanup = Kind & EHCleanup;
- bool IsActive = !(Kind & InactiveCleanup);
bool IsLifetimeMarker = Kind & LifetimeMarker;
EHCleanupScope *Scope =
new (Buffer) EHCleanupScope(IsNormalCleanup,
IsEHCleanup,
- IsActive,
Size,
BranchFixups.size(),
InnermostNormalCleanup,
@@ -309,9 +307,9 @@ static void createStoreInstBefore(llvm::Value *value, Address addr,
static llvm::LoadInst *createLoadInstBefore(Address addr, const Twine &name,
llvm::Instruction *beforeInst) {
- auto load = new llvm::LoadInst(addr.getPointer(), name, beforeInst);
- load->setAlignment(addr.getAlignment().getAsAlign());
- return load;
+ return new llvm::LoadInst(addr.getElementType(), addr.getPointer(), name,
+ false, addr.getAlignment().getAsAlign(),
+ beforeInst);
}
/// All the branch fixups on the EH stack have propagated out past the
@@ -859,6 +857,9 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
// TODO: base this on the number of branch-afters and fixups
const unsigned SwitchCapacity = 10;
+ // Pass the abnormal exit flag to Fn (SEH cleanup).
+ cleanupFlags.setHasExitSwitch();
+
llvm::LoadInst *Load =
createLoadInstBefore(getNormalCleanupDestSlot(), "cleanup.dest",
nullptr);
diff --git a/clang/lib/CodeGen/CGCleanup.h b/clang/lib/CodeGen/CGCleanup.h
index ffe0f9d9dd20..ef4f6b9ec133 100644
--- a/clang/lib/CodeGen/CGCleanup.h
+++ b/clang/lib/CodeGen/CGCleanup.h
@@ -102,7 +102,7 @@ protected:
};
public:
- enum Kind { Cleanup, Catch, Terminate, Filter, PadEnd };
+ enum Kind { Cleanup, Catch, Terminate, Filter };
EHScope(Kind kind, EHScopeStack::stable_iterator enclosingEHScope)
: CachedLandingPad(nullptr), CachedEHDispatchBlock(nullptr),
@@ -284,8 +284,8 @@ public:
return sizeof(EHCleanupScope) + CleanupBits.CleanupSize;
}
- EHCleanupScope(bool isNormal, bool isEH, bool isActive,
- unsigned cleanupSize, unsigned fixupDepth,
+ EHCleanupScope(bool isNormal, bool isEH, unsigned cleanupSize,
+ unsigned fixupDepth,
EHScopeStack::stable_iterator enclosingNormal,
EHScopeStack::stable_iterator enclosingEH)
: EHScope(EHScope::Cleanup, enclosingEH),
@@ -293,7 +293,7 @@ public:
ActiveFlag(nullptr), ExtInfo(nullptr), FixupDepth(fixupDepth) {
CleanupBits.IsNormalCleanup = isNormal;
CleanupBits.IsEHCleanup = isEH;
- CleanupBits.IsActive = isActive;
+ CleanupBits.IsActive = true;
CleanupBits.IsLifetimeMarker = false;
CleanupBits.TestFlagInNormalCleanup = false;
CleanupBits.TestFlagInEHCleanup = false;
@@ -487,17 +487,6 @@ public:
}
};
-class EHPadEndScope : public EHScope {
-public:
- EHPadEndScope(EHScopeStack::stable_iterator enclosingEHScope)
- : EHScope(PadEnd, enclosingEHScope) {}
- static size_t getSize() { return sizeof(EHPadEndScope); }
-
- static bool classof(const EHScope *scope) {
- return scope->getKind() == PadEnd;
- }
-};
-
/// A non-stable pointer into the scope stack.
class EHScopeStack::iterator {
char *Ptr;
@@ -535,10 +524,6 @@ public:
case EHScope::Terminate:
Size = EHTerminateScope::getSize();
break;
-
- case EHScope::PadEnd:
- Size = EHPadEndScope::getSize();
- break;
}
Ptr += llvm::alignTo(Size, ScopeStackAlignment);
return *this;
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index aee5a927a055..5c57ad0685d5 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -275,9 +275,9 @@ RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E,
void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) {
++CurCoro.Data->CoreturnCount;
const Expr *RV = S.getOperand();
- if (RV && RV->getType()->isVoidType()) {
- // Make sure to evaluate the expression of a co_return with a void
- // expression for side effects.
+ if (RV && RV->getType()->isVoidType() && !isa<InitListExpr>(RV)) {
+ // Make sure to evaluate the operand of a co_return with a void expression
+ // for its side effects, unless it is an init-list expression.
RunCleanupsScope cleanupScope(*this);
EmitIgnoredExpr(RV);
}
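
A hedged sketch of the two operand kinds ('task' is a hypothetical coroutine return type, not part of the patch):

    task f() {
      co_return g();   // g() returns void: evaluated here for side effects
    }
    task h() {
      co_return {};    // init-list operand: also void-typed, but handled by
    }                  // the promise call instead, so it is skipped here
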
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index cbd524eda9d0..6965c4a1209c 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -231,9 +231,16 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const {
// If we're emitting codeview, it's important to try to match MSVC's naming so
// that visualizers written for MSVC will trigger for our class names. In
// particular, we can't have spaces between arguments of standard templates
- // like basic_string and vector.
- if (CGM.getCodeGenOpts().EmitCodeView)
+ // like basic_string and vector, but we must have spaces between consecutive
+ // angle brackets that close nested template argument lists.
+ if (CGM.getCodeGenOpts().EmitCodeView) {
PP.MSVCFormatting = true;
+ PP.SplitTemplateClosers = true;
+ } else {
+ // For DWARF, printing rules are underspecified.
+ // SplitTemplateClosers yields better interop with GCC and GDB (PR46052).
+ PP.SplitTemplateClosers = true;
+ }
// Apply -fdebug-prefix-map.
PP.Callbacks = &PrintCB;
@@ -470,10 +477,14 @@ CGDebugInfo::createFile(StringRef FileName,
}
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
+ if (DebugPrefixMap.empty())
+ return Path.str();
+
+ SmallString<256> P = Path;
for (const auto &Entry : DebugPrefixMap)
- if (Path.startswith(Entry.first))
- return (Twine(Entry.second) + Path.substr(Entry.first.size())).str();
- return Path.str();
+ if (llvm::sys::path::replace_path_prefix(P, Entry.first, Entry.second))
+ break;
+ return P.str().str();
}
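
A hedged standalone mirror of the new loop, showing the intended behavior (the mapping shown is illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include <string>
    #include <utility>

    // The first matching -fdebug-prefix-map entry wins; e.g. the mapping
    // {"/home/user/src", "/src"} rewrites "/home/user/src/lib/Foo.cpp"
    // to "/src/lib/Foo.cpp", editing the path in place.
    static std::string
    remapPath(llvm::StringRef Path,
              llvm::ArrayRef<std::pair<std::string, std::string>> Map) {
      if (Map.empty())
        return Path.str();
      llvm::SmallString<256> P = Path;
      for (const auto &Entry : Map)
        if (llvm::sys::path::replace_path_prefix(P, Entry.first, Entry.second))
          break;
      return std::string(P.str());
    }
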
unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) {
@@ -532,11 +543,12 @@ void CGDebugInfo::CreateCompileUnit() {
// file to determine the real absolute path for the file.
std::string MainFileDir;
if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
- MainFileDir = MainFile->getDir()->getName();
+ MainFileDir = std::string(MainFile->getDir()->getName());
if (!llvm::sys::path::is_absolute(MainFileName)) {
llvm::SmallString<1024> MainFileDirSS(MainFileDir);
llvm::sys::path::append(MainFileDirSS, MainFileName);
- MainFileName = llvm::sys::path::remove_leading_dotslash(MainFileDirSS);
+ MainFileName =
+ std::string(llvm::sys::path::remove_leading_dotslash(MainFileDirSS));
}
// If the main file name provided is identical to the input file name, and
// if the input file is a preprocessed source, use the module name for
@@ -610,6 +622,16 @@ void CGDebugInfo::CreateCompileUnit() {
remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo,
getSource(SM, SM.getMainFileID()));
+ StringRef Sysroot, SDK;
+ if (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB) {
+ Sysroot = CGM.getHeaderSearchOpts().Sysroot;
+ auto B = llvm::sys::path::rbegin(Sysroot);
+ auto E = llvm::sys::path::rend(Sysroot);
+ auto It = std::find_if(B, E, [](auto SDK) { return SDK.endswith(".sdk"); });
+ if (It != E)
+ SDK = *It;
+ }
+
// Create new compile unit.
TheCU = DBuilder.createCompileUnit(
LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "",
@@ -620,7 +642,7 @@ void CGDebugInfo::CreateCompileUnit() {
? llvm::DICompileUnit::DebugNameTableKind::None
: static_cast<llvm::DICompileUnit::DebugNameTableKind>(
CGOpts.DebugNameTable),
- CGOpts.DebugRangesBaseAddress);
+ CGOpts.DebugRangesBaseAddress, remapDIPath(Sysroot), SDK);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -750,6 +772,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::Float:
case BuiltinType::LongDouble:
case BuiltinType::Float16:
+ case BuiltinType::BFloat16:
case BuiltinType::Float128:
case BuiltinType::Double:
// FIXME: For targets where long double and __float128 have the same size,
@@ -811,6 +834,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return DBuilder.createBasicType(BTName, Size, Encoding);
}
+llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) {
+ return DBuilder.createUnspecifiedType("auto");
+}
+
+llvm::DIType *CGDebugInfo::CreateType(const ExtIntType *Ty) {
+
+ StringRef Name = Ty->isUnsigned() ? "unsigned _ExtInt" : "_ExtInt";
+ llvm::dwarf::TypeKind Encoding = Ty->isUnsigned()
+ ? llvm::dwarf::DW_ATE_unsigned
+ : llvm::dwarf::DW_ATE_signed;
+
+ return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty),
+ Encoding);
+}
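
A hedged source-level sketch of the types this covers (Clang's _ExtInt extension; the size recorded is whatever getTypeSize reports):

    unsigned _ExtInt(24) u;   // basic type "unsigned _ExtInt", DW_ATE_unsigned
    _ExtInt(13) s;            // basic type "_ExtInt", DW_ATE_signed
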
+
llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) {
// Bit size and offset of the type.
llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float;
@@ -976,11 +1014,21 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
uint64_t Size = 0;
uint32_t Align = 0;
+ llvm::DINode::DIFlags Flags = llvm::DINode::FlagFwdDecl;
+
+ // Add flag to nontrivial forward declarations. To be consistent with MSVC,
+ // add the flag if a record has no definition because we don't know whether
+ // it will be trivial or not.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ if (!CXXRD->hasDefinition() ||
+ (CXXRD->hasDefinition() && !CXXRD->isTrivial()))
+ Flags |= llvm::DINode::FlagNonTrivial;
+
// Create the type.
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU);
llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType(
- getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
- llvm::DINode::FlagFwdDecl, Identifier);
+ getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align, Flags,
+ Identifier);
if (CGM.getCodeGenOpts().DebugFwdTemplateParams)
if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD))
DBuilder.replaceArrays(RetTy, llvm::DINodeArray(),
@@ -1458,16 +1506,18 @@ void CGDebugInfo::CollectRecordFields(
llvm::DISubroutineType *
CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
- llvm::DIFile *Unit) {
+ llvm::DIFile *Unit, bool decl) {
const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>();
if (Method->isStatic())
return cast_or_null<llvm::DISubroutineType>(
getOrCreateType(QualType(Func, 0), Unit));
- return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit);
+ return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit, decl);
}
-llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
- QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) {
+llvm::DISubroutineType *
+CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr,
+ const FunctionProtoType *Func,
+ llvm::DIFile *Unit, bool decl) {
// Add "this" pointer.
llvm::DITypeRefArray Args(
cast<llvm::DISubroutineType>(getOrCreateType(QualType(Func, 0), Unit))
@@ -1475,9 +1525,12 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
assert(Args.size() && "Invalid number of arguments!");
SmallVector<llvm::Metadata *, 16> Elts;
-
// First element is always return type. For 'void' functions it is NULL.
- Elts.push_back(Args[0]);
+ QualType temp = Func->getReturnType();
+ if (temp->getTypeClass() == Type::Auto && decl)
+ Elts.push_back(CreateType(cast<AutoType>(temp)));
+ else
+ Elts.push_back(Args[0]);
// "this" pointer is always first argument.
const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl();
@@ -1536,7 +1589,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method);
StringRef MethodName = getFunctionName(Method);
- llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit);
+ llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit, true);
// Since a single ctor/dtor corresponds to multiple functions, it doesn't
// make sense to give a single ctor/dtor a linkage name.
@@ -1773,18 +1826,38 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
for (unsigned i = 0, e = TAList.size(); i != e; ++i) {
const TemplateArgument &TA = TAList[i];
StringRef Name;
+ bool defaultParameter = false;
if (TPList)
Name = TPList->getParam(i)->getName();
switch (TA.getKind()) {
case TemplateArgument::Type: {
llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit);
- TemplateParams.push_back(
- DBuilder.createTemplateTypeParameter(TheCU, Name, TTy));
+
+ if (TPList)
+ if (auto *templateType =
+ dyn_cast_or_null<TemplateTypeParmDecl>(TPList->getParam(i)))
+ if (templateType->hasDefaultArgument())
+ defaultParameter =
+ templateType->getDefaultArgument() == TA.getAsType();
+
+ TemplateParams.push_back(DBuilder.createTemplateTypeParameter(
+ TheCU, Name, TTy, defaultParameter));
+
} break;
case TemplateArgument::Integral: {
llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit);
+ if (TPList && CGM.getCodeGenOpts().DwarfVersion >= 5)
+ if (auto *templateType =
+ dyn_cast_or_null<NonTypeTemplateParmDecl>(TPList->getParam(i)))
+ if (templateType->hasDefaultArgument() &&
+ !templateType->getDefaultArgument()->isValueDependent())
+ defaultParameter = llvm::APSInt::isSameValue(
+ templateType->getDefaultArgument()->EvaluateKnownConstInt(
+ CGM.getContext()),
+ TA.getAsIntegral());
+
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy,
+ TheCU, Name, TTy, defaultParameter,
llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral())));
} break;
case TemplateArgument::Declaration: {
@@ -1818,12 +1891,14 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
CharUnits chars =
CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+ } else if (const auto *GD = dyn_cast<MSGuidDecl>(D)) {
+ V = CGM.GetAddrOfMSGuidDecl(GD).getPointer();
}
assert(V && "Failed to find template parameter pointer");
V = V->stripPointerCasts();
}
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy, cast_or_null<llvm::Constant>(V)));
+ TheCU, Name, TTy, defaultParameter, cast_or_null<llvm::Constant>(V)));
} break;
case TemplateArgument::NullPtr: {
QualType T = TA.getNullPtrType();
@@ -1841,8 +1916,8 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
V = CGM.getCXXABI().EmitNullMemberPointer(MPT);
if (!V)
V = llvm::ConstantInt::get(CGM.Int8Ty, 0);
- TemplateParams.push_back(
- DBuilder.createTemplateValueParameter(TheCU, Name, TTy, V));
+ TemplateParams.push_back(DBuilder.createTemplateValueParameter(
+ TheCU, Name, TTy, defaultParameter, V));
} break;
case TemplateArgument::Template:
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
@@ -1863,7 +1938,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
assert(V && "Expression in template argument isn't constant");
llvm::DIType *TTy = getOrCreateType(T, Unit);
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy, V->stripPointerCasts()));
+ TheCU, Name, TTy, defaultParameter, V->stripPointerCasts()));
} break;
// And the following should never occur:
case TemplateArgument::TemplateExpansion:
@@ -2071,16 +2146,17 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D,
return T;
}
-void CGDebugInfo::addHeapAllocSiteMetadata(llvm::Instruction *CI,
- QualType D,
+void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI,
+ QualType AllocatedTy,
SourceLocation Loc) {
+ if (CGM.getCodeGenOpts().getDebugInfo() <=
+ codegenoptions::DebugLineTablesOnly)
+ return;
llvm::MDNode *node;
- if (D.getTypePtr()->isVoidPointerType()) {
+ if (AllocatedTy->isVoidType())
node = llvm::MDNode::get(CGM.getLLVMContext(), None);
- } else {
- QualType PointeeTy = D.getTypePtr()->getPointeeType();
- node = getOrCreateType(PointeeTy, getOrCreateFile(Loc));
- }
+ else
+ node = getOrCreateType(AllocatedTy, getOrCreateFile(Loc));
CI->setMetadata("heapallocsite", node);
}
@@ -2221,12 +2297,11 @@ static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind,
// constructor is emitted. Skip this optimization if the class or any of
// its methods are marked dllimport.
if (DebugKind == codegenoptions::DebugInfoConstructor &&
- !CXXDecl->isLambda() && !isClassOrMethodDLLImport(CXXDecl)) {
- for (const auto *Ctor : CXXDecl->ctors()) {
+ !CXXDecl->isLambda() && !CXXDecl->hasConstexprNonCopyMoveConstructor() &&
+ !isClassOrMethodDLLImport(CXXDecl))
+ for (const auto *Ctor : CXXDecl->ctors())
if (Ctor->isUserProvided())
return true;
- }
- }
TemplateSpecializationKind Spec = TSK_Undeclared;
if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD))
@@ -2399,9 +2474,8 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
return CreateTypeDefinition(Ty, Unit);
}
-llvm::DIModule *
-CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
- bool CreateSkeletonCU) {
+llvm::DIModule *CGDebugInfo::getOrCreateModuleRef(ASTSourceDescriptor Mod,
+ bool CreateSkeletonCU) {
// Use the Module pointer as the key into the cache. This is a
// nullptr if the "Module" is a PCH, which is safe because we don't
// support chained PCH debug info, so there can only be a single PCH.
@@ -2446,32 +2520,51 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
assert(StringRef(M->Name).startswith(CGM.getLangOpts().ModuleName) &&
"clang module without ASTFile must be specified by -fmodule-name");
+ // Return the remapped Path, made relative to the compile unit directory.
+ auto RemapPath = [this](StringRef Path) -> std::string {
+ std::string Remapped = remapDIPath(Path);
+ StringRef Relative(Remapped);
+ StringRef CompDir = TheCU->getDirectory();
+ if (Relative.consume_front(CompDir))
+ Relative.consume_front(llvm::sys::path::get_separator());
+
+ return Relative.str();
+ };
+
if (CreateSkeletonCU && IsRootModule && !Mod.getASTFile().empty()) {
// PCH files don't have a signature field in the control block,
// but LLVM detects skeleton CUs by looking for a non-zero DWO id.
// We use the lower 64 bits for debug info.
- uint64_t Signature =
- Mod.getSignature()
- ? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0]
- : ~1ULL;
+
+ uint64_t Signature = 0;
+ if (const auto &ModSig = Mod.getSignature()) {
+ for (unsigned I = 0; I != sizeof(Signature); ++I)
+ Signature |= (uint64_t)ModSig[I] << (I * 8);
+ } else {
+ Signature = ~1ULL;
+ }
llvm::DIBuilder DIB(CGM.getModule());
- DIB.createCompileUnit(TheCU->getSourceLanguage(),
- // TODO: Support "Source" from external AST providers?
- DIB.createFile(Mod.getModuleName(), Mod.getPath()),
- TheCU->getProducer(), true, StringRef(), 0,
- Mod.getASTFile(), llvm::DICompileUnit::FullDebug,
- Signature);
+ SmallString<0> PCM;
+ if (!llvm::sys::path::is_absolute(Mod.getASTFile()))
+ PCM = Mod.getPath();
+ llvm::sys::path::append(PCM, Mod.getASTFile());
+ DIB.createCompileUnit(
+ TheCU->getSourceLanguage(),
+ // TODO: Support "Source" from external AST providers?
+ DIB.createFile(Mod.getModuleName(), TheCU->getDirectory()),
+ TheCU->getProducer(), false, StringRef(), 0, RemapPath(PCM),
+ llvm::DICompileUnit::FullDebug, Signature);
DIB.finalize();
}
llvm::DIModule *Parent =
IsRootModule ? nullptr
- : getOrCreateModuleRef(
- ExternalASTSource::ASTSourceDescriptor(*M->Parent),
- CreateSkeletonCU);
+ : getOrCreateModuleRef(ASTSourceDescriptor(*M->Parent),
+ CreateSkeletonCU);
+ std::string IncludePath = Mod.getPath().str();
llvm::DIModule *DIMod =
DBuilder.createModule(Parent, Mod.getModuleName(), ConfigMacros,
- Mod.getPath(), CGM.getHeaderSearchOpts().Sysroot);
+ RemapPath(IncludePath));
ModuleCache[M].reset(DIMod);
return DIMod;
}
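
The skeleton CU's DWO id is now assembled byte-wise, little-endian, from the first eight bytes of the module signature, instead of from two 32-bit words. A minimal sketch of the folding (the example bytes are illustrative):

    #include <cstdint>

    uint64_t foldSignature(const uint8_t (&Bytes)[8]) {
      uint64_t Signature = 0;
      for (unsigned I = 0; I != sizeof(Signature); ++I)
        Signature |= (uint64_t)Bytes[I] << (I * 8);
      return Signature;   // {0x11, ..., 0x88} -> 0x8877665544332211
    }
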
@@ -2649,9 +2742,17 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
QualType QTy(Ty, 0);
auto SizeExpr = SizeExprCache.find(QTy);
if (SizeExpr != SizeExprCache.end())
- Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond());
- else
- Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1);
+ Subscript = DBuilder.getOrCreateSubrange(
+ SizeExpr->getSecond() /*count*/, nullptr /*lowerBound*/,
+ nullptr /*upperBound*/, nullptr /*stride*/);
+ else {
+ auto *CountNode =
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned(
+ llvm::Type::getInt64Ty(CGM.getLLVMContext()), Count ? Count : -1));
+ Subscript = DBuilder.getOrCreateSubrange(
+ CountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/,
+ nullptr /*stride*/);
+ }
llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -2660,6 +2761,33 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
return DBuilder.createVectorType(Size, Align, ElementTy, SubscriptArray);
}
+llvm::DIType *CGDebugInfo::CreateType(const ConstantMatrixType *Ty,
+ llvm::DIFile *Unit) {
+ // FIXME: Create a dedicated debug type for matrices.
+ // For the time being, a matrix is described as a nested array type.
+
+ llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit);
+ uint64_t Size = CGM.getContext().getTypeSize(Ty);
+ uint32_t Align = getTypeAlignIfRequired(Ty, CGM.getContext());
+
+ // Create ranges for both dimensions.
+ llvm::SmallVector<llvm::Metadata *, 2> Subscripts;
+ auto *ColumnCountNode =
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned(
+ llvm::Type::getInt64Ty(CGM.getLLVMContext()), Ty->getNumColumns()));
+ auto *RowCountNode =
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned(
+ llvm::Type::getInt64Ty(CGM.getLLVMContext()), Ty->getNumRows()));
+ Subscripts.push_back(DBuilder.getOrCreateSubrange(
+ ColumnCountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/,
+ nullptr /*stride*/));
+ Subscripts.push_back(DBuilder.getOrCreateSubrange(
+ RowCountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/,
+ nullptr /*stride*/));
+ llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts);
+ return DBuilder.createArrayType(Size, Align, ElementTy, SubscriptArray);
+}
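
A hedged sketch of the source form (Clang's matrix extension, enabled with -fenable-matrix) and the interim encoding:

    typedef float m4x2_t __attribute__((matrix_type(4, 2)));   // 4 rows, 2 cols

    // Until DWARF grows a matrix type, m4x2_t is described like a nested
    // array: two subranges with columns outermost, i.e. roughly float[2][4].
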
+
llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
uint64_t Size;
uint32_t Align;
@@ -2710,10 +2838,17 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
auto SizeNode = SizeExprCache.find(EltTy);
if (SizeNode != SizeExprCache.end())
- Subscripts.push_back(
- DBuilder.getOrCreateSubrange(0, SizeNode->getSecond()));
- else
- Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
+ Subscripts.push_back(DBuilder.getOrCreateSubrange(
+ SizeNode->getSecond() /*count*/, nullptr /*lowerBound*/,
+ nullptr /*upperBound*/, nullptr /*stride*/));
+ else {
+ auto *CountNode =
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned(
+ llvm::Type::getInt64Ty(CGM.getLLVMContext()), Count));
+ Subscripts.push_back(DBuilder.getOrCreateSubrange(
+ CountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/,
+ nullptr /*stride*/));
+ }
EltTy = Ty->getElementType();
}
@@ -2772,7 +2907,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
return DBuilder.createMemberPointerType(
getOrCreateInstanceMethodType(
CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()),
- FPT, U),
+ FPT, U, false),
ClassType, Size, /*Align=*/0, Flags);
}
@@ -3025,7 +3160,7 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) {
// option.
if (Module *M = D->getOwningModule()) {
// This is a (sub-)module.
- auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
+ auto Info = ASTSourceDescriptor(*M);
return getOrCreateModuleRef(Info, /*SkeletonCU=*/false);
} else {
// This the precompiled header being built.
@@ -3053,6 +3188,8 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
case Type::ExtVector:
case Type::Vector:
return CreateType(cast<VectorType>(Ty), Unit);
+ case Type::ConstantMatrix:
+ return CreateType(cast<ConstantMatrixType>(Ty), Unit);
case Type::ObjCObjectPointer:
return CreateType(cast<ObjCObjectPointerType>(Ty), Unit);
case Type::ObjCObject:
@@ -3094,6 +3231,8 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
case Type::Atomic:
return CreateType(cast<AtomicType>(Ty), Unit);
+ case Type::ExtInt:
+ return CreateType(cast<ExtIntType>(Ty));
case Type::Pipe:
return CreateType(cast<PipeType>(Ty), Unit);
@@ -3547,7 +3686,7 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None));
if (const auto *Method = dyn_cast<CXXMethodDecl>(D))
- return getOrCreateMethodType(Method, F);
+ return getOrCreateMethodType(Method, F, false);
const auto *FTy = FnType->getAs<FunctionType>();
CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C;
@@ -3651,8 +3790,11 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
Name = getDynamicInitializerName(cast<VarDecl>(D), GD.getDynamicInitKind(),
Fn);
} else {
- // Use llvm function name.
Name = Fn->getName();
+
+ if (isa<BlockDecl>(D))
+ LinkageName = Name;
+
Flags |= llvm::DINode::FlagPrototyped;
}
if (Name.startswith("\01"))
@@ -3764,7 +3906,7 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
if (IsDeclForCallSite)
Fn->setSubprogram(SP);
- DBuilder.retainType(SP);
+ DBuilder.finalizeSubprogram(SP);
}
void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke,
@@ -3778,12 +3920,12 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke,
if (Func->getSubprogram())
return;
- // Do not emit a declaration subprogram for a builtin or if call site info
- // isn't required. Also, elide declarations for functions with reserved names,
- // as call site-related features aren't interesting in this case (& also, the
- // compiler may emit calls to these functions without debug locations, which
- // makes the verifier complain).
- if (CalleeDecl->getBuiltinID() != 0 ||
+ // Do not emit a declaration subprogram for a builtin, a function with nodebug
+ // attribute, or if call site info isn't required. Also, elide declarations
+ // for functions with reserved names, as call site-related features aren't
+ // interesting in this case (& also, the compiler may emit calls to these
+ // functions without debug locations, which makes the verifier complain).
+ if (CalleeDecl->getBuiltinID() != 0 || CalleeDecl->hasAttr<NoDebugAttr>() ||
getCallSiteRelatedAttrs() == llvm::DINode::FlagZero)
return;
if (const auto *Id = CalleeDecl->getIdentifier())
@@ -4680,7 +4822,7 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB)
return;
if (Module *M = ID.getImportedModule()) {
- auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
+ auto Info = ASTSourceDescriptor(*M);
auto Loc = ID.getLocation();
DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())),
@@ -4844,8 +4986,7 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const {
(CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB ||
CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::GDB);
- if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5 &&
- !CGM.getCodeGenOpts().EnableDebugEntryValues)
+ if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5)
return llvm::DINode::FlagZero;
return llvm::DINode::FlagAllCallsDescribed;
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 90e9a61ebe96..96ef6c7c1d27 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -17,9 +17,11 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExternalASTSource.h"
+#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/Module.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -60,7 +62,7 @@ class CGDebugInfo {
llvm::DIBuilder DBuilder;
llvm::DICompileUnit *TheCU = nullptr;
ModuleMap *ClangModuleMap = nullptr;
- ExternalASTSource::ASTSourceDescriptor PCHDescriptor;
+ ASTSourceDescriptor PCHDescriptor;
SourceLocation CurLoc;
llvm::MDNode *CurInlinedAt = nullptr;
llvm::DIType *VTablePtrType = nullptr;
@@ -165,6 +167,8 @@ class CGDebugInfo {
/// ivars and property accessors.
llvm::DIType *CreateType(const BuiltinType *Ty);
llvm::DIType *CreateType(const ComplexType *Ty);
+ llvm::DIType *CreateType(const AutoType *Ty);
+ llvm::DIType *CreateType(const ExtIntType *Ty);
llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg);
llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg);
llvm::DIType *CreateType(const TemplateSpecializationType *Ty,
@@ -188,6 +192,7 @@ class CGDebugInfo {
llvm::DIType *CreateType(const ObjCTypeParamType *Ty, llvm::DIFile *Unit);
llvm::DIType *CreateType(const VectorType *Ty, llvm::DIFile *F);
+ llvm::DIType *CreateType(const ConstantMatrixType *Ty, llvm::DIFile *F);
llvm::DIType *CreateType(const ArrayType *Ty, llvm::DIFile *F);
llvm::DIType *CreateType(const LValueReferenceType *Ty, llvm::DIFile *F);
llvm::DIType *CreateType(const RValueReferenceType *Ty, llvm::DIFile *Unit);
@@ -214,10 +219,10 @@ class CGDebugInfo {
/// not updated to include implicit \c this pointer. Use this routine
/// to get a method type which includes \c this pointer.
llvm::DISubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method,
- llvm::DIFile *F);
+ llvm::DIFile *F, bool decl);
llvm::DISubroutineType *
getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func,
- llvm::DIFile *Unit);
+ llvm::DIFile *Unit, bool decl);
llvm::DISubroutineType *
getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F);
/// \return debug info descriptor for vtable.
@@ -378,9 +383,7 @@ public:
/// When generating debug information for a clang module or
/// precompiled header, this module map will be used to determine
/// the module of origin of each Decl.
- void setPCHDescriptor(ExternalASTSource::ASTSourceDescriptor PCH) {
- PCHDescriptor = PCH;
- }
+ void setPCHDescriptor(ASTSourceDescriptor PCH) { PCHDescriptor = PCH; }
/// @}
/// Update the current source location. If \arg loc is invalid it is
@@ -506,7 +509,7 @@ public:
llvm::DIType *getOrCreateStandaloneType(QualType Ty, SourceLocation Loc);
/// Add heapallocsite metadata for MSAllocator calls.
- void addHeapAllocSiteMetadata(llvm::Instruction *CallSite, QualType Ty,
+ void addHeapAllocSiteMetadata(llvm::CallBase *CallSite, QualType AllocatedTy,
SourceLocation Loc);
void completeType(const EnumDecl *ED);
@@ -589,9 +592,8 @@ private:
/// Get a reference to a clang module. If \p CreateSkeletonCU is true,
/// this also creates a split dwarf skeleton compile unit.
- llvm::DIModule *
- getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
- bool CreateSkeletonCU);
+ llvm::DIModule *getOrCreateModuleRef(ASTSourceDescriptor Mod,
+ bool CreateSkeletonCU);
/// DebugTypeExtRefs: If \p D originated in a clang module, return it.
llvm::DIModule *getParentModuleOrNull(const Decl *D);
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 5aac7a8d54c7..1729c7ed3c31 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -31,6 +31,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/Sema/Sema.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
@@ -40,6 +41,9 @@
using namespace clang;
using namespace CodeGen;
+static_assert(clang::Sema::MaximumAlignment <= llvm::Value::MaximumAlignment,
+ "Clang max alignment greater than what LLVM supports?");
+
void CodeGenFunction::EmitDecl(const Decl &D) {
switch (D.getKind()) {
case Decl::BuiltinTemplate:
@@ -104,6 +108,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::StaticAssert: // static_assert(X, ""); [C++0x]
case Decl::Label: // __label__ x;
case Decl::Import:
+ case Decl::MSGuid: // __declspec(uuid("..."))
case Decl::OMPThreadPrivate:
case Decl::OMPAllocate:
case Decl::OMPCapturedExpr:
@@ -111,6 +116,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::Empty:
case Decl::Concept:
case Decl::LifetimeExtendedTemporary:
+ case Decl::RequiresExprBody:
// None of these decls require codegen support.
return;
@@ -205,9 +211,9 @@ static std::string getStaticDeclName(CodeGenModule &CGM, const VarDecl &D) {
if (auto *CD = dyn_cast<CapturedDecl>(DC))
DC = cast<DeclContext>(CD->getNonClosureContext());
if (const auto *FD = dyn_cast<FunctionDecl>(DC))
- ContextName = CGM.getMangledName(FD);
+ ContextName = std::string(CGM.getMangledName(FD));
else if (const auto *BD = dyn_cast<BlockDecl>(DC))
- ContextName = CGM.getBlockMangledName(GlobalDecl(), BD);
+ ContextName = std::string(CGM.getBlockMangledName(GlobalDecl(), BD));
else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(DC))
ContextName = OMD->getSelector().getAsString();
else
@@ -232,7 +238,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
// Use the label if the variable is renamed with the asm-label extension.
std::string Name;
if (D.hasAttr<AsmLabelAttr>())
- Name = getMangledName(&D);
+ Name = std::string(getMangledName(&D));
else
Name = getStaticDeclName(*this, D);
@@ -244,7 +250,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
// variables cannot have an initializer.
llvm::Constant *Init = nullptr;
if (Ty.getAddressSpace() == LangAS::opencl_local ||
- D.hasAttr<CUDASharedAttr>())
+ D.hasAttr<CUDASharedAttr>() || D.hasAttr<LoaderUninitializedAttr>())
Init = llvm::UndefValue::get(LTy);
else
Init = EmitNullConstant(Ty);
@@ -336,7 +342,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
// the global to match the initializer. (We have to do this
// because some types, like unions, can't be completely represented
// in the LLVM type system.)
- if (GV->getType()->getElementType() != Init->getType()) {
+ if (GV->getValueType() != Init->getType()) {
llvm::GlobalVariable *OldGV = GV;
GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
@@ -756,10 +762,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
// If we're emitting a value with lifetime, we have to do the
// initialization *before* we leave the cleanup scopes.
- if (const FullExpr *fe = dyn_cast<FullExpr>(init)) {
- enterFullExpression(fe);
- init = fe->getSubExpr();
- }
+ if (const ExprWithCleanups *EWC = dyn_cast<ExprWithCleanups>(init))
+ init = EWC->getSubExpr();
CodeGenFunction::RunCleanupsScope Scope(*this);
// We have to maintain the illusion that the variable is
@@ -1045,13 +1049,13 @@ static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern,
llvm::Type *OrigTy = constant->getType();
if (const auto STy = dyn_cast<llvm::StructType>(OrigTy))
return constStructWithPadding(CGM, isPattern, STy, constant);
- if (auto *STy = dyn_cast<llvm::SequentialType>(OrigTy)) {
+ if (auto *ArrayTy = dyn_cast<llvm::ArrayType>(OrigTy)) {
llvm::SmallVector<llvm::Constant *, 8> Values;
- unsigned Size = STy->getNumElements();
+ uint64_t Size = ArrayTy->getNumElements();
if (!Size)
return constant;
- llvm::Type *ElemTy = STy->getElementType();
- bool ZeroInitializer = constant->isZeroValue();
+ llvm::Type *ElemTy = ArrayTy->getElementType();
+ bool ZeroInitializer = constant->isNullValue();
llvm::Constant *OpValue, *PaddedOp;
if (ZeroInitializer) {
OpValue = llvm::Constant::getNullValue(ElemTy);
@@ -1067,13 +1071,12 @@ static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern,
auto *NewElemTy = Values[0]->getType();
if (NewElemTy == ElemTy)
return constant;
- if (OrigTy->isArrayTy()) {
- auto *ArrayTy = llvm::ArrayType::get(NewElemTy, Size);
- return llvm::ConstantArray::get(ArrayTy, Values);
- } else {
- return llvm::ConstantVector::get(Values);
- }
+ auto *NewArrayTy = llvm::ArrayType::get(NewElemTy, Size);
+ return llvm::ConstantArray::get(NewArrayTy, Values);
}
+ // FIXME: Add handling for tail padding in vectors. Vectors don't
+ // have padding between or inside elements, but the total amount of
+ // data can be less than the allocated size.
return constant;
}
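For illustration, a minimal sketch of the array path above, assuming an element type that needs internal padding (the types here are hypothetical, not taken from the patch):

    // Pattern-initializing `struct { char c; int i; } a[2];`
    //   element type before padding: { i8, i32 }
    //   element type after padding:  { i8, [3 x i8], i32 }   (via recursive constWithPadding)
    //   rebuilt array type:          [2 x { i8, [3 x i8], i32 }]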
@@ -1086,7 +1089,7 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D,
return CC->getNameAsString();
if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD))
return CD->getNameAsString();
- return getMangledName(FD);
+ return std::string(getMangledName(FD));
} else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) {
return OM->getNameAsString();
} else if (isa<BlockDecl>(DC)) {
@@ -1397,10 +1400,15 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
Address address = Address::invalid();
Address AllocaAddr = Address::invalid();
- Address OpenMPLocalAddr =
- getLangOpts().OpenMP
- ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
- : Address::invalid();
+ Address OpenMPLocalAddr = Address::invalid();
+ if (CGM.getLangOpts().OpenMPIRBuilder)
+ OpenMPLocalAddr = OMPBuilderCBHelpers::getAddressOfLocalVariable(*this, &D);
+ else
+ OpenMPLocalAddr =
+ getLangOpts().OpenMP
+ ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+ : Address::invalid();
+
bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable();
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
@@ -1512,9 +1520,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// is rare.
if (!Bypasses.IsBypassed(&D) &&
!(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) {
- uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy);
+ llvm::TypeSize size =
+ CGM.getDataLayout().getTypeAllocSize(allocaTy);
emission.SizeForLifetimeMarkers =
- EmitLifetimeStart(size, AllocaAddr.getPointer());
+ size.isScalable() ? EmitLifetimeStart(-1, AllocaAddr.getPointer())
+ : EmitLifetimeStart(size.getFixedSize(),
+ AllocaAddr.getPointer());
}
} else {
assert(!emission.useLifetimeMarkers());
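A minimal sketch of the two lifetime-marker paths, assuming one fixed-size and one SVE scalable alloca (value names are made up):

    // %fixed = alloca [16 x i8]
    // call void @llvm.lifetime.start.p0i8(i64 16, i8* %fixed.cast)
    // %scalable = alloca <vscale x 4 x i32>   ; byte size depends on vscale
    // call void @llvm.lifetime.start.p0i8(i64 -1, i8* %scalable.cast)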
@@ -1671,9 +1682,13 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type,
case LangOptions::TrivialAutoVarInitKind::Uninitialized:
llvm_unreachable("Uninitialized handled by caller");
case LangOptions::TrivialAutoVarInitKind::Zero:
+ if (CGM.stopAutoInit())
+ return;
emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder);
break;
case LangOptions::TrivialAutoVarInitKind::Pattern:
+ if (CGM.stopAutoInit())
+ return;
emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder);
break;
}
@@ -1696,6 +1711,8 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type,
llvm_unreachable("Uninitialized handled by caller");
case LangOptions::TrivialAutoVarInitKind::Zero:
+ if (CGM.stopAutoInit())
+ return;
if (!EltSize.isOne())
SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize));
Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
@@ -1703,6 +1720,8 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type,
break;
case LangOptions::TrivialAutoVarInitKind::Pattern: {
+ if (CGM.stopAutoInit())
+ return;
llvm::Type *ElTy = Loc.getElementType();
llvm::Constant *Constant = constWithPadding(
CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy));
@@ -1861,9 +1880,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
///
/// \param init the initializing expression
/// \param D the object to act as if we're initializing
-/// \param loc the address to initialize; its type is a pointer
-/// to the LLVM mapping of the object's type
-/// \param alignment the alignment of the address
+/// \param lvalue the lvalue to initialize
/// \param capturedByInit true if \p D is a __block variable
/// whose address is potentially changed by the initializer
void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
@@ -2532,5 +2549,5 @@ void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D,
}
void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
- getOpenMPRuntime().checkArchForUnifiedAddressing(D);
+ getOpenMPRuntime().processRequiresDirective(D);
}
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 3baa0a080f5d..5a8500364295 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -16,11 +16,12 @@
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/Attr.h"
-#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace clang;
using namespace CodeGen;
@@ -239,7 +240,7 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD,
}
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(
+ llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction(
ty, FnName.str(), FI, VD.getLocation());
CodeGenFunction CGF(CGM);
@@ -249,7 +250,7 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD,
llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr);
- // Make sure the call and the callee agree on calling convention.
+ // Make sure the call and the callee agree on calling convention.
if (auto *dtorFn = dyn_cast<llvm::Function>(
dtor.getCallee()->stripPointerCastsAndAliases()))
call->setCallingConv(dtorFn->getCallingConv());
@@ -270,8 +271,12 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
// extern "C" int atexit(void (*f)(void));
+ assert(cast<llvm::Function>(dtorStub)->getFunctionType() ==
+ llvm::FunctionType::get(CGM.VoidTy, false) &&
+ "Argument to atexit has a wrong type.");
+
llvm::FunctionType *atexitTy =
- llvm::FunctionType::get(IntTy, dtorStub->getType(), false);
+ llvm::FunctionType::get(IntTy, dtorStub->getType(), false);
llvm::FunctionCallee atexit =
CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(),
@@ -282,6 +287,30 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
EmitNounwindRuntimeCall(atexit, dtorStub);
}
+llvm::Value *
+CodeGenFunction::unregisterGlobalDtorWithUnAtExit(llvm::Function *dtorStub) {
+ // The unatexit subroutine unregisters __dtor functions that were previously
+ // registered by the atexit subroutine. If the referenced function is found,
+ // it is removed from the list of functions that are called at normal program
+ // termination and unatexit returns 0; otherwise, a non-zero value is
+ // returned.
+ //
+ // extern "C" int unatexit(void (*f)(void));
+ assert(dtorStub->getFunctionType() ==
+ llvm::FunctionType::get(CGM.VoidTy, false) &&
+ "Argument to unatexit has a wrong type.");
+
+ llvm::FunctionType *unatexitTy =
+ llvm::FunctionType::get(IntTy, {dtorStub->getType()}, /*isVarArg=*/false);
+
+ llvm::FunctionCallee unatexit =
+ CGM.CreateRuntimeFunction(unatexitTy, "unatexit", llvm::AttributeList());
+
+ cast<llvm::Function>(unatexit.getCallee())->setDoesNotThrow();
+
+ return EmitNounwindRuntimeCall(unatexit, dtorStub);
+}
+
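A minimal sketch of the runtime contract assumed here (unatexit is provided by the AIX C library; the stub name below is hypothetical):

    extern "C" int atexit(void (*f)(void));
    extern "C" int unatexit(void (*f)(void)); // 0 if f was found and removed

    static void __dtor_stub(void) { /* destroy the object */ }
    // Registration:   atexit(__dtor_stub);
    // Unregistration: if (unatexit(__dtor_stub) == 0) { /* stub removed, never ran */ }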
void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D,
llvm::GlobalVariable *DeclPtr,
bool PerformInit) {
@@ -333,19 +362,23 @@ void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights);
}
-llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
+llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction(
llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI,
- SourceLocation Loc, bool TLS) {
- llvm::Function *Fn =
- llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
- Name, &getModule());
+ SourceLocation Loc, bool TLS, bool IsExternalLinkage) {
+ llvm::Function *Fn = llvm::Function::Create(
+ FTy,
+ IsExternalLinkage ? llvm::GlobalValue::ExternalLinkage
+ : llvm::GlobalValue::InternalLinkage,
+ Name, &getModule());
+
if (!getLangOpts().AppleKext && !TLS) {
// Set the section if needed.
if (const char *Section = getTarget().getStaticInitSectionSpecifier())
Fn->setSection(Section);
}
- SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ if (Fn->hasInternalLinkage())
+ SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
Fn->setCallingConv(getRuntimeCC());
@@ -392,20 +425,20 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
- auto RASignKind = getCodeGenOpts().getSignReturnAddress();
- if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) {
+ auto RASignKind = getLangOpts().getSignReturnAddressScope();
+ if (RASignKind != LangOptions::SignReturnAddressScopeKind::None) {
Fn->addFnAttr("sign-return-address",
- RASignKind == CodeGenOptions::SignReturnAddressScope::All
+ RASignKind == LangOptions::SignReturnAddressScopeKind::All
? "all"
: "non-leaf");
- auto RASignKey = getCodeGenOpts().getSignReturnAddressKey();
+ auto RASignKey = getLangOpts().getSignReturnAddressKey();
Fn->addFnAttr("sign-return-address-key",
- RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ RASignKey == LangOptions::SignReturnAddressKeyKind::AKey
? "a_key"
: "b_key");
}
- if (getCodeGenOpts().BranchTargetEnforcement)
+ if (getLangOpts().BranchTargetEnforcement)
Fn->addFnAttr("branch-target-enforcement");
return Fn;
@@ -461,10 +494,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
}
// Create a variable initialization function.
- llvm::Function *Fn =
- CreateGlobalInitOrDestructFunction(FTy, FnName.str(),
- getTypes().arrangeNullaryFunction(),
- D->getLocation());
+ llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction(
+ FTy, FnName.str(), getTypes().arrangeNullaryFunction(), D->getLocation());
auto *ISA = D->getAttr<InitSegAttr>();
CodeGenFunction(*this).GenerateCXXGlobalVarDeclInitFunc(Fn, D, Addr,
@@ -533,6 +564,22 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() {
CXXThreadLocals.clear();
}
+static SmallString<128> getTransformedFileName(llvm::Module &M) {
+ SmallString<128> FileName = llvm::sys::path::filename(M.getName());
+
+ if (FileName.empty())
+ FileName = "<null>";
+
+ for (size_t i = 0; i < FileName.size(); ++i) {
+ // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+ // to be the set of C preprocessing numbers.
+ if (!isPreprocessingNumberBody(FileName[i]))
+ FileName[i] = '_';
+ }
+
+ return FileName;
+}
+
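For example (module names assumed): '-' is not a preprocessing-number character and is replaced, and even the "<null>" fallback is itself transformed by the loop:

    // "foo-bar.cpp" -> "foo_bar.cpp" -> "_GLOBAL__sub_I_foo_bar.cpp"
    // ""            -> "<null>"      -> "_null_" -> "_GLOBAL__sub_I__null_"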
void
CodeGenModule::EmitCXXGlobalInitFunc() {
while (!CXXGlobalInits.empty() && !CXXGlobalInits.back())
@@ -541,11 +588,27 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
if (CXXGlobalInits.empty() && PrioritizedCXXGlobalInits.empty())
return;
+ const bool UseSinitAndSterm = getCXXABI().useSinitAndSterm();
+ if (UseSinitAndSterm) {
+ GlobalUniqueModuleId = getUniqueModuleId(&getModule());
+
+ // FIXME: We need to figure out what to hash on or encode into the unique ID
+ // we need.
+ if (GlobalUniqueModuleId.compare("") == 0)
+ llvm::report_fatal_error(
+ "cannot produce a unique identifier for this module"
+ " based on strong external symbols");
+ GlobalUniqueModuleId = GlobalUniqueModuleId.substr(1);
+ }
+
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction();
- // Create our global initialization function.
+ // Create our global prioritized initialization function.
if (!PrioritizedCXXGlobalInits.empty()) {
+ assert(!UseSinitAndSterm && "Prioritized sinit and sterm functions are not"
+ " supported yet.");
+
SmallVector<llvm::Function *, 8> LocalCXXGlobalInits;
llvm::array_pod_sort(PrioritizedCXXGlobalInits.begin(),
PrioritizedCXXGlobalInits.end());
@@ -565,7 +628,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
std::string PrioritySuffix = llvm::utostr(Priority);
// Priority is always <= 65535 (enforced by sema).
PrioritySuffix = std::string(6-PrioritySuffix.size(), '0')+PrioritySuffix;
- llvm::Function *Fn = CreateGlobalInitOrDestructFunction(
+ llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction(
FTy, "_GLOBAL__I_" + PrioritySuffix, FI);
for (; I < PrioE; ++I)
@@ -577,22 +640,27 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
PrioritizedCXXGlobalInits.clear();
}
- // Include the filename in the symbol name. Including "sub_" matches gcc and
- // makes sure these symbols appear lexicographically behind the symbols with
- // priority emitted above.
- SmallString<128> FileName = llvm::sys::path::filename(getModule().getName());
- if (FileName.empty())
- FileName = "<null>";
+ if (UseSinitAndSterm && CXXGlobalInits.empty())
+ return;
- for (size_t i = 0; i < FileName.size(); ++i) {
- // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
- // to be the set of C preprocessing numbers.
- if (!isPreprocessingNumberBody(FileName[i]))
- FileName[i] = '_';
+ // Create our global initialization function.
+ SmallString<128> FuncName;
+ bool IsExternalLinkage = false;
+ if (UseSinitAndSterm) {
+ llvm::Twine("__sinit80000000_clang_", GlobalUniqueModuleId)
+ .toVector(FuncName);
+ IsExternalLinkage = true;
+ } else {
+ // Include the filename in the symbol name. Including "sub_" matches gcc
+ // and makes sure these symbols appear lexicographically behind the symbols
+ // with priority emitted above.
+ llvm::Twine("_GLOBAL__sub_I_", getTransformedFileName(getModule()))
+ .toVector(FuncName);
}
- llvm::Function *Fn = CreateGlobalInitOrDestructFunction(
- FTy, llvm::Twine("_GLOBAL__sub_I_", FileName), FI);
+ llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction(
+ FTy, FuncName, FI, SourceLocation(), false /* TLS */,
+ IsExternalLinkage);
CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits);
AddGlobalCtor(Fn);
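The two naming schemes produced above, side by side (the unique-ID suffix is illustrative):

    // Default:   _GLOBAL__sub_I_myfile.cpp                 (internal linkage)
    // AIX sinit: __sinit80000000_clang_<unique-module-id>  (external linkage)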
@@ -618,19 +686,38 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
CXXGlobalInits.clear();
}
-void CodeGenModule::EmitCXXGlobalDtorFunc() {
- if (CXXGlobalDtors.empty())
+void CodeGenModule::EmitCXXGlobalCleanUpFunc() {
+ if (CXXGlobalDtorsOrStermFinalizers.empty())
return;
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
-
- // Create our global destructor function.
const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction();
- llvm::Function *Fn =
- CreateGlobalInitOrDestructFunction(FTy, "_GLOBAL__D_a", FI);
- CodeGenFunction(*this).GenerateCXXGlobalDtorsFunc(Fn, CXXGlobalDtors);
+ // Create our global cleanup function.
+ llvm::Function *Fn = nullptr;
+ if (getCXXABI().useSinitAndSterm()) {
+ if (GlobalUniqueModuleId.empty()) {
+ GlobalUniqueModuleId = getUniqueModuleId(&getModule());
+ // FIXME: We need to figure out what to hash on or encode into the unique
+ // ID we need.
+ if (GlobalUniqueModuleId.compare("") == 0)
+ llvm::report_fatal_error(
+ "cannot produce a unique identifier for this module"
+ " based on strong external symbols");
+ GlobalUniqueModuleId = GlobalUniqueModuleId.substr(1);
+ }
+
+ Fn = CreateGlobalInitOrCleanUpFunction(
+ FTy, llvm::Twine("__sterm80000000_clang_", GlobalUniqueModuleId), FI,
+ SourceLocation(), false /* TLS */, true /* IsExternalLinkage */);
+ } else {
+ Fn = CreateGlobalInitOrCleanUpFunction(FTy, "_GLOBAL__D_a", FI);
+ }
+
+ CodeGenFunction(*this).GenerateCXXGlobalCleanUpFunc(
+ Fn, CXXGlobalDtorsOrStermFinalizers);
AddGlobalDtor(Fn);
+ CXXGlobalDtorsOrStermFinalizers.clear();
}
/// Emit the code necessary to initialize the given global variable.
@@ -726,10 +813,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
FinishFunction();
}
-void CodeGenFunction::GenerateCXXGlobalDtorsFunc(
+void CodeGenFunction::GenerateCXXGlobalCleanUpFunc(
llvm::Function *Fn,
const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH,
- llvm::Constant *>> &DtorsAndObjects) {
+ llvm::Constant *>> &DtorsOrStermFinalizers) {
{
auto NL = ApplyDebugLocation::CreateEmpty(*this);
StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -737,13 +824,22 @@ void CodeGenFunction::GenerateCXXGlobalDtorsFunc(
// Emit an artificial location for this function.
auto AL = ApplyDebugLocation::CreateArtificial(*this);
- // Emit the dtors, in reverse order from construction.
- for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) {
+ // Emit the cleanups, in reverse order from construction.
+ for (unsigned i = 0, e = DtorsOrStermFinalizers.size(); i != e; ++i) {
llvm::FunctionType *CalleeTy;
llvm::Value *Callee;
llvm::Constant *Arg;
- std::tie(CalleeTy, Callee, Arg) = DtorsAndObjects[e - i - 1];
- llvm::CallInst *CI = Builder.CreateCall(CalleeTy, Callee, Arg);
+ std::tie(CalleeTy, Callee, Arg) = DtorsOrStermFinalizers[e - i - 1];
+
+ llvm::CallInst *CI = nullptr;
+ if (Arg == nullptr) {
+ assert(
+ CGM.getCXXABI().useSinitAndSterm() &&
+ "Arg could not be nullptr unless using sinit and sterm functions.");
+ CI = Builder.CreateCall(CalleeTy, Callee);
+ } else
+ CI = Builder.CreateCall(CalleeTy, Callee, Arg);
+
// Make sure the call and the callee agree on calling convention.
if (llvm::Function *F = dyn_cast<llvm::Function>(Callee))
CI->setCallingConv(F->getCallingConv());
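Illustrative entries in CXXGlobalDtorsOrStermFinalizers after this change (the callee names and argument are assumptions):

    // { FTy, dtor_stub,       object_or_handle } // ordinary dtor: called with one argument
    // { FTy, sterm_finalizer, nullptr }          // sterm finalizer: called with no argument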
@@ -767,7 +863,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
const CGFunctionInfo &FI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(
+ llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, "__cxx_global_array_dtor", FI, VD->getLocation());
CurEHLocation = VD->getBeginLoc();
diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp
index 53fafab3e0e6..bdf70252b5ad 100644
--- a/clang/lib/CodeGen/CGException.cpp
+++ b/clang/lib/CodeGen/CGException.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -468,6 +469,18 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) {
// encode these in an object file but MSVC doesn't do anything with it.
if (getTarget().getCXXABI().isMicrosoft())
return;
+ // In wasm we currently treat 'throw()' in the same way as 'noexcept'. In
+ // the case of a throw with types, we ignore it and print a warning for now.
+ // TODO Correctly handle exception specification in wasm
+ if (CGM.getLangOpts().WasmExceptions) {
+ if (EST == EST_DynamicNone)
+ EHStack.pushTerminate();
+ else
+ CGM.getDiags().Report(D->getLocation(),
+ diag::warn_wasm_dynamic_exception_spec_ignored)
+ << FD->getExceptionSpecSourceRange();
+ return;
+ }
unsigned NumExceptions = Proto->getNumExceptions();
EHFilterScope *Filter = EHStack.pushFilter(NumExceptions);
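For example, a sketch of what the wasm path currently does with each form (declarations assumed):

    void f() throw();    // treated like noexcept: a terminate scope is pushed
    void g() throw(int); // ignored, with warn_wasm_dynamic_exception_spec_ignored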
@@ -544,6 +557,14 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) {
// encode these in an object file but MSVC doesn't do anything with it.
if (getTarget().getCXXABI().isMicrosoft())
return;
+ // In wasm we currently treat 'throw()' in the same way as 'noexcept'. In
+ // the case of a throw with types, we ignore it and print a warning for now.
+ // TODO Correctly handle exception specification in wasm
+ if (CGM.getLangOpts().WasmExceptions) {
+ if (EST == EST_DynamicNone)
+ EHStack.popTerminate();
+ return;
+ }
EHFilterScope &filterScope = cast<EHFilterScope>(*EHStack.begin());
emitFilterDispatchBlock(*this, filterScope);
EHStack.popFilter();
@@ -630,9 +651,6 @@ CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) {
case EHScope::Terminate:
dispatchBlock = getTerminateHandler();
break;
-
- case EHScope::PadEnd:
- llvm_unreachable("PadEnd unnecessary for Itanium!");
}
scope.setCachedEHDispatchBlock(dispatchBlock);
}
@@ -674,9 +692,6 @@ CodeGenFunction::getFuncletEHDispatchBlock(EHScopeStack::stable_iterator SI) {
case EHScope::Terminate:
DispatchBlock->setName("terminate");
break;
-
- case EHScope::PadEnd:
- llvm_unreachable("PadEnd dispatch block missing!");
}
EHS.setCachedEHDispatchBlock(DispatchBlock);
return DispatchBlock;
@@ -692,7 +707,6 @@ static bool isNonEHScope(const EHScope &S) {
case EHScope::Filter:
case EHScope::Catch:
case EHScope::Terminate:
- case EHScope::PadEnd:
return false;
}
@@ -703,12 +717,12 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() {
assert(EHStack.requiresLandingPad());
assert(!EHStack.empty());
- // If exceptions are disabled and SEH is not in use, then there is no invoke
- // destination. SEH "works" even if exceptions are off. In practice, this
- // means that C++ destructors and other EH cleanups don't run, which is
+ // If exceptions are disabled/ignored and SEH is not in use, then there is no
+ // invoke destination. SEH "works" even if exceptions are off. In practice,
+ // this means that C++ destructors and other EH cleanups don't run, which is
// consistent with MSVC's behavior.
const LangOptions &LO = CGM.getLangOpts();
- if (!LO.Exceptions) {
+ if (!LO.Exceptions || LO.IgnoreExceptions) {
if (!LO.Borland && !LO.MicrosoftExt)
return nullptr;
if (!currentFunctionUsesSEHTry())
@@ -751,15 +765,14 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() {
llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
assert(EHStack.requiresLandingPad());
-
+ assert(!CGM.getLangOpts().IgnoreExceptions &&
+ "LandingPad should not be emitted when -fignore-exceptions are in "
+ "effect.");
EHScope &innermostEHScope = *EHStack.find(EHStack.getInnermostEHScope());
switch (innermostEHScope.getKind()) {
case EHScope::Terminate:
return getTerminateLandingPad();
- case EHScope::PadEnd:
- llvm_unreachable("PadEnd unnecessary for Itanium!");
-
case EHScope::Catch:
case EHScope::Cleanup:
case EHScope::Filter:
@@ -825,9 +838,6 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
case EHScope::Catch:
break;
-
- case EHScope::PadEnd:
- llvm_unreachable("PadEnd unnecessary for Itanium!");
}
EHCatchScope &catchScope = cast<EHCatchScope>(*I);
@@ -1637,6 +1647,19 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup {
llvm::Value *IsForEH =
llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup());
+
+ // Except for _leave and fall-through at the end, all other exits from a
+ // _try (return/goto/continue/break) are considered abnormal terminations.
+ // Since _leave/fall-through is always indexed 0, use NormalCleanupDestSlot
+ // (>= 1 for goto/return/...) as the 1st argument to indicate abnormal
+ // termination.
+ if (!F.isForEHCleanup() && F.hasExitSwitch()) {
+ Address Addr = CGF.getNormalCleanupDestSlot();
+ llvm::Value *Load = CGF.Builder.CreateLoad(Addr, "cleanup.dest");
+ llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int32Ty);
+ IsForEH = CGF.Builder.CreateICmpNE(Load, Zero);
+ }
+
Args.add(RValue::get(IsForEH), ArgTys[0]);
Args.add(RValue::get(FP), ArgTys[1]);
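This models MSVC's AbnormalTermination() inside __finally; an MSVC-flavored sketch of the source behavior being distinguished (assumed):

    __try {
      if (cond) return 1; // cleanup.dest != 0 -> reported as abnormal
      // _leave or falling off the end -> cleanup.dest == 0 -> normal
    } __finally {
      printf("%d\n", AbnormalTermination());
    }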
@@ -1792,6 +1815,48 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
llvm::Constant *ParentI8Fn =
llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
+
+ // If the parent is a _finally, the passed-in ParentFP is the FP of the
+ // parent _finally, not the establisher's FP (the FP of the outermost
+ // function). The establisher FP is the 2nd parameter passed into the
+ // parent _finally. Fortunately, it's always saved in the parent's frame,
+ // so the following code retrieves it and escapes it so that the spill
+ // instruction won't be optimized away.
+ if (ParentCGF.ParentCGF != nullptr) {
+ // Locate and escape the parent's frame_pointer.addr alloca. Depending on
+ // the target, it should be the 1st/2nd one in LocalDeclMap; just scan for
+ // an ImplicitParamDecl with VoidPtrTy.
+ llvm::AllocaInst *FramePtrAddrAlloca = nullptr;
+ for (auto &I : ParentCGF.LocalDeclMap) {
+ const VarDecl *D = cast<VarDecl>(I.first);
+ if (isa<ImplicitParamDecl>(D) &&
+ D->getType() == getContext().VoidPtrTy) {
+ assert(D->getName().startswith("frame_pointer"));
+ FramePtrAddrAlloca = cast<llvm::AllocaInst>(I.second.getPointer());
+ break;
+ }
+ }
+ assert(FramePtrAddrAlloca);
+ auto InsertPair = ParentCGF.EscapedLocals.insert(
+ std::make_pair(FramePtrAddrAlloca, ParentCGF.EscapedLocals.size()));
+ int FrameEscapeIdx = InsertPair.first->second;
+
+ // An example of a filter's prolog:
+ // %0 = call i8* @llvm.eh.recoverfp(bitcast(@"?fin$0@0@main@@"),..)
+ // %1 = call i8* @llvm.localrecover(bitcast(@"?fin$0@0@main@@"),..)
+ // %2 = bitcast i8* %1 to i8**
+ // %3 = load i8*, i8** %2, align 8
+ // ==> %3 is the frame pointer of the outermost host function
+ llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::localrecover);
+ llvm::Constant *ParentI8Fn =
+ llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
+ ParentFP = Builder.CreateCall(
+ FrameRecoverFn, {ParentI8Fn, ParentFP,
+ llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)});
+ ParentFP = Builder.CreateBitCast(ParentFP, CGM.VoidPtrPtrTy);
+ ParentFP = Builder.CreateLoad(Address(ParentFP, getPointerAlign()));
+ }
}
// Create llvm.localrecover calls for all captures.
@@ -1885,7 +1950,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc());
CurSEHParent = ParentCGF.CurSEHParent;
- CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), CurFn, FnInfo);
EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter);
}
@@ -1990,6 +2055,7 @@ void CodeGenFunction::pushSEHCleanup(CleanupKind Kind,
void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) {
CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
+ HelperCGF.ParentCGF = this;
if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
// Outline the finally block.
llvm::Function *FinallyFunc =
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 8e0604181fb1..9e8770573d70 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -27,6 +27,7 @@
#include "clang/AST/NSAPI.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
@@ -125,8 +126,8 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty,
void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) {
assert(isa<llvm::AllocaInst>(Var.getPointer()));
- auto *Store = new llvm::StoreInst(Init, Var.getPointer());
- Store->setAlignment(Var.getAlignment().getAsAlign());
+ auto *Store = new llvm::StoreInst(Init, Var.getPointer(), /*volatile*/ false,
+ Var.getAlignment().getAsAlign());
llvm::BasicBlock *Block = AllocaInsertPt->getParent();
Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store);
}
@@ -144,8 +145,19 @@ Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
const Twine &Name, Address *Alloca) {
- return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name,
- /*ArraySize=*/nullptr, Alloca);
+ Address Result = CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name,
+ /*ArraySize=*/nullptr, Alloca);
+
+ if (Ty->isConstantMatrixType()) {
+ auto *ArrayTy = cast<llvm::ArrayType>(Result.getType()->getElementType());
+ auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(),
+ ArrayTy->getNumElements());
+
+ Result = Address(
+ Builder.CreateBitCast(Result.getPointer(), VectorTy->getPointerTo()),
+ Result.getAlignment());
+ }
+ return Result;
}
Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty, CharUnits Align,
@@ -415,6 +427,11 @@ static Address createReferenceTemporary(CodeGenFunction &CGF,
llvm_unreachable("unknown storage duration");
}
+/// Helper method to check if the underlying ABI is AAPCS
+static bool isAAPCS(const TargetInfo &TargetInfo) {
+ return TargetInfo.getABI().startswith("aapcs");
+}
+
LValue CodeGenFunction::
EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
const Expr *E = M->getSubExpr();
@@ -711,7 +728,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
if (SanOpts.has(SanitizerKind::ObjectSize) &&
!SkippedChecks.has(SanitizerKind::ObjectSize) &&
!Ty->isIncompleteType()) {
- uint64_t TySize = getContext().getTypeSizeInChars(Ty).getQuantity();
+ uint64_t TySize = CGM.getMinimumObjectSize(Ty).getQuantity();
llvm::Value *Size = llvm::ConstantInt::get(IntPtrTy, TySize);
if (ArraySize)
Size = Builder.CreateMul(Size, ArraySize);
@@ -742,7 +759,9 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
!SkippedChecks.has(SanitizerKind::Alignment)) {
AlignVal = Alignment.getQuantity();
if (!Ty->isIncompleteType() && !AlignVal)
- AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity();
+ AlignVal = CGM.getNaturalTypeAlignment(Ty, nullptr, nullptr,
+ /*ForPointeeType=*/true)
+ .getQuantity();
// The glvalue must be suitably aligned.
if (AlignVal > 1 &&
@@ -858,8 +877,12 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
static bool isFlexibleArrayMemberExpr(const Expr *E) {
// For compatibility with existing code, we treat arrays of length 0 or
// 1 as flexible array members.
+ // FIXME: This is inconsistent with the warning code in SemaChecking. Unify
+ // the two mechanisms.
const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe();
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
+ // FIXME: Sema doesn't treat [1] as a flexible array member if the bound
+ // was produced by macro expansion.
if (CAT->getSize().ugt(1))
return false;
} else if (!isa<IncompleteArrayType>(AT))
@@ -872,6 +895,10 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) {
// FIXME: If the base type of the member expr is not FD->getParent(),
// this should not be treated as a flexible array member access.
if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
+ // FIXME: Sema doesn't treat a T[1] union member as a flexible array
+ // member, only a T[0] or T[] member gets that treatment.
+ if (FD->getParent()->isUnion())
+ return true;
RecordDecl::field_iterator FI(
DeclContext::decl_iterator(const_cast<FieldDecl *>(FD)));
return ++FI == FD->getParent()->field_end();
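Concretely, the cases accepted here (a sketch; the FIXMEs above note where Sema disagrees):

    struct A { int n; int fam[];  }; // incomplete array: true flexible array member
    struct B { int n; int fam[1]; }; // bound 0 or 1: treated as flexible for compatibility
    union  U { int n; int fam[1]; }; // union member with bound <= 1 (see the last FIXME)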
@@ -1069,9 +1096,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
if (isa<ExplicitCastExpr>(CE)) {
LValueBaseInfo TargetTypeBaseInfo;
TBAAAccessInfo TargetTypeTBAAInfo;
- CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(),
- &TargetTypeBaseInfo,
- &TargetTypeTBAAInfo);
+ CharUnits Align = CGM.getNaturalPointeeTypeAlignment(
+ E->getType(), &TargetTypeBaseInfo, &TargetTypeTBAAInfo);
if (TBAAInfo)
*TBAAInfo = CGM.mergeTBAAInfoForCast(*TBAAInfo,
TargetTypeTBAAInfo);
@@ -1139,8 +1165,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
// TODO: conditional operators, comma.
// Otherwise, use the alignment of the type.
- CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo,
- TBAAInfo);
+ CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, TBAAInfo);
return Address(EmitScalarExpr(E), Align);
}
@@ -1276,8 +1302,15 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return EmitVAArgExprLValue(cast<VAArgExpr>(E));
case Expr::DeclRefExprClass:
return EmitDeclRefLValue(cast<DeclRefExpr>(E));
- case Expr::ConstantExprClass:
+ case Expr::ConstantExprClass: {
+ const ConstantExpr *CE = cast<ConstantExpr>(E);
+ if (llvm::Value *Result = ConstantEmitter(*this).tryEmitConstantExpr(CE)) {
+ QualType RetType = cast<CallExpr>(CE->getSubExpr()->IgnoreImplicit())
+ ->getCallReturnType(getContext());
+ return MakeNaturalAlignAddrLValue(Result, RetType);
+ }
return EmitLValue(cast<ConstantExpr>(E)->getSubExpr());
+ }
case Expr::ParenExprClass:
return EmitLValue(cast<ParenExpr>(E)->getSubExpr());
case Expr::GenericSelectionExprClass:
@@ -1304,7 +1337,6 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
case Expr::ExprWithCleanupsClass: {
const auto *cleanups = cast<ExprWithCleanups>(E);
- enterFullExpression(cleanups);
RunCleanupsScope Scope(*this);
LValue LV = EmitLValue(cleanups->getSubExpr());
if (LV.isSimple()) {
@@ -1343,6 +1375,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return EmitUnaryOpLValue(cast<UnaryOperator>(E));
case Expr::ArraySubscriptExprClass:
return EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E));
+ case Expr::MatrixSubscriptExprClass:
+ return EmitMatrixSubscriptExpr(cast<MatrixSubscriptExpr>(E));
case Expr::OMPArraySectionExprClass:
return EmitOMPArraySectionExpr(cast<OMPArraySectionExpr>(E));
case Expr::ExtVectorElementExprClass:
@@ -1368,6 +1402,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
case Expr::CXXDynamicCastExprClass:
case Expr::CXXReinterpretCastExprClass:
case Expr::CXXConstCastExprClass:
+ case Expr::CXXAddrspaceCastExprClass:
case Expr::ObjCBridgedCastExprClass:
return EmitCastLValue(cast<CastExpr>(E));
@@ -1651,15 +1686,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
if (VTy->getNumElements() == 3) {
// Bitcast to vec4 type.
- llvm::VectorType *vec4Ty =
- llvm::VectorType::get(VTy->getElementType(), 4);
+ auto *vec4Ty = llvm::FixedVectorType::get(VTy->getElementType(), 4);
Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
// Now load value.
llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
// Shuffle vector to get vec3.
V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
- {0, 1, 2}, "extractVec");
+ ArrayRef<int>{0, 1, 2}, "extractVec");
return EmitFromMemory(V, Ty);
}
}
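The vec3 special case, sketched as IR inside comments (value names are made up):

    // %cast = bitcast <3 x float>* %p to <4 x float>*
    // %v4   = load <4 x float>, <4 x float>* %cast          ; loadVec4
    // %v3   = shufflevector <4 x float> %v4, <4 x float> undef,
    //                       <3 x i32> <i32 0, i32 1, i32 2> ; extractVec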
@@ -1716,6 +1750,42 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
return Value;
}
+// Convert the pointer of \p Addr to a pointer to a vector (the value type of
+// MatrixType), if it points to an array (the memory type of MatrixType).
+static Address MaybeConvertMatrixAddress(Address Addr, CodeGenFunction &CGF,
+ bool IsVector = true) {
+ auto *ArrayTy = dyn_cast<llvm::ArrayType>(
+ cast<llvm::PointerType>(Addr.getPointer()->getType())->getElementType());
+ if (ArrayTy && IsVector) {
+ auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(),
+ ArrayTy->getNumElements());
+
+ return Address(CGF.Builder.CreateElementBitCast(Addr, VectorTy));
+ }
+ auto *VectorTy = dyn_cast<llvm::VectorType>(
+ cast<llvm::PointerType>(Addr.getPointer()->getType())->getElementType());
+ if (VectorTy && !IsVector) {
+ auto *ArrayTy = llvm::ArrayType::get(VectorTy->getElementType(),
+ VectorTy->getNumElements());
+
+ return Address(CGF.Builder.CreateElementBitCast(Addr, ArrayTy));
+ }
+
+ return Addr;
+}
+
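For a 2x2 matrix of double, a sketch of the two views this helper converts between (names assumed):

    // memory type: [4 x double]*  -- how a ConstantMatrixType is stored
    // value type:  <4 x double>*  -- how it is loaded and operated on
    // %vec.ptr = bitcast [4 x double]* %mem.ptr to <4 x double>*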
+// Emit a store of a matrix LValue. This may require casting the original
+// pointer to the memory type (ArrayType) to a pointer to the value type
+// (VectorType).
+static void EmitStoreOfMatrixScalar(llvm::Value *value, LValue lvalue,
+ bool isInit, CodeGenFunction &CGF) {
+ Address Addr = MaybeConvertMatrixAddress(lvalue.getAddress(CGF), CGF,
+ value->getType()->isVectorTy());
+ CGF.EmitStoreOfScalar(value, Addr, lvalue.isVolatile(), lvalue.getType(),
+ lvalue.getBaseInfo(), lvalue.getTBAAInfo(), isInit,
+ lvalue.isNontemporal());
+}
+
void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
bool Volatile, QualType Ty,
LValueBaseInfo BaseInfo,
@@ -1729,13 +1799,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
// Handle vec3 special.
if (VecTy && VecTy->getNumElements() == 3) {
// Our source is a vec3, do a shuffle vector to make it a vec4.
- llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
- Builder.getInt32(2),
- llvm::UndefValue::get(Builder.getInt32Ty())};
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
- MaskV, "extractVec");
- SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ ArrayRef<int>{0, 1, 2, -1},
+ "extractVec");
+ SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4);
}
if (Addr.getElementType() != SrcTy) {
Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
@@ -1766,11 +1833,26 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
bool isInit) {
+ if (lvalue.getType()->isConstantMatrixType()) {
+ EmitStoreOfMatrixScalar(value, lvalue, isInit, *this);
+ return;
+ }
+
EmitStoreOfScalar(value, lvalue.getAddress(*this), lvalue.isVolatile(),
lvalue.getType(), lvalue.getBaseInfo(),
lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal());
}
+// Emit a load of an LValue of matrix type. This may require casting the
+// pointer to the memory type (ArrayType) to a pointer to the value type
+// (VectorType).
+static RValue EmitLoadOfMatrixLValue(LValue LV, SourceLocation Loc,
+ CodeGenFunction &CGF) {
+ assert(LV.getType()->isConstantMatrixType());
+ Address Addr = MaybeConvertMatrixAddress(LV.getAddress(CGF), CGF);
+ LV.setAddress(Addr);
+ return RValue::get(CGF.EmitLoadOfScalar(LV, Loc));
+}
+
/// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
/// method emits the address of the lvalue, then loads the result as an rvalue,
/// returning the rvalue.
@@ -1796,6 +1878,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
if (LV.isSimple()) {
assert(!LV.getType()->isFunctionType());
+ if (LV.getType()->isConstantMatrixType())
+ return EmitLoadOfMatrixLValue(LV, Loc, *this);
+
// Everything needs a load.
return RValue::get(EmitLoadOfScalar(LV, Loc));
}
@@ -1809,13 +1894,21 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
// If this is a reference to a subset of the elements of a vector, either
// shuffle the input or extract/insert them as appropriate.
- if (LV.isExtVectorElt())
+ if (LV.isExtVectorElt()) {
return EmitLoadOfExtVectorElementLValue(LV);
+ }
// Global Register variables always invoke intrinsics
if (LV.isGlobalReg())
return EmitLoadOfGlobalRegLValue(LV);
+ if (LV.isMatrixElt()) {
+ llvm::LoadInst *Load =
+ Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
+ return RValue::get(
+ Builder.CreateExtractElement(Load, LV.getMatrixIdx(), "matrixext"));
+ }
+
assert(LV.isBitField() && "Unknown LValue type!");
return EmitLoadOfBitfieldLValue(LV, Loc);
}
@@ -1870,13 +1963,12 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
// Always use shuffle vector to try to retain the original program structure
unsigned NumResultElts = ExprVT->getNumElements();
- SmallVector<llvm::Constant*, 4> Mask;
+ SmallVector<int, 4> Mask;
for (unsigned i = 0; i != NumResultElts; ++i)
- Mask.push_back(Builder.getInt32(getAccessedFieldNo(i, Elts)));
+ Mask.push_back(getAccessedFieldNo(i, Elts));
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
Vec = Builder.CreateShuffleVector(Vec, llvm::UndefValue::get(Vec->getType()),
- MaskV);
+ Mask);
return RValue::get(Vec);
}
@@ -1922,7 +2014,6 @@ RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) {
return RValue::get(Call);
}
-
/// EmitStoreThroughLValue - Store the specified rvalue into the specified
/// lvalue, where both are guaranteed to the have the same type, and that type
/// is 'Ty'.
@@ -1948,6 +2039,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
if (Dst.isGlobalReg())
return EmitStoreThroughGlobalRegLValue(Src, Dst);
+ if (Dst.isMatrixElt()) {
+ llvm::Value *Vec = Builder.CreateLoad(Dst.getMatrixAddress());
+ Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
+ Dst.getMatrixIdx(), "matins");
+ Builder.CreateStore(Vec, Dst.getMatrixAddress(),
+ Dst.isVolatileQualified());
+ return;
+ }
+
assert(Dst.isBitField() && "Unknown LValue type");
return EmitStoreThroughBitfieldLValue(Src, Dst);
}
@@ -2066,6 +2166,14 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set");
} else {
assert(Info.Offset == 0);
+ // According to the AAPCS:
+ // When a volatile bit-field is written, and its container does not overlap
+ // with any non-bit-field member, its container must be read exactly once and
+ // written exactly once using the access width appropriate to the type of the
+ // container. The two accesses are not atomic.
+ if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) &&
+ CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad)
+ Builder.CreateLoad(Ptr, true, "bf.load");
}
// Write the new value back out.
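An example that reaches this path under AAPCS when the ForceAAPCSBitfieldLoad codegen option is set (a sketch):

    struct S { volatile int b : 32; }; // bit-field fills its 32-bit container
    void set(struct S *s) { s->b = 1; }
    // Without the option no read is needed here; with it, the container is
    // read exactly once (bf.load) and written exactly once, per the AAPCS.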
@@ -2103,37 +2211,33 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
unsigned NumSrcElts = VTy->getNumElements();
- unsigned NumDstElts = Vec->getType()->getVectorNumElements();
+ unsigned NumDstElts =
+ cast<llvm::VectorType>(Vec->getType())->getNumElements();
if (NumDstElts == NumSrcElts) {
// Use shuffle vector if the src and destination have the same number of
// elements, and restore the vector mask since it is on the side it will be
// stored.
- SmallVector<llvm::Constant*, 4> Mask(NumDstElts);
+ SmallVector<int, 4> Mask(NumDstElts);
for (unsigned i = 0; i != NumSrcElts; ++i)
- Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i);
+ Mask[getAccessedFieldNo(i, Elts)] = i;
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
- Vec = Builder.CreateShuffleVector(SrcVal,
- llvm::UndefValue::get(Vec->getType()),
- MaskV);
+ Vec = Builder.CreateShuffleVector(
+ SrcVal, llvm::UndefValue::get(Vec->getType()), Mask);
} else if (NumDstElts > NumSrcElts) {
// Extended the source vector to the same length and then shuffle it
// into the destination.
// FIXME: since we're shuffling with undef, can we just use the indices
// into that? This could be simpler.
- SmallVector<llvm::Constant*, 4> ExtMask;
+ SmallVector<int, 4> ExtMask;
for (unsigned i = 0; i != NumSrcElts; ++i)
- ExtMask.push_back(Builder.getInt32(i));
- ExtMask.resize(NumDstElts, llvm::UndefValue::get(Int32Ty));
- llvm::Value *ExtMaskV = llvm::ConstantVector::get(ExtMask);
- llvm::Value *ExtSrcVal =
- Builder.CreateShuffleVector(SrcVal,
- llvm::UndefValue::get(SrcVal->getType()),
- ExtMaskV);
+ ExtMask.push_back(i);
+ ExtMask.resize(NumDstElts, -1);
+ llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(
+ SrcVal, llvm::UndefValue::get(SrcVal->getType()), ExtMask);
// build identity
- SmallVector<llvm::Constant*, 4> Mask;
+ SmallVector<int, 4> Mask;
for (unsigned i = 0; i != NumDstElts; ++i)
- Mask.push_back(Builder.getInt32(i));
+ Mask.push_back(i);
// When the vector size is odd and .odd or .hi is used, the last element
// of the Elts constant array will be one past the size of the vector.
@@ -2143,9 +2247,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
// modify when what gets shuffled in
for (unsigned i = 0; i != NumSrcElts; ++i)
- Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i+NumDstElts);
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
- Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, MaskV);
+ Mask[getAccessedFieldNo(i, Elts)] = i + NumDstElts;
+ Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, Mask);
} else {
// We should never shorten the vector
llvm_unreachable("unexpected shorten vector length");
@@ -2295,7 +2398,13 @@ EmitBitCastOfLValueToProperType(CodeGenFunction &CGF,
static LValue EmitThreadPrivateVarDeclLValue(
CodeGenFunction &CGF, const VarDecl *VD, QualType T, Address Addr,
llvm::Type *RealVarTy, SourceLocation Loc) {
- Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc);
+ if (CGF.CGM.getLangOpts().OpenMPIRBuilder)
+ Addr = CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
+ CGF, VD, Addr, Loc);
+ else
+ Addr =
+ CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc);
+
Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy);
return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
}
@@ -2327,9 +2436,9 @@ CodeGenFunction::EmitLoadOfReference(LValue RefLVal,
Builder.CreateLoad(RefLVal.getAddress(*this), RefLVal.isVolatile());
CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo());
- CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(),
- PointeeBaseInfo, PointeeTBAAInfo,
- /* forPointeeType= */ true);
+ CharUnits Align = CGM.getNaturalTypeAlignment(
+ RefLVal.getType()->getPointeeType(), PointeeBaseInfo, PointeeTBAAInfo,
+ /* forPointeeType= */ true);
return Address(Load, Align);
}
@@ -2347,9 +2456,9 @@ Address CodeGenFunction::EmitLoadOfPointer(Address Ptr,
LValueBaseInfo *BaseInfo,
TBAAAccessInfo *TBAAInfo) {
llvm::Value *Addr = Builder.CreateLoad(Ptr);
- return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(),
- BaseInfo, TBAAInfo,
- /*forPointeeType=*/true));
+ return Address(Addr, CGM.getNaturalTypeAlignment(PtrTy->getPointeeType(),
+ BaseInfo, TBAAInfo,
+ /*forPointeeType=*/true));
}
LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr,
@@ -2397,13 +2506,14 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
}
static llvm::Constant *EmitFunctionDeclPointer(CodeGenModule &CGM,
- const FunctionDecl *FD) {
+ GlobalDecl GD) {
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
if (FD->hasAttr<WeakRefAttr>()) {
ConstantAddress aliasee = CGM.GetWeakRefReference(FD);
return aliasee.getPointer();
}
- llvm::Constant *V = CGM.GetAddrOfFunction(FD);
+ llvm::Constant *V = CGM.GetAddrOfFunction(GD);
if (!FD->hasPrototype()) {
if (const FunctionProtoType *Proto =
FD->getType()->getAs<FunctionProtoType>()) {
@@ -2420,9 +2530,10 @@ static llvm::Constant *EmitFunctionDeclPointer(CodeGenModule &CGM,
return V;
}
-static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF,
- const Expr *E, const FunctionDecl *FD) {
- llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD);
+static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E,
+ GlobalDecl GD) {
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+ llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, GD);
CharUnits Alignment = CGF.getContext().getDeclAlign(FD);
return CGF.MakeAddrLValue(V, E->getType(), Alignment,
AlignmentSource::Decl);
@@ -2552,10 +2663,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
} else {
// Should we be using the alignment of the constant pointer we emitted?
CharUnits Alignment =
- getNaturalTypeAlignment(E->getType(),
- /* BaseInfo= */ nullptr,
- /* TBAAInfo= */ nullptr,
- /* forPointeeType= */ true);
+ CGM.getNaturalTypeAlignment(E->getType(),
+ /* BaseInfo= */ nullptr,
+ /* TBAAInfo= */ nullptr,
+ /* forPointeeType= */ true);
Addr = Address(Val, Alignment);
}
return MakeAddrLValue(Addr, T, AlignmentSource::Decl);
@@ -2689,6 +2800,12 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
if (const auto *BD = dyn_cast<BindingDecl>(ND))
return EmitLValue(BD->getBinding());
+ // We can form DeclRefExprs naming GUID declarations when reconstituting
+ // non-type template parameters into expressions.
+ if (const auto *GD = dyn_cast<MSGuidDecl>(ND))
+ return MakeAddrLValue(CGM.GetAddrOfMSGuidDecl(GD), T,
+ AlignmentSource::Decl);
+
llvm_unreachable("Unhandled DeclRefExpr");
}
@@ -2779,7 +2896,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) {
- std::string Name = SL->getString();
+ std::string Name = std::string(SL->getString());
if (!Name.empty()) {
unsigned Discriminator =
CGM.getCXXABI().getMangleContext().getBlockId(BD, true);
@@ -2788,7 +2905,8 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
auto C = CGM.GetAddrOfConstantCString(Name, GVName.c_str());
return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
} else {
- auto C = CGM.GetAddrOfConstantCString(FnName, GVName.c_str());
+ auto C =
+ CGM.GetAddrOfConstantCString(std::string(FnName), GVName.c_str());
return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
}
}
@@ -2918,7 +3036,8 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) {
FilenameString = llvm::sys::path::filename(FilenameString);
}
- auto FilenameGV = CGM.GetAddrOfConstantCString(FilenameString, ".src");
+ auto FilenameGV =
+ CGM.GetAddrOfConstantCString(std::string(FilenameString), ".src");
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(
cast<llvm::GlobalVariable>(FilenameGV.getPointer()));
Filename = FilenameGV.getPointer();
@@ -3665,6 +3784,23 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
return LV;
}
+LValue CodeGenFunction::EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E) {
+ assert(
+ !E->isIncomplete() &&
+ "incomplete matrix subscript expressions should be rejected during Sema");
+ LValue Base = EmitLValue(E->getBase());
+ llvm::Value *RowIdx = EmitScalarExpr(E->getRowIdx());
+ llvm::Value *ColIdx = EmitScalarExpr(E->getColumnIdx());
+ llvm::Value *NumRows = Builder.getIntN(
+ RowIdx->getType()->getScalarSizeInBits(),
+ E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows());
+ llvm::Value *FinalIdx =
+ Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
+ return LValue::MakeMatrixElt(
+ MaybeConvertMatrixAddress(Base.getAddress(*this), *this), FinalIdx,
+ E->getBase()->getType(), Base.getBaseInfo(), TBAAAccessInfo());
+}
+
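Matrix values are laid out column-major, so for a type with R rows the element (row, col) lives at index col * R + row. A worked example with an assumed 4x3 matrix typedef:

    typedef double m4x3 __attribute__((matrix_type(4, 3)));
    // m[2][1] -> FinalIdx = 1 * 4 + 2 = 6
    // i.e. load <12 x double>, then extractelement ... i64 6   ; matrixext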
static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
LValueBaseInfo &BaseInfo,
TBAAAccessInfo &TBAAInfo,
@@ -3695,8 +3831,8 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
}
LValueBaseInfo TypeBaseInfo;
TBAAAccessInfo TypeTBAAInfo;
- CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeBaseInfo,
- &TypeTBAAInfo);
+ CharUnits Align =
+ CGF.CGM.getNaturalTypeAlignment(ElTy, &TypeBaseInfo, &TypeTBAAInfo);
BaseInfo.mergeForCast(TypeBaseInfo);
TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo);
return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress(CGF)), Align);
@@ -3713,7 +3849,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
else
ResultExprTy = BaseTy->getPointeeType();
llvm::Value *Idx = nullptr;
- if (IsLowerBound || E->getColonLoc().isInvalid()) {
+ if (IsLowerBound || E->getColonLocFirst().isInvalid()) {
// Requesting lower bound or upper bound, but without provided length and
// without ':' symbol for the default length -> length = 1.
// Idx = LowerBound ?: 0;
@@ -4020,17 +4156,17 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
return CGF.Builder.CreateStructGEP(base, idx, field->getName());
}
-static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base,
- const FieldDecl *field) {
+static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base,
+ Address addr, const FieldDecl *field) {
const RecordDecl *rec = field->getParent();
- llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateRecordType(
- CGF.getContext().getRecordType(rec), rec->getLocation());
+ llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateStandaloneType(
+ base.getType(), rec->getLocation());
unsigned idx =
CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
return CGF.Builder.CreatePreserveStructAccessIndex(
- base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo);
+ addr, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo);
}
static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
@@ -4154,8 +4290,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
if (IsInPreservedAIRegion ||
(getDebugInfo() && rec->hasAttr<BPFPreserveAccessIndexAttr>())) {
// Remember the original union field index
- llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType(
- getContext().getRecordType(rec), rec->getLocation());
+ llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(base.getType(),
+ rec->getLocation());
addr = Address(
Builder.CreatePreserveUnionAccessIndex(
addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo),
@@ -4172,7 +4308,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
addr = emitAddrOfFieldStorage(*this, addr, field);
else
// Remember the original struct field index
- addr = emitPreserveStructAccess(*this, addr, field);
+ addr = emitPreserveStructAccess(*this, base, addr, field);
}
// If this is a reference field, load the reference right now.
@@ -4248,6 +4384,14 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){
EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(),
/*Init*/ true);
+ // Block-scope compound literals are destroyed at the end of the enclosing
+ // scope in C.
+ if (!getLangOpts().CPlusPlus)
+ if (QualType::DestructionKind DtorKind = E->getType().isDestructedType())
+ pushLifetimeExtendedDestroy(getCleanupKind(DtorKind), DeclPtr,
+ E->getType(), getDestroyer(DtorKind),
+ DtorKind & EHCleanup);
+
return Result;
}
@@ -4295,6 +4439,16 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
// If the true case is live, we need to track its region.
if (CondExprBool)
incrementProfileCounter(expr);
+ // If the live case is a throw expression, emit it and return an undefined
+ // lvalue because its value cannot be used.
+ if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(live->IgnoreParens())) {
+ EmitCXXThrowExpr(ThrowExpr);
+ llvm::Type *Ty =
+ llvm::PointerType::getUnqual(ConvertType(dead->getType()));
+ return MakeAddrLValue(
+ Address(llvm::UndefValue::get(Ty), CharUnits::One()),
+ dead->getType());
+ }
return EmitLValue(live);
}
}
@@ -4620,7 +4774,8 @@ RValue CodeGenFunction::EmitSimpleCallExpr(const CallExpr *E,
return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue);
}
-static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) {
+static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
if (auto builtinID = FD->getBuiltinID()) {
// Replaceable builtin provide their own implementation of a builtin. Unless
@@ -4632,8 +4787,8 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) {
return CGCallee::forBuiltin(builtinID, FD);
}
- llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD);
- return CGCallee::forDirect(calleePtr, GlobalDecl(FD));
+ llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
+ return CGCallee::forDirect(calleePtr, GD);
}
CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
@@ -4774,7 +4929,7 @@ CodeGenFunction::EmitCXXTypeidLValue(const CXXTypeidExpr *E) {
}
Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) {
- return Builder.CreateElementBitCast(CGM.GetAddrOfUuidDescriptor(E),
+ return Builder.CreateElementBitCast(CGM.GetAddrOfMSGuidDecl(E->getGuidDecl()),
ConvertType(E->getType()));
}
@@ -5019,7 +5174,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
// to the function type.
if (isa<FunctionNoProtoType>(FnType) || Chain) {
llvm::Type *CalleeTy = getTypes().GetFunctionType(FnInfo);
- CalleeTy = CalleeTy->getPointerTo();
+ int AS = Callee.getFunctionPointer()->getType()->getPointerAddressSpace();
+ CalleeTy = CalleeTy->getPointerTo(AS);
llvm::Value *CalleePtr = Callee.getFunctionPointer();
CalleePtr = Builder.CreateBitCast(CalleePtr, CalleeTy, "callee.knr.cast");
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 8de609a2ccd9..fb96d70732e8 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -15,6 +15,7 @@
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
+#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
@@ -126,6 +127,11 @@ public:
}
void VisitConstantExpr(ConstantExpr *E) {
+ if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
+ CGF.EmitAggregateStore(Result, Dest.getAddress(),
+ E->getType().isVolatileQualified());
+ return;
+ }
return Visit(E->getSubExpr());
}
@@ -249,7 +255,7 @@ void AggExprEmitter::withReturnValueSlot(
const Expr *E, llvm::function_ref<RValue(ReturnValueSlot)> EmitCall) {
QualType RetTy = E->getType();
bool RequiresDestruction =
- Dest.isIgnored() &&
+ !Dest.isExternallyDestructed() &&
RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct;
// If it makes no observable difference, save a memcpy + temporary.
@@ -287,10 +293,8 @@ void AggExprEmitter::withReturnValueSlot(
}
RValue Src =
- EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused));
-
- if (RequiresDestruction)
- CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy);
+ EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused,
+ Dest.isExternallyDestructed()));
if (!UseTemp)
return;
@@ -659,22 +663,32 @@ AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
}
AggValueSlot Slot = EnsureSlot(E->getType());
+
+ // Block-scope compound literals are destroyed at the end of the enclosing
+ // scope in C.
+ bool Destruct =
+ !CGF.getLangOpts().CPlusPlus && !Slot.isExternallyDestructed();
+ if (Destruct)
+ Slot.setExternallyDestructed();
+
CGF.EmitAggExpr(E->getInitializer(), Slot);
+
+ if (Destruct)
+ if (QualType::DestructionKind DtorKind = E->getType().isDestructedType())
+ CGF.pushLifetimeExtendedDestroy(
+ CGF.getCleanupKind(DtorKind), Slot.getAddress(), E->getType(),
+ CGF.getDestroyer(DtorKind), DtorKind & EHCleanup);
}
/// Attempt to look through various unimportant expressions to find a
/// cast of the given kind.
-static Expr *findPeephole(Expr *op, CastKind kind) {
- while (true) {
- op = op->IgnoreParens();
- if (CastExpr *castE = dyn_cast<CastExpr>(op)) {
- if (castE->getCastKind() == kind)
- return castE->getSubExpr();
- if (castE->getCastKind() == CK_NoOp)
- continue;
- }
- return nullptr;
+static Expr *findPeephole(Expr *op, CastKind kind, const ASTContext &ctx) {
+ op = op->IgnoreParenNoopCasts(ctx);
+ if (auto castE = dyn_cast<CastExpr>(op)) {
+ if (castE->getCastKind() == kind)
+ return castE->getSubExpr();
}
+ return nullptr;
}
void AggExprEmitter::VisitCastExpr(CastExpr *E) {
@@ -763,7 +777,8 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
(isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic);
// These two cases are reverses of each other; try to peephole them.
- if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) {
+ if (Expr *op =
+ findPeephole(E->getSubExpr(), peepholeTarget, CGF.getContext())) {
assert(CGF.getContext().hasSameUnqualifiedType(op->getType(),
E->getType()) &&
"peephole significantly changed types?");
@@ -813,8 +828,19 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
// If we're loading from a volatile type, force the destination
// into existence.
if (E->getSubExpr()->getType().isVolatileQualified()) {
+ bool Destruct =
+ !Dest.isExternallyDestructed() &&
+ E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct;
+ if (Destruct)
+ Dest.setExternallyDestructed();
EnsureDest(E->getType());
- return Visit(E->getSubExpr());
+ Visit(E->getSubExpr());
+
+ if (Destruct)
+ CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(),
+ E->getType());
+
+ return;
}
LLVM_FALLTHROUGH;
@@ -1328,7 +1354,6 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) {
}
void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) {
- CGF.enterFullExpression(E);
CodeGenFunction::RunCleanupsScope cleanups(CGF);
Visit(E->getSubExpr());
}
@@ -1923,6 +1948,18 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
}
}
+ if (getLangOpts().CUDAIsDevice) {
+ if (Ty->isCUDADeviceBuiltinSurfaceType()) {
+ if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest,
+ Src))
+ return;
+ } else if (Ty->isCUDADeviceBuiltinTextureType()) {
+ if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest,
+ Src))
+ return;
+ }
+ }
+
// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in any way the storage of the first
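For contrast with the new device paths above, an ordinary aggregate assignment still lowers to a whole-object memcpy; only the CUDA builtin surface/texture types are diverted to the target hooks. A minimal sketch:

    struct Blob { int data[16]; };
    void assign(Blob &dst, const Blob &src) {
      dst = src; // lowered to a call to llvm.memcpy of sizeof(Blob) bytes
    }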
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 42c1c34c57ad..d59aa6ce0fb9 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -112,7 +112,8 @@ RValue CodeGenFunction::EmitCXXDestructorCall(
commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam,
ImplicitParamTy, CE, Args, nullptr);
return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee,
- ReturnValueSlot(), Args);
+ ReturnValueSlot(), Args, nullptr,
+ CE ? CE->getExprLoc() : SourceLocation{});
}
RValue CodeGenFunction::EmitCXXPseudoDestructorExpr(
@@ -380,7 +381,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
IsArrow ? Base->getType()->getPointeeType() : Base->getType();
EmitCXXDestructorCall(GD, Callee, This.getPointer(*this), ThisTy,
/*ImplicitParam=*/nullptr,
- /*ImplicitParamTy=*/QualType(), nullptr);
+ /*ImplicitParamTy=*/QualType(), CE);
}
return RValue::get(nullptr);
}
@@ -1637,6 +1638,12 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
RValue RV =
EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs);
+ // Set !heapallocsite metadata on the call to operator new.
+ if (getDebugInfo())
+ if (auto *newCall = dyn_cast<llvm::CallBase>(RV.getScalarVal()))
+ getDebugInfo()->addHeapAllocSiteMetadata(newCall, allocType,
+ E->getExprLoc());
+
// If this was a call to a global replaceable allocation function that does
// not take an alignment argument, the allocator is known to produce
// storage that's suitably aligned for any object that fits, up to a known
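A sketch of the source pattern this annotates, assuming CodeView-style debug info is enabled (the consumer of !heapallocsite): the metadata records the allocated type so the debugger can type the heap block.

    struct Widget { int x; };
    Widget *make() { return new Widget(); } // call gets !heapallocsite Widget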
@@ -1866,10 +1873,13 @@ static void EmitDestroyingObjectDelete(CodeGenFunction &CGF,
}
/// Emit the code for deleting a single object.
-static void EmitObjectDelete(CodeGenFunction &CGF,
+/// \return \c true if we started emitting UnconditionalDeleteBlock, \c false
+/// if not.
+static bool EmitObjectDelete(CodeGenFunction &CGF,
const CXXDeleteExpr *DE,
Address Ptr,
- QualType ElementType) {
+ QualType ElementType,
+ llvm::BasicBlock *UnconditionalDeleteBlock) {
// C++11 [expr.delete]p3:
// If the static type of the object to be deleted is different from its
// dynamic type, the static type shall be a base class of the dynamic type
@@ -1916,7 +1926,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
if (UseVirtualCall) {
CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType,
Dtor);
- return;
+ return false;
}
}
}
@@ -1951,7 +1961,15 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
}
}
+ // When optimizing for size, call 'operator delete' unconditionally.
+ if (CGF.CGM.getCodeGenOpts().OptimizeSize > 1) {
+ CGF.EmitBlock(UnconditionalDeleteBlock);
+ CGF.PopCleanupBlock();
+ return true;
+ }
+
CGF.PopCleanupBlock();
+ return false;
}
namespace {
@@ -2028,6 +2046,12 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) {
Address Ptr = EmitPointerWithAlignment(Arg);
// Null check the pointer.
+ //
+ // We could avoid this null check if we can determine that the object
+ // destruction is trivial and doesn't require an array cookie; we can
+ // unconditionally perform the operator delete call in that case. For now, we
+ // assume that deleted pointers are null rarely enough that it's better to
+ // keep the branch. This might be worth revisiting for a -O0 code size win.
llvm::BasicBlock *DeleteNotNull = createBasicBlock("delete.notnull");
llvm::BasicBlock *DeleteEnd = createBasicBlock("delete.end");
@@ -2073,11 +2097,11 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) {
if (E->isArrayForm()) {
EmitArrayDelete(*this, E, Ptr, DeleteTy);
+ EmitBlock(DeleteEnd);
} else {
- EmitObjectDelete(*this, E, Ptr, DeleteTy);
+ if (!EmitObjectDelete(*this, E, Ptr, DeleteTy, DeleteEnd))
+ EmitBlock(DeleteEnd);
}
-
- EmitBlock(DeleteEnd);
}
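A sketch of the size win, assuming -Oz (OptimizeSize > 1): the call to 'operator delete' is emitted in a block reached unconditionally instead of being nested under the null check, which is safe because 'operator delete' must tolerate a null argument anyway.

    void dispose(Widget *p) {
      delete p; // the dtor is still guarded by p != null; the free call is not
    }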
static bool isGLValueFromPointerDeref(const Expr *E) {
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index f7a4e9e94712..a49817898ae3 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -13,6 +13,7 @@
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "ConstantEmitter.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
@@ -97,11 +98,14 @@ public:
}
ComplexPairTy VisitStmt(Stmt *S) {
- S->dump(CGF.getContext().getSourceManager());
+ S->dump(llvm::errs(), CGF.getContext());
llvm_unreachable("Stmt can't have complex result type!");
}
ComplexPairTy VisitExpr(Expr *S);
ComplexPairTy VisitConstantExpr(ConstantExpr *E) {
+ if (llvm::Constant *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E))
+ return ComplexPairTy(Result->getAggregateElement(0U),
+ Result->getAggregateElement(1U));
return Visit(E->getSubExpr());
}
ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());}
@@ -222,7 +226,6 @@ public:
return Visit(DIE->getExpr());
}
ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) {
- CGF.enterFullExpression(E);
CodeGenFunction::RunCleanupsScope Scope(CGF);
ComplexPairTy Vals = Visit(E->getSubExpr());
// Defend against dominance problems caused by jumps out of expression
@@ -431,8 +434,10 @@ ComplexPairTy ComplexExprEmitter::EmitComplexToComplexCast(ComplexPairTy Val,
// C99 6.3.1.6: When a value of complex type is converted to another
// complex type, both the real and imaginary parts follow the conversion
// rules for the corresponding real types.
- Val.first = CGF.EmitScalarConversion(Val.first, SrcType, DestType, Loc);
- Val.second = CGF.EmitScalarConversion(Val.second, SrcType, DestType, Loc);
+ if (Val.first)
+ Val.first = CGF.EmitScalarConversion(Val.first, SrcType, DestType, Loc);
+ if (Val.second)
+ Val.second = CGF.EmitScalarConversion(Val.second, SrcType, DestType, Loc);
return Val;
}
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 46ed90a20264..c6b2930faece 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -318,12 +318,17 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) {
CharUnits Offset = Offsets[Index];
if (auto *CA = dyn_cast<llvm::ConstantAggregate>(C)) {
+ // Expand the sequence into its contained elements.
+ // FIXME: This assumes vector elements are byte-sized.
replace(Elems, Index, Index + 1,
llvm::map_range(llvm::seq(0u, CA->getNumOperands()),
[&](unsigned Op) { return CA->getOperand(Op); }));
- if (auto *Seq = dyn_cast<llvm::SequentialType>(CA->getType())) {
+ if (isa<llvm::ArrayType>(CA->getType()) ||
+ isa<llvm::VectorType>(CA->getType())) {
// Array or vector.
- CharUnits ElemSize = getSize(Seq->getElementType());
+ llvm::Type *ElemTy =
+ llvm::GetElementPtrInst::getTypeAtIndex(CA->getType(), (uint64_t)0);
+ CharUnits ElemSize = getSize(ElemTy);
replace(
Offsets, Index, Index + 1,
llvm::map_range(llvm::seq(0u, CA->getNumOperands()),
@@ -344,6 +349,8 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) {
}
if (auto *CDS = dyn_cast<llvm::ConstantDataSequential>(C)) {
+ // Expand the sequence into its contained elements.
+ // FIXME: This assumes vector elements are byte-sized.
// FIXME: If possible, split into two ConstantDataSequentials at Hint.
CharUnits ElemSize = getSize(CDS->getElementType());
replace(Elems, Index, Index + 1,
@@ -359,6 +366,7 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) {
}
if (isa<llvm::ConstantAggregateZero>(C)) {
+ // Split into two zeros at the hinted offset.
CharUnits ElemSize = getSize(C);
assert(Hint > Offset && Hint < Offset + ElemSize && "nothing to split");
replace(Elems, Index, Index + 1,
@@ -368,6 +376,7 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) {
}
if (isa<llvm::UndefValue>(C)) {
+ // Drop undef; it doesn't contribute to the final layout.
replace(Elems, Index, Index + 1, {});
replace(Offsets, Index, Index + 1, {});
return true;
@@ -589,19 +598,21 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,
bool ConstStructBuilder::AppendBitField(
const FieldDecl *Field, uint64_t FieldOffset, llvm::ConstantInt *CI,
bool AllowOverwrite) {
- uint64_t FieldSize = Field->getBitWidthValue(CGM.getContext());
+ const CGRecordLayout &RL =
+ CGM.getTypes().getCGRecordLayout(Field->getParent());
+ const CGBitFieldInfo &Info = RL.getBitFieldInfo(Field);
llvm::APInt FieldValue = CI->getValue();
// Promote the size of FieldValue if necessary
// FIXME: This should never occur, but currently it can because initializer
// constants are cast to bool, and because clang is not enforcing bitfield
// width limits.
- if (FieldSize > FieldValue.getBitWidth())
- FieldValue = FieldValue.zext(FieldSize);
+ if (Info.Size > FieldValue.getBitWidth())
+ FieldValue = FieldValue.zext(Info.Size);
// Truncate the size of FieldValue to the bit field size.
- if (FieldSize < FieldValue.getBitWidth())
- FieldValue = FieldValue.trunc(FieldSize);
+ if (Info.Size < FieldValue.getBitWidth())
+ FieldValue = FieldValue.trunc(Info.Size);
return Builder.addBits(FieldValue,
CGM.getContext().toBits(StartOffset) + FieldOffset,
@@ -766,7 +777,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
if (const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD)) {
// Add a vtable pointer, if we need one and it hasn't already been added.
- if (CD->isDynamicClass() && !IsPrimaryBase) {
+ if (Layout.hasOwnVFPtr()) {
llvm::Constant *VTableAddressPoint =
CGM.getCXXABI().getVTableAddressPointForConstExpr(
BaseSubobject(CD, Offset), VTableClass);
@@ -1000,6 +1011,8 @@ public:
}
llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) {
+ if (llvm::Constant *Result = Emitter.tryEmitConstantExpr(CE))
+ return Result;
return Visit(CE->getSubExpr(), T);
}
@@ -1167,9 +1180,7 @@ public:
}
llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E, QualType T) {
- if (!E->cleanupsHaveSideEffects())
- return Visit(E->getSubExpr(), T);
- return nullptr;
+ return Visit(E->getSubExpr(), T);
}
llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E,
@@ -1269,19 +1280,7 @@ public:
if (!E->getConstructor()->isTrivial())
return nullptr;
- // FIXME: We should not have to call getBaseElementType here.
- const auto *RT =
- CGM.getContext().getBaseElementType(Ty)->castAs<RecordType>();
- const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
-
- // If the class doesn't have a trivial destructor, we can't emit it as a
- // constant expr.
- if (!RD->hasTrivialDestructor())
- return nullptr;
-
- // Only copy and default constructors can be trivial.
-
-
+ // Only default and copy/move constructors can be trivial.
if (E->getNumArgs()) {
assert(E->getNumArgs() == 1 && "trivial ctor with > 1 argument");
assert(E->getConstructor()->isCopyOrMoveConstructor() &&
@@ -1361,6 +1360,20 @@ ConstantEmitter::tryEmitAbstract(const APValue &value, QualType destType) {
return validateAndPopAbstract(C, state);
}
+llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) {
+ if (!CE->hasAPValueResult())
+ return nullptr;
+ const Expr *Inner = CE->getSubExpr()->IgnoreImplicit();
+ QualType RetType;
+ if (auto *Call = dyn_cast<CallExpr>(Inner))
+ RetType = Call->getCallReturnType(CGF->getContext());
+ else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Inner))
+ RetType = Ctor->getType();
+ llvm::Constant *Res =
+ emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType);
+ return Res;
+}
+
llvm::Constant *
ConstantEmitter::emitAbstract(const Expr *E, QualType destType) {
auto state = pushAbstract();
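A sketch of what tryEmitConstantExpr picks up, assuming C++20 immediate functions: the ConstantExpr wrapping an immediate invocation already carries its APValue result, so CodeGen materializes the constant without revisiting the call.

    consteval int sq(int n) { return n * n; }
    int nine() { return sq(3); } // emitted as 'ret i32 9'; sq is never called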
@@ -1769,7 +1782,6 @@ private:
ConstantLValue VisitCallExpr(const CallExpr *E);
ConstantLValue VisitBlockExpr(const BlockExpr *E);
ConstantLValue VisitCXXTypeidExpr(const CXXTypeidExpr *E);
- ConstantLValue VisitCXXUuidofExpr(const CXXUuidofExpr *E);
ConstantLValue VisitMaterializeTemporaryExpr(
const MaterializeTemporaryExpr *E);
@@ -1884,6 +1896,9 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
}
}
+ if (auto *GD = dyn_cast<MSGuidDecl>(D))
+ return CGM.GetAddrOfMSGuidDecl(GD);
+
return nullptr;
}
@@ -1904,6 +1919,8 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
ConstantLValue
ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) {
+ if (llvm::Constant *Result = Emitter.tryEmitConstantExpr(E))
+ return Result;
return Visit(E->getSubExpr());
}
@@ -1994,11 +2011,6 @@ ConstantLValueEmitter::VisitCXXTypeidExpr(const CXXTypeidExpr *E) {
}
ConstantLValue
-ConstantLValueEmitter::VisitCXXUuidofExpr(const CXXUuidofExpr *E) {
- return CGM.GetAddrOfUuidDescriptor(E);
-}
-
-ConstantLValue
ConstantLValueEmitter::VisitMaterializeTemporaryExpr(
const MaterializeTemporaryExpr *E) {
assert(E->getStorageDuration() == SD_Static);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 3f23fe11e4f5..6131f97995dc 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/MatrixBuilder.h"
#include "llvm/IR/Module.h"
#include <cstdarg>
@@ -129,11 +130,10 @@ struct BinOpInfo {
return true;
}
- /// Check if either operand is a fixed point type or integer type, with at
- /// least one being a fixed point type. In any case, this
- /// operation did not follow usual arithmetic conversion and both operands may
- /// not be the same.
- bool isFixedPointBinOp() const {
+ /// Check if at least one operand is a fixed point type. In such cases, this
+ /// operation did not follow usual arithmetic conversion and both operands
+ /// might not be of the same type.
+ bool isFixedPointOp() const {
// We cannot simply check the result type since comparison operations return
// an int.
if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) {
@@ -141,6 +141,8 @@ struct BinOpInfo {
QualType RHSType = BinOp->getRHS()->getType();
return LHSType->isFixedPointType() || RHSType->isFixedPointType();
}
+ if (const auto *UnOp = dyn_cast<UnaryOperator>(E))
+ return UnOp->getSubExpr()->getType()->isFixedPointType();
return false;
}
};
@@ -213,22 +215,6 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
(2 * Ctx.getTypeSize(RHSTy)) < PromotedSize;
}
-/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions.
-static void updateFastMathFlags(llvm::FastMathFlags &FMF,
- FPOptions FPFeatures) {
- FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement());
-}
-
-/// Propagate fast-math flags from \p Op to the instruction in \p V.
-static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) {
- if (auto *I = dyn_cast<llvm::Instruction>(V)) {
- llvm::FastMathFlags FMF = I->getFastMathFlags();
- updateFastMathFlags(FMF, Op.FPFeatures);
- I->setFastMathFlags(FMF);
- }
- return V;
-}
-
class ScalarExprEmitter
: public StmtVisitor<ScalarExprEmitter, Value*> {
CodeGenFunction &CGF;
@@ -297,7 +283,7 @@ public:
Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue);
- CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI);
+ CGF.emitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI);
}
/// EmitLoadOfLValue - Given an expression with complex type that represents a
@@ -427,12 +413,18 @@ public:
}
Value *VisitStmt(Stmt *S) {
- S->dump(CGF.getContext().getSourceManager());
+ S->dump(llvm::errs(), CGF.getContext());
llvm_unreachable("Stmt can't have scalar result type!");
}
Value *VisitExpr(Expr *S);
Value *VisitConstantExpr(ConstantExpr *E) {
+ if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
+ if (E->isGLValue())
+ return CGF.Builder.CreateLoad(Address(
+ Result, CGF.getContext().getTypeAlignInChars(E->getType())));
+ return Result;
+ }
return Visit(E->getSubExpr());
}
Value *VisitParenExpr(ParenExpr *PE) {
@@ -551,11 +543,17 @@ public:
}
Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E);
+ Value *VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E);
Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E);
Value *VisitConvertVectorExpr(ConvertVectorExpr *E);
Value *VisitMemberExpr(MemberExpr *E);
Value *VisitExtVectorElementExpr(Expr *E) { return EmitLoadOfLValue(E); }
Value *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
+ // Strictly speaking, we shouldn't be calling EmitLoadOfLValue, which
+ // transitively calls EmitCompoundLiteralLValue, here in C++ since compound
+ // literals aren't l-values in C++. We do so simply because that's the
+ // cleanest way to handle compound literals in C++.
+ // See the discussion here: https://reviews.llvm.org/D64464
return EmitLoadOfLValue(E);
}
@@ -680,6 +678,10 @@ public:
return Builder.getInt1(E->isSatisfied());
}
+ Value *VisitRequiresExpr(const RequiresExpr *E) {
+ return Builder.getInt1(E->isSatisfied());
+ }
+
Value *VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) {
return llvm::ConstantInt::get(Builder.getInt32Ty(), E->getValue());
}
@@ -728,15 +730,34 @@ public:
}
}
+ if (Ops.Ty->isConstantMatrixType()) {
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ // We need to check the types of the operands of the operator to get the
+ // correct matrix dimensions.
+ auto *BO = cast<BinaryOperator>(Ops.E);
+ auto *LHSMatTy = dyn_cast<ConstantMatrixType>(
+ BO->getLHS()->getType().getCanonicalType());
+ auto *RHSMatTy = dyn_cast<ConstantMatrixType>(
+ BO->getRHS()->getType().getCanonicalType());
+ if (LHSMatTy && RHSMatTy)
+ return MB.CreateMatrixMultiply(Ops.LHS, Ops.RHS, LHSMatTy->getNumRows(),
+ LHSMatTy->getNumColumns(),
+ RHSMatTy->getNumColumns());
+ return MB.CreateScalarMultiply(Ops.LHS, Ops.RHS);
+ }
+
if (Ops.Ty->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
!CanElideOverflowCheck(CGF.getContext(), Ops))
return EmitOverflowCheckedBinOp(Ops);
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
- Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul");
- return propagateFMFlags(V, Ops);
+ // Preserve the old values
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
+ return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul");
}
+ if (Ops.isFixedPointOp())
+ return EmitFixedPointBinOp(Ops);
return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul");
}
/// Create a binary op that checks for overflow.
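A sketch of the new matrix arithmetic, assuming -fenable-matrix: '*' on two matrix operands becomes the llvm.matrix.multiply intrinsic with dimensions read from the operand types, while matrix-times-scalar takes the splat path.

    typedef float m2x3 __attribute__((matrix_type(2, 3)));
    typedef float m3x4 __attribute__((matrix_type(3, 4)));
    typedef float m2x4 __attribute__((matrix_type(2, 4)));
    m2x4 mul(m2x3 a, m3x4 b) { return a * b; }
    m2x3 scale(m2x3 a, float s) { return a * s; }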
@@ -748,6 +769,11 @@ public:
llvm::Value *Zero,bool isDiv);
// Common helper for getting how wide LHS of shift is.
static Value *GetWidthMinusOneValue(Value* LHS,Value* RHS);
+
+  // Used to constrain shift amounts for OpenCL: mask for powers of 2, URem
+  // for non-powers of two.
+ Value *ConstrainShiftValue(Value *LHS, Value *RHS, const Twine &Name);
+
Value *EmitDiv(const BinOpInfo &Ops);
Value *EmitRem(const BinOpInfo &Ops);
Value *EmitAdd(const BinOpInfo &Ops);
@@ -1297,7 +1323,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
"Splatted expr doesn't match with vector element type?");
// Splat the element across to all elements
- unsigned NumElements = DstTy->getVectorNumElements();
+ unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements();
return Builder.CreateVectorSplat(NumElements, Src, "splat");
}
@@ -1315,8 +1341,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
// short or half vector.
// Source and destination are both expected to be vectors.
- llvm::Type *SrcElementTy = SrcTy->getVectorElementType();
- llvm::Type *DstElementTy = DstTy->getVectorElementType();
+ llvm::Type *SrcElementTy = cast<llvm::VectorType>(SrcTy)->getElementType();
+ llvm::Type *DstElementTy = cast<llvm::VectorType>(DstTy)->getElementType();
(void)DstElementTy;
assert(((SrcElementTy->isIntegerTy() &&
@@ -1622,8 +1648,8 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
// n = extract mask i
// x = extract val n
// newv = insert newv, x, i
- llvm::VectorType *RTy = llvm::VectorType::get(LTy->getElementType(),
- MTy->getNumElements());
+ auto *RTy = llvm::FixedVectorType::get(LTy->getElementType(),
+ MTy->getNumElements());
Value* NewV = llvm::UndefValue::get(RTy);
for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) {
Value *IIndx = llvm::ConstantInt::get(CGF.SizeTy, i);
@@ -1638,18 +1664,17 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
Value* V1 = CGF.EmitScalarExpr(E->getExpr(0));
Value* V2 = CGF.EmitScalarExpr(E->getExpr(1));
- SmallVector<llvm::Constant*, 32> indices;
+ SmallVector<int, 32> Indices;
for (unsigned i = 2; i < E->getNumSubExprs(); ++i) {
llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2);
// Check for -1 and output it as undef in the IR.
if (Idx.isSigned() && Idx.isAllOnesValue())
- indices.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+ Indices.push_back(-1);
else
- indices.push_back(Builder.getInt32(Idx.getZExtValue()));
+ Indices.push_back(Idx.getZExtValue());
}
- Value *SV = llvm::ConstantVector::get(indices);
- return Builder.CreateShuffleVector(V1, V2, SV, "shuffle");
+ return Builder.CreateShuffleVector(V1, V2, Indices, "shuffle");
}
Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
@@ -1682,8 +1707,8 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
assert(DstTy->isVectorTy() &&
"ConvertVector destination IR type must be a vector");
- llvm::Type *SrcEltTy = SrcTy->getVectorElementType(),
- *DstEltTy = DstTy->getVectorElementType();
+ llvm::Type *SrcEltTy = cast<llvm::VectorType>(SrcTy)->getElementType(),
+ *DstEltTy = cast<llvm::VectorType>(DstTy)->getElementType();
if (DstEltType->isBooleanType()) {
assert((SrcEltTy->isFloatingPointTy() ||
@@ -1764,22 +1789,34 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
return Builder.CreateExtractElement(Base, Idx, "vecext");
}
-static llvm::Constant *getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx,
- unsigned Off, llvm::Type *I32Ty) {
+Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
+ TestAndClearIgnoreResultAssign();
+
+  // The base must be a matrix value; the row and column indices must be
+  // integer values.
+ Value *RowIdx = Visit(E->getRowIdx());
+ Value *ColumnIdx = Visit(E->getColumnIdx());
+ Value *Matrix = Visit(E->getBase());
+
+ // TODO: Should we emit bounds checks with SanitizerKind::ArrayBounds?
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ return MB.CreateExtractElement(
+ Matrix, RowIdx, ColumnIdx,
+ E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows());
+}
+
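A sketch of the subscript form this lowers, again assuming -fenable-matrix: the row and column indices are folded into a flat vector index (row + column * rows) and a single element is extracted.

    typedef float m4x4 __attribute__((matrix_type(4, 4)));
    float at(m4x4 m, unsigned r, unsigned c) { return m[r][c]; }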
+static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx,
+ unsigned Off) {
int MV = SVI->getMaskValue(Idx);
if (MV == -1)
- return llvm::UndefValue::get(I32Ty);
- return llvm::ConstantInt::get(I32Ty, Off+MV);
+ return -1;
+ return Off + MV;
}
-static llvm::Constant *getAsInt32(llvm::ConstantInt *C, llvm::Type *I32Ty) {
- if (C->getBitWidth() != 32) {
- assert(llvm::ConstantInt::isValueValidForType(I32Ty,
- C->getZExtValue()) &&
- "Index operand too large for shufflevector mask!");
- return llvm::ConstantInt::get(I32Ty, C->getZExtValue());
- }
- return C;
+static int getAsInt32(llvm::ConstantInt *C, llvm::Type *I32Ty) {
+ assert(llvm::ConstantInt::isValueValidForType(I32Ty, C->getZExtValue()) &&
+ "Index operand too large for shufflevector mask!");
+ return C->getZExtValue();
}
Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
@@ -1816,7 +1853,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
for (unsigned i = 0; i != NumInitElements; ++i) {
Expr *IE = E->getInit(i);
Value *Init = Visit(IE);
- SmallVector<llvm::Constant*, 16> Args;
+ SmallVector<int, 16> Args;
llvm::VectorType *VVT = dyn_cast<llvm::VectorType>(Init->getType());
@@ -1834,7 +1871,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
// insert into undef -> shuffle (src, undef)
// shufflemask must use an i32
Args.push_back(getAsInt32(C, CGF.Int32Ty));
- Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty));
+ Args.resize(ResElts, -1);
LHS = EI->getVectorOperand();
RHS = V;
@@ -1843,17 +1880,16 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
// insert into undefshuffle && size match -> shuffle (v, src)
llvm::ShuffleVectorInst *SVV = cast<llvm::ShuffleVectorInst>(V);
for (unsigned j = 0; j != CurIdx; ++j)
- Args.push_back(getMaskElt(SVV, j, 0, CGF.Int32Ty));
- Args.push_back(Builder.getInt32(ResElts + C->getZExtValue()));
- Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty));
+ Args.push_back(getMaskElt(SVV, j, 0));
+ Args.push_back(ResElts + C->getZExtValue());
+ Args.resize(ResElts, -1);
LHS = cast<llvm::ShuffleVectorInst>(V)->getOperand(0);
RHS = EI->getVectorOperand();
VIsUndefShuffle = false;
}
if (!Args.empty()) {
- llvm::Constant *Mask = llvm::ConstantVector::get(Args);
- V = Builder.CreateShuffleVector(LHS, RHS, Mask);
+ V = Builder.CreateShuffleVector(LHS, RHS, Args);
++CurIdx;
continue;
}
@@ -1882,15 +1918,14 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
// If the current vector initializer is a shuffle with undef, merge
// this shuffle directly into it.
if (VIsUndefShuffle) {
- Args.push_back(getMaskElt(cast<llvm::ShuffleVectorInst>(V), j, 0,
- CGF.Int32Ty));
+ Args.push_back(getMaskElt(cast<llvm::ShuffleVectorInst>(V), j, 0));
} else {
- Args.push_back(Builder.getInt32(j));
+ Args.push_back(j);
}
}
for (unsigned j = 0, je = InitElts; j != je; ++j)
- Args.push_back(getMaskElt(SVI, j, Offset, CGF.Int32Ty));
- Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty));
+ Args.push_back(getMaskElt(SVI, j, Offset));
+ Args.resize(ResElts, -1);
if (VIsUndefShuffle)
V = cast<llvm::ShuffleVectorInst>(V)->getOperand(0);
@@ -1903,26 +1938,24 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
// to the vector initializer into V.
if (Args.empty()) {
for (unsigned j = 0; j != InitElts; ++j)
- Args.push_back(Builder.getInt32(j));
- Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty));
- llvm::Constant *Mask = llvm::ConstantVector::get(Args);
- Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT),
- Mask, "vext");
+ Args.push_back(j);
+ Args.resize(ResElts, -1);
+ Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), Args,
+ "vext");
Args.clear();
for (unsigned j = 0; j != CurIdx; ++j)
- Args.push_back(Builder.getInt32(j));
+ Args.push_back(j);
for (unsigned j = 0; j != InitElts; ++j)
- Args.push_back(Builder.getInt32(j+Offset));
- Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty));
+ Args.push_back(j + Offset);
+ Args.resize(ResElts, -1);
}
// If V is undef, make sure it ends up on the RHS of the shuffle to aid
// merging subsequent shuffles into this one.
if (CurIdx == 0)
std::swap(V, Init);
- llvm::Constant *Mask = llvm::ConstantVector::get(Args);
- V = Builder.CreateShuffleVector(V, Init, Mask, "vecinit");
+ V = Builder.CreateShuffleVector(V, Init, Args, "vecinit");
VIsUndefShuffle = isa<llvm::UndefValue>(Init);
CurIdx += InitElts;
}
@@ -2036,11 +2069,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
}
}
- // Update heapallocsite metadata when there is an explicit cast.
- if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(Src))
- if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE))
- CGF.getDebugInfo()->
- addHeapAllocSiteMetadata(CI, CE->getType(), CE->getExprLoc());
+ // Update heapallocsite metadata when there is an explicit pointer cast.
+ if (auto *CI = dyn_cast<llvm::CallBase>(Src)) {
+ if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) {
+ QualType PointeeType = DestTy->getPointeeType();
+ if (!PointeeType.isNull())
+ CGF.getDebugInfo()->addHeapAllocSiteMetadata(CI, PointeeType,
+ CE->getExprLoc());
+ }
+ }
return Builder.CreateBitCast(Src, DstTy);
}
@@ -2210,7 +2247,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
llvm::Type *DstTy = ConvertType(DestTy);
Value *Elt = Visit(const_cast<Expr*>(E));
// Splat the element across to all elements
- unsigned NumElements = DstTy->getVectorNumElements();
+ unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements();
return Builder.CreateVectorSplat(NumElements, Elt, "splat");
}
@@ -2311,7 +2348,6 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) {
}
Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) {
- CGF.enterFullExpression(E);
CodeGenFunction::RunCleanupsScope Scope(CGF);
Value *V = Visit(E->getSubExpr());
// Defend against dominance problems caused by jumps out of expression
@@ -2325,13 +2361,14 @@ Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) {
//===----------------------------------------------------------------------===//
static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E,
- llvm::Value *InVal, bool IsInc) {
+ llvm::Value *InVal, bool IsInc,
+ FPOptions FPFeatures) {
BinOpInfo BinOp;
BinOp.LHS = InVal;
BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false);
BinOp.Ty = E->getType();
BinOp.Opcode = IsInc ? BO_Add : BO_Sub;
- // FIXME: once UnaryOperator carries FPFeatures, copy it here.
+ BinOp.FPFeatures = FPFeatures;
BinOp.E = E;
return BinOp;
}
@@ -2351,7 +2388,8 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
case LangOptions::SOB_Trapping:
if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
- return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
+ return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(
+ E, InVal, IsInc, E->getFPFeaturesInEffect(CGF.getLangOpts())));
}
llvm_unreachable("Unknown SignedOverflowBehaviorTy");
}
@@ -2497,8 +2535,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
} else if (E->canOverflow() && type->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
- value =
- EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc));
+ value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(
+ E, value, isInc, E->getFPFeaturesInEffect(CGF.getLangOpts())));
} else {
llvm::Value *amt = llvm::ConstantInt::get(value->getType(), amount, true);
value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec");
@@ -2609,6 +2647,36 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
}
}
+ // Fixed-point types.
+ } else if (type->isFixedPointType()) {
+ // Fixed-point types are tricky. In some cases, it isn't possible to
+ // represent a 1 or a -1 in the type at all. Piggyback off of
+ // EmitFixedPointBinOp to avoid having to reimplement saturation.
+ BinOpInfo Info;
+ Info.E = E;
+ Info.Ty = E->getType();
+ Info.Opcode = isInc ? BO_Add : BO_Sub;
+ Info.LHS = value;
+ Info.RHS = llvm::ConstantInt::get(value->getType(), 1, false);
+ // If the type is signed, it's better to represent this as +(-1) or -(-1),
+ // since -1 is guaranteed to be representable.
+ if (type->isSignedFixedPointType()) {
+ Info.Opcode = isInc ? BO_Sub : BO_Add;
+ Info.RHS = Builder.CreateNeg(Info.RHS);
+ }
+ // Now, convert from our invented integer literal to the type of the unary
+ // op. This will upscale and saturate if necessary. This value can become
+ // undef in some cases.
+ FixedPointSemantics SrcSema =
+ FixedPointSemantics::GetIntegerSemantics(value->getType()
+ ->getScalarSizeInBits(),
+ /*IsSigned=*/true);
+ FixedPointSemantics DstSema =
+ CGF.getContext().getFixedPointSemantics(Info.Ty);
+ Info.RHS = EmitFixedPointConversion(Info.RHS, SrcSema, DstSema,
+ E->getExprLoc());
+ value = EmitFixedPointBinOp(Info);
+
// Objective-C pointer types.
} else {
const ObjCObjectPointerType *OPT = type->castAs<ObjCObjectPointerType>();
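As a concrete case: a signed short _Fract covers [-1, 1), so +1 is not representable while -1 is; ++x is therefore rewritten as x - (-1), with the constant first converted to the operand's fixed-point semantics before EmitFixedPointBinOp runs.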
@@ -2668,7 +2736,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType());
BinOp.Ty = E->getType();
BinOp.Opcode = BO_Sub;
- // FIXME: once UnaryOperator carries FPFeatures, copy it here.
+ BinOp.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
BinOp.E = E;
return EmitSub(BinOp);
}
@@ -2681,13 +2749,17 @@ Value *ScalarExprEmitter::VisitUnaryNot(const UnaryOperator *E) {
Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
// Perform vector logical not on comparison with zero vector.
- if (E->getType()->isExtVectorType()) {
+ if (E->getType()->isVectorType() &&
+ E->getType()->castAs<VectorType>()->getVectorKind() ==
+ VectorType::GenericVector) {
Value *Oper = Visit(E->getSubExpr());
Value *Zero = llvm::Constant::getNullValue(Oper->getType());
Value *Result;
- if (Oper->getType()->isFPOrFPVectorTy())
+ if (Oper->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(
+ CGF, E->getFPFeaturesInEffect(CGF.getLangOpts()));
Result = Builder.CreateFCmp(llvm::CmpInst::FCMP_OEQ, Oper, Zero, "cmp");
- else
+ } else
Result = Builder.CreateICmp(llvm::CmpInst::ICMP_EQ, Oper, Zero, "cmp");
return Builder.CreateSExt(Result, ConvertType(E->getType()), "sext");
}
@@ -2888,7 +2960,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
Result.RHS = Visit(E->getRHS());
Result.Ty = E->getType();
Result.Opcode = E->getOpcode();
- Result.FPFeatures = E->getFPFeatures();
+ Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
Result.E = E;
return Result;
}
@@ -2908,7 +2980,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
OpInfo.RHS = Visit(E->getRHS());
OpInfo.Ty = E->getComputationResultType();
OpInfo.Opcode = E->getOpcode();
- OpInfo.FPFeatures = E->getFPFeatures();
+ OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
OpInfo.E = E;
// Load/convert the LHS.
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
@@ -3096,7 +3168,9 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
}
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
- llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
+ llvm::Value *Val;
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
+ Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
if (CGF.getLangOpts().OpenCL &&
!CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) {
// OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
@@ -3112,6 +3186,8 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
}
return Val;
}
+ else if (Ops.isFixedPointOp())
+ return EmitFixedPointBinOp(Ops);
else if (Ops.Ty->hasUnsignedIntegerRepresentation())
return Builder.CreateUDiv(Ops.LHS, Ops.RHS, "div");
else
@@ -3361,7 +3437,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
// the add operand respectively. This allows fmuladd to represent a*b-c, or
// c-a*b. Patterns in LLVM should catch the negated forms and translate them to
// efficient operations.
-static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend,
+static Value* buildFMulAdd(llvm::Instruction *MulOp, Value *Addend,
const CodeGenFunction &CGF, CGBuilderTy &Builder,
bool negMul, bool negAdd) {
assert(!(negMul && negAdd) && "Only one of negMul and negAdd should be set.");
@@ -3373,12 +3449,23 @@ static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend,
if (negAdd)
Addend = Builder.CreateFNeg(Addend, "neg");
- Value *FMulAdd = Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
- {MulOp0, MulOp1, Addend});
- MulOp->eraseFromParent();
+ Value *FMulAdd = nullptr;
+ if (Builder.getIsFPConstrained()) {
+ assert(isa<llvm::ConstrainedFPIntrinsic>(MulOp) &&
+ "Only constrained operation should be created when Builder is in FP "
+ "constrained mode");
+ FMulAdd = Builder.CreateConstrainedFPCall(
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::experimental_constrained_fmuladd,
+ Addend->getType()),
+ {MulOp0, MulOp1, Addend});
+ } else {
+ FMulAdd = Builder.CreateCall(
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
+ {MulOp0, MulOp1, Addend});
+ }
+ MulOp->eraseFromParent();
- return FMulAdd;
+ return FMulAdd;
}
// Check whether it would be legal to emit an fmuladd intrinsic call to
@@ -3413,6 +3500,19 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false);
}
+ if (auto *LHSBinOp = dyn_cast<llvm::CallBase>(op.LHS)) {
+ if (LHSBinOp->getIntrinsicID() ==
+ llvm::Intrinsic::experimental_constrained_fmul &&
+ LHSBinOp->use_empty())
+ return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, false, isSub);
+ }
+ if (auto *RHSBinOp = dyn_cast<llvm::CallBase>(op.RHS)) {
+ if (RHSBinOp->getIntrinsicID() ==
+ llvm::Intrinsic::experimental_constrained_fmul &&
+ RHSBinOp->use_empty())
+ return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false);
+ }
+
return nullptr;
}
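A sketch of the contraction this preserves: with FP contraction enabled, the expression below becomes a single llvm.fmuladd call, and under a strict FP model the constrained fmul produced for a*b now folds into llvm.experimental.constrained.fmuladd instead of defeating contraction.

    double mac(double a, double b, double c) { return a * b + c; }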
@@ -3436,21 +3536,26 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
}
}
+ if (op.Ty->isConstantMatrixType()) {
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ return MB.CreateAdd(op.LHS, op.RHS);
+ }
+
if (op.Ty->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
!CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);
if (op.LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
// Try to form an fmuladd.
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
return FMulAdd;
- Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add");
- return propagateFMFlags(V, op);
+ return Builder.CreateFAdd(op.LHS, op.RHS, "add");
}
- if (op.isFixedPointBinOp())
+ if (op.isFixedPointOp())
return EmitFixedPointBinOp(op);
return Builder.CreateAdd(op.LHS, op.RHS, "add");
@@ -3462,14 +3567,27 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) {
using llvm::APSInt;
using llvm::ConstantInt;
- const auto *BinOp = cast<BinaryOperator>(op.E);
-
- // The result is a fixed point type and at least one of the operands is fixed
- // point while the other is either fixed point or an int. This resulting type
- // should be determined by Sema::handleFixedPointConversions().
+ // This is either a binary operation where at least one of the operands is
+ // a fixed-point type, or a unary operation where the operand is a fixed-point
+ // type. The result type of a binary operation is determined by
+ // Sema::handleFixedPointConversions().
QualType ResultTy = op.Ty;
- QualType LHSTy = BinOp->getLHS()->getType();
- QualType RHSTy = BinOp->getRHS()->getType();
+ QualType LHSTy, RHSTy;
+ if (const auto *BinOp = dyn_cast<BinaryOperator>(op.E)) {
+ RHSTy = BinOp->getRHS()->getType();
+ if (const auto *CAO = dyn_cast<CompoundAssignOperator>(BinOp)) {
+ // For compound assignment, the effective type of the LHS at this point
+ // is the computation LHS type, not the actual LHS type, and the final
+ // result type is not the type of the expression but rather the
+ // computation result type.
+ LHSTy = CAO->getComputationLHSType();
+ ResultTy = CAO->getComputationResultType();
+ } else
+ LHSTy = BinOp->getLHS()->getType();
+ } else if (const auto *UnOp = dyn_cast<UnaryOperator>(op.E)) {
+ LHSTy = UnOp->getSubExpr()->getType();
+ RHSTy = UnOp->getSubExpr()->getType();
+ }
ASTContext &Ctx = CGF.getContext();
Value *LHS = op.LHS;
Value *RHS = op.RHS;
@@ -3481,16 +3599,17 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) {
// Convert the operands to the full precision type.
Value *FullLHS = EmitFixedPointConversion(LHS, LHSFixedSema, CommonFixedSema,
- BinOp->getExprLoc());
+ op.E->getExprLoc());
Value *FullRHS = EmitFixedPointConversion(RHS, RHSFixedSema, CommonFixedSema,
- BinOp->getExprLoc());
+ op.E->getExprLoc());
- // Perform the actual addition.
+ // Perform the actual operation.
Value *Result;
- switch (BinOp->getOpcode()) {
+ switch (op.Opcode) {
+ case BO_AddAssign:
case BO_Add: {
- if (ResultFixedSema.isSaturated()) {
- llvm::Intrinsic::ID IID = ResultFixedSema.isSigned()
+ if (CommonFixedSema.isSaturated()) {
+ llvm::Intrinsic::ID IID = CommonFixedSema.isSigned()
? llvm::Intrinsic::sadd_sat
: llvm::Intrinsic::uadd_sat;
Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS);
@@ -3499,9 +3618,10 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) {
}
break;
}
+ case BO_SubAssign:
case BO_Sub: {
- if (ResultFixedSema.isSaturated()) {
- llvm::Intrinsic::ID IID = ResultFixedSema.isSigned()
+ if (CommonFixedSema.isSaturated()) {
+ llvm::Intrinsic::ID IID = CommonFixedSema.isSigned()
? llvm::Intrinsic::ssub_sat
: llvm::Intrinsic::usub_sat;
Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS);
@@ -3510,6 +3630,32 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) {
}
break;
}
+ case BO_MulAssign:
+ case BO_Mul: {
+ llvm::Intrinsic::ID IID;
+ if (CommonFixedSema.isSaturated())
+ IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::smul_fix_sat
+ : llvm::Intrinsic::umul_fix_sat;
+ else
+ IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::smul_fix
+ : llvm::Intrinsic::umul_fix;
+ Result = Builder.CreateIntrinsic(IID, {FullLHS->getType()},
+ {FullLHS, FullRHS, Builder.getInt32(CommonFixedSema.getScale())});
+ break;
+ }
+ case BO_DivAssign:
+ case BO_Div: {
+ llvm::Intrinsic::ID IID;
+ if (CommonFixedSema.isSaturated())
+ IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::sdiv_fix_sat
+ : llvm::Intrinsic::udiv_fix_sat;
+ else
+ IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::sdiv_fix
+ : llvm::Intrinsic::udiv_fix;
+ Result = Builder.CreateIntrinsic(IID, {FullLHS->getType()},
+ {FullLHS, FullRHS, Builder.getInt32(CommonFixedSema.getScale())});
+ break;
+ }
case BO_LT:
return CommonFixedSema.isSigned() ? Builder.CreateICmpSLT(FullLHS, FullRHS)
: Builder.CreateICmpULT(FullLHS, FullRHS);
@@ -3529,17 +3675,11 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) {
return Builder.CreateICmpEQ(FullLHS, FullRHS);
case BO_NE:
return Builder.CreateICmpNE(FullLHS, FullRHS);
- case BO_Mul:
- case BO_Div:
case BO_Shl:
case BO_Shr:
case BO_Cmp:
case BO_LAnd:
case BO_LOr:
- case BO_MulAssign:
- case BO_DivAssign:
- case BO_AddAssign:
- case BO_SubAssign:
case BO_ShlAssign:
case BO_ShrAssign:
llvm_unreachable("Found unimplemented fixed point binary operation");
@@ -3560,7 +3700,7 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) {
// Convert to the result type.
return EmitFixedPointConversion(Result, CommonFixedSema, ResultFixedSema,
- BinOp->getExprLoc());
+ op.E->getExprLoc());
}
Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
@@ -3581,20 +3721,25 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
}
}
+ if (op.Ty->isConstantMatrixType()) {
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ return MB.CreateSub(op.LHS, op.RHS);
+ }
+
if (op.Ty->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
!CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);
if (op.LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
// Try to form an fmuladd.
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
return FMulAdd;
- Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub");
- return propagateFMFlags(V, op);
+ return Builder.CreateFSub(op.LHS, op.RHS, "sub");
}
- if (op.isFixedPointBinOp())
+ if (op.isFixedPointOp())
return EmitFixedPointBinOp(op);
return Builder.CreateSub(op.LHS, op.RHS, "sub");
@@ -3666,6 +3811,21 @@ Value *ScalarExprEmitter::GetWidthMinusOneValue(Value* LHS,Value* RHS) {
return llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth() - 1);
}
+Value *ScalarExprEmitter::ConstrainShiftValue(Value *LHS, Value *RHS,
+ const Twine &Name) {
+ llvm::IntegerType *Ty;
+ if (auto *VT = dyn_cast<llvm::VectorType>(LHS->getType()))
+ Ty = cast<llvm::IntegerType>(VT->getElementType());
+ else
+ Ty = cast<llvm::IntegerType>(LHS->getType());
+
+ if (llvm::isPowerOf2_64(Ty->getBitWidth()))
+ return Builder.CreateAnd(RHS, GetWidthMinusOneValue(LHS, RHS), Name);
+
+ return Builder.CreateURem(
+ RHS, llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth()), Name);
+}
+
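A sketch of the two cases, assuming OpenCL mode (where the implicit shift-amount masking applies) and the _ExtInt extension for the odd width:

    unsigned shl32(unsigned x, unsigned n) { return x << n; }
    // 32-bit LHS: power-of-two width, so the amount is masked with 'n & 31'
    _ExtInt(12) shl12(_ExtInt(12) x, unsigned n) { return x << n; }
    // 12-bit LHS: masking would compute the wrong remainder, so a urem by 12
    // is emitted instead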
Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
// LLVM requires the LHS and RHS to be the same type: promote or truncate the
// RHS to the same size as the LHS.
@@ -3676,12 +3836,11 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) &&
Ops.Ty->hasSignedIntegerRepresentation() &&
!CGF.getLangOpts().isSignedOverflowDefined() &&
- !CGF.getLangOpts().CPlusPlus2a;
+ !CGF.getLangOpts().CPlusPlus20;
bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent);
// OpenCL 6.3j: shift values are effectively % word size of LHS.
if (CGF.getLangOpts().OpenCL)
- RHS =
- Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask");
+ RHS = ConstrainShiftValue(Ops.LHS, RHS, "shl.mask");
else if ((SanitizeBase || SanitizeExponent) &&
isa<llvm::IntegerType>(Ops.LHS->getType())) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
@@ -3743,8 +3902,7 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) {
// OpenCL 6.3j: shift values are effectively % word size of LHS.
if (CGF.getLangOpts().OpenCL)
- RHS =
- Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask");
+ RHS = ConstrainShiftValue(Ops.LHS, RHS, "shr.mask");
else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) &&
isa<llvm::IntegerType>(Ops.LHS->getType())) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
@@ -3897,9 +4055,10 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,
E->getExprLoc());
}
- if (BOInfo.isFixedPointBinOp()) {
+ if (BOInfo.isFixedPointOp()) {
Result = EmitFixedPointBinOp(BOInfo);
} else if (LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, BOInfo.FPFeatures);
if (!IsSignaling)
Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp");
else
@@ -4052,6 +4211,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
if (LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(
+ CGF, E->getFPFeaturesInEffect(CGF.getLangOpts()));
LHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, LHS, Zero, "cmp");
RHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, RHS, Zero, "cmp");
} else {
@@ -4136,6 +4297,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
if (LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(
+ CGF, E->getFPFeaturesInEffect(CGF.getLangOpts()));
LHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, LHS, Zero, "cmp");
RHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, RHS, Zero, "cmp");
} else {
@@ -4269,8 +4432,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// OpenCL: If the condition is a vector, we can treat this condition like
// the select function.
- if (CGF.getLangOpts().OpenCL
- && condExpr->getType()->isVectorType()) {
+ if ((CGF.getLangOpts().OpenCL && condExpr->getType()->isVectorType()) ||
+ condExpr->getType()->isExtVectorType()) {
CGF.incrementProfileCounter(E);
llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
@@ -4285,10 +4448,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
llvm::Value *zeroVec = llvm::Constant::getNullValue(vecTy);
llvm::Value *TestMSB = Builder.CreateICmpSLT(CondV, zeroVec);
- llvm::Value *tmp = Builder.CreateSExt(TestMSB,
- llvm::VectorType::get(elemType,
- numElem),
- "sext");
+ llvm::Value *tmp = Builder.CreateSExt(
+ TestMSB, llvm::FixedVectorType::get(elemType, numElem), "sext");
llvm::Value *tmp2 = Builder.CreateNot(tmp);
// Cast float to int to perform ANDs if necessary.
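A sketch of the select-style lowering, assuming the frontend accepts a vector condition for ext vectors outside OpenCL (which the added isExtVectorType() test implies): each result lane is chosen by the sign bit of the matching condition lane, emitted as sext/not/and/or rather than a branch.

    typedef int   v4i __attribute__((ext_vector_type(4)));
    typedef float v4f __attribute__((ext_vector_type(4)));
    v4f blend(v4i c, v4f a, v4f b) { return c ? a : b; }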
@@ -4427,14 +4588,9 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF,
Value *Src, unsigned NumElementsDst) {
llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
- SmallVector<llvm::Constant*, 4> Args;
- Args.push_back(Builder.getInt32(0));
- Args.push_back(Builder.getInt32(1));
- Args.push_back(Builder.getInt32(2));
- if (NumElementsDst == 4)
- Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
- llvm::Constant *Mask = llvm::ConstantVector::get(Args);
- return Builder.CreateShuffleVector(Src, UnV, Mask);
+ static constexpr int Mask[] = {0, 1, 2, -1};
+ return Builder.CreateShuffleVector(Src, UnV,
+ llvm::makeArrayRef(Mask, NumElementsDst));
}
// Create cast instructions for converting LLVM value \p Src to LLVM type \p
@@ -4512,7 +4668,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// get a vec3.
if (NumElementsSrc != 3 && NumElementsDst == 3) {
if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
- auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+ auto *Vec4Ty = llvm::FixedVectorType::get(
+ cast<llvm::VectorType>(DstTy)->getElementType(), 4);
Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
Vec4Ty);
}
@@ -4655,7 +4812,7 @@ struct GEPOffsetAndOverflow {
static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
llvm::LLVMContext &VMContext,
CodeGenModule &CGM,
- CGBuilderTy Builder) {
+ CGBuilderTy &Builder) {
const auto &DL = CGM.getDataLayout();
// The total (signed) byte offset for the GEP.
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index d7e267630762..f860623e2bc3 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
using namespace clang;
using namespace CodeGen;
@@ -110,7 +111,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal();
- Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
+ Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
}
BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
}
@@ -120,3 +121,36 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
return RValue::get(Builder.CreateCall(
VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
}
+
+RValue
+CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
+ assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
+ assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+ E->getBuiltinCallee() == Builtin::BI__builtin_printf);
+ assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+ CallArgList CallArgs;
+ EmitCallArgs(CallArgs,
+ E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
+ E->arguments(), E->getDirectCallee(),
+ /* ParamsToSkip = */ 0);
+
+ SmallVector<llvm::Value *, 8> Args;
+ for (auto A : CallArgs) {
+ // We don't know how to emit non-scalar varargs.
+ if (!A.getRValue(*this).isScalar()) {
+ CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+ return RValue::get(llvm::ConstantInt::get(IntTy, -1));
+ }
+
+ llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+ Args.push_back(Arg);
+ }
+
+ llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
+ IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+ auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);
+ Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
+ return RValue::get(Printf);
+}
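A sketch of the source this handles, using HIP-style device code for illustration: a printf in an amdgcn device function is forwarded to llvm::emitAMDGPUPrintfCall, which packs the scalar arguments for the runtime; non-scalar varargs are rejected via ErrorUnsupported.

    __global__ void kernel(int x) { printf("x = %d\n", x); }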
diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp
index e4b184eb8798..78da72eda0cf 100644
--- a/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -9,6 +9,8 @@
#include "CGLoopInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -572,6 +574,7 @@ void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc,
}
void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
+ const clang::CodeGenOptions &CGOpts,
ArrayRef<const clang::Attr *> Attrs,
const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc) {
@@ -752,6 +755,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
}
}
+ if (CGOpts.OptimizationLevel > 0)
+    // Disable unrolling for this loop if unrolling is disabled (via
+    // -fno-unroll-loops) and no pragma overrides the decision.
+ if (!CGOpts.UnrollLoops &&
+ (StagedAttrs.UnrollEnable == LoopAttributes::Unspecified &&
+ StagedAttrs.UnrollCount == 0))
+ setUnrollState(LoopAttributes::Disable);
+
/// Stage the attributes.
push(Header, StartLoc, EndLoc);
}
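With this hunk, -fno-unroll-loops is turned into explicit loop metadata (LoopAttributes::Disable) only when optimizing, and only when no unroll pragma or unroll count is staged, so source-level pragmas still win over the flag. A hypothetical example of the resulting behavior when compiling with -O2 -fno-unroll-loops:

    void f(int *a) {
      // Gets llvm.loop.unroll.disable: the flag applies, nothing overrides it.
      for (int i = 0; i < 64; ++i)
        a[i] = i;
    #pragma clang loop unroll(enable)
      // The staged pragma overrides -fno-unroll-loops; no disable metadata.
      for (int i = 0; i < 64; ++i)
        a[i] += 1;
    }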
diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h
index 5abcf37c5433..e379c64c99a8 100644
--- a/clang/lib/CodeGen/CGLoopInfo.h
+++ b/clang/lib/CodeGen/CGLoopInfo.h
@@ -29,6 +29,7 @@ class MDNode;
namespace clang {
class Attr;
class ASTContext;
+class CodeGenOptions;
namespace CodeGen {
/// Attributes that may be specified on loops.
@@ -202,6 +203,7 @@ public:
/// Begin a new structured loop. Stage attributes from the Attrs list.
/// The staged attributes are applied to the loop and then cleared.
void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx,
+ const clang::CodeGenOptions &CGOpts,
llvm::ArrayRef<const Attr *> Attrs, const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc);
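The header change mirrors the new parameter: every caller of LoopInfoStack::push for an attributed loop now threads the CodeGenOptions through. A sketch of what a call site looks like after this change (the local names are assumptions):

    // Hypothetical call site in statement codegen: forward the options so
    // the -fno-unroll-loops logic in push() can consult them.
    LoopStack.push(LoopHeader, getContext(), CGM.getCodeGenOpts(), Attrs,
                   SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));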
diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp
index d5f378c52232..d134be83a9dc 100644
--- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -254,6 +254,10 @@ struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>,
void visitVolatileTrivial(QualType FT, const FieldDecl *FD,
CharUnits CurStructOffset) {
+ // Zero-length bit-fields don't need to be copied/assigned.
+ if (FD && FD->isZeroLengthBitField(this->Ctx))
+ return;
+
// Because volatile fields can be bit-fields and are individually copied,
// their offset and width are in bits.
uint64_t OffsetInBits =
@@ -317,6 +321,16 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM,
return CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
}
+template <size_t N, size_t... Ints>
+static std::array<Address, N> getParamAddrs(std::index_sequence<Ints...> IntSeq,
+ std::array<CharUnits, N> Alignments,
+ FunctionArgList Args,
+ CodeGenFunction *CGF) {
+ return std::array<Address, N>{{
+ Address(CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[Ints])),
+ Alignments[Ints])...}};
+}
+
// Template classes that are used as bases for classes that emit special
// functions.
template <class Derived> struct GenFuncBase {
@@ -424,9 +438,9 @@ template <class Derived> struct GenFuncBase {
}
template <size_t N>
- llvm::Function *
- getFunction(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
- std::array<CharUnits, N> Alignments, CodeGenModule &CGM) {
+ llvm::Function *getFunction(StringRef FuncName, QualType QT,
+ std::array<CharUnits, N> Alignments,
+ CodeGenModule &CGM) {
// If the special function already exists in the module, return it.
if (llvm::Function *F = CGM.getModule().getFunction(FuncName)) {
bool WrongType = false;
@@ -439,7 +453,7 @@ template <class Derived> struct GenFuncBase {
}
if (WrongType) {
- std::string FuncName = F->getName();
+ std::string FuncName = std::string(F->getName());
SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation();
CGM.Error(Loc, "special function " + FuncName +
" for non-trivial C struct has incorrect type");
@@ -466,12 +480,8 @@ template <class Derived> struct GenFuncBase {
CodeGenFunction NewCGF(CGM);
setCGF(&NewCGF);
CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
-
- for (unsigned I = 0; I < N; ++I) {
- llvm::Value *V = CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[I]));
- Addrs[I] = Address(V, Alignments[I]);
- }
-
+ std::array<Address, N> Addrs =
+ getParamAddrs<N>(std::make_index_sequence<N>{}, Alignments, Args, CGF);
asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs);
CGF->FinishFunction();
return F;
@@ -491,7 +501,7 @@ template <class Derived> struct GenFuncBase {
}
if (llvm::Function *F =
- getFunction(FuncName, QT, Addrs, Alignments, CallerCGF.CGM))
+ getFunction(FuncName, QT, Alignments, CallerCGF.CGM))
CallerCGF.EmitNounwindRuntimeCall(F, Ptrs);
}
@@ -543,6 +553,10 @@ struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>,
std::array<Address, 2> Addrs) {
LValue DstLV, SrcLV;
if (FD) {
+ // No need to copy zero-length bit-fields.
+ if (FD->isZeroLengthBitField(this->CGF->getContext()))
+ return;
+
QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0);
llvm::PointerType *PtrTy = this->CGF->ConvertType(RT)->getPointerTo();
Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset);
@@ -825,17 +839,6 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT,
Gen.callFunc(FuncName, QT, Addrs, CGF);
}
-template <size_t N> static std::array<Address, N> createNullAddressArray();
-
-template <> std::array<Address, 1> createNullAddressArray() {
- return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}});
-}
-
-template <> std::array<Address, 2> createNullAddressArray() {
- return std::array<Address, 2>({{Address(nullptr, CharUnits::Zero()),
- Address(nullptr, CharUnits::Zero())}});
-}
-
template <class G, size_t N>
static llvm::Function *
getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile,
@@ -844,8 +847,7 @@ getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile,
// The following call requires an array of addresses as arguments, but doesn't
// actually use them (it overwrites them with the addresses of the arguments
// of the created function).
- return Gen.getFunction(FuncName, QT, createNullAddressArray<N>(), Alignments,
- CGM);
+ return Gen.getFunction(FuncName, QT, Alignments, CGM);
}
// Functions to emit calls to the special functions of a non-trivial C struct.
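getParamAddrs replaces the old load-into-preallocated-array loop (and the createNullAddressArray helpers deleted above): expanding a std::index_sequence lets the std::array<Address, N> be built in a single braced-init expression, so Address no longer needs to be default-constructible. The same technique in isolation:

    #include <array>
    #include <cstddef>
    #include <utility>

    // Build std::array<T, N> from a per-index generator in one
    // expression; T need not be default-constructible.
    template <class T, std::size_t N, class F, std::size_t... Is>
    std::array<T, N> makeArrayImpl(F f, std::index_sequence<Is...>) {
      return {{f(Is)...}};
    }
    template <class T, std::size_t N, class F>
    std::array<T, N> makeArray(F f) {
      return makeArrayImpl<T, N>(f, std::make_index_sequence<N>{});
    }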
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 90fca2836d99..cd2b84f5dd20 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1491,11 +1491,10 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
argLoad.getType()))
finalArg = &argCast;
-
- BinaryOperator assign(&ivarRef, finalArg, BO_Assign,
- ivarRef.getType(), VK_RValue, OK_Ordinary,
- SourceLocation(), FPOptions());
- EmitStmt(&assign);
+ BinaryOperator *assign = BinaryOperator::Create(
+ getContext(), &ivarRef, finalArg, BO_Assign, ivarRef.getType(), VK_RValue,
+ OK_Ordinary, SourceLocation(), FPOptionsOverride());
+ EmitStmt(assign);
}
/// Generate an Objective-C property setter function.
@@ -1837,6 +1836,40 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
llvm::Value *CurrentItem =
Builder.CreateAlignedLoad(CurrentItemPtr, getPointerAlign());
+ if (SanOpts.has(SanitizerKind::ObjCCast)) {
+ // Before using an item from the collection, check that the implicit cast
+ // from id to the element type is valid. This is done with instrumentation
+ // roughly corresponding to:
+ //
+ // if (![item isKindOfClass:expectedCls]) { /* emit diagnostic */ }
+ const ObjCObjectPointerType *ObjPtrTy =
+ elementType->getAsObjCInterfacePointerType();
+ const ObjCInterfaceType *InterfaceTy =
+ ObjPtrTy ? ObjPtrTy->getInterfaceType() : nullptr;
+ if (InterfaceTy) {
+ SanitizerScope SanScope(this);
+ auto &C = CGM.getContext();
+ assert(InterfaceTy->getDecl() && "No decl for ObjC interface type");
+ Selector IsKindOfClassSel = GetUnarySelector("isKindOfClass", C);
+ CallArgList IsKindOfClassArgs;
+ llvm::Value *Cls =
+ CGM.getObjCRuntime().GetClass(*this, InterfaceTy->getDecl());
+ IsKindOfClassArgs.add(RValue::get(Cls), C.getObjCClassType());
+ llvm::Value *IsClass =
+ CGM.getObjCRuntime()
+ .GenerateMessageSend(*this, ReturnValueSlot(), C.BoolTy,
+ IsKindOfClassSel, CurrentItem,
+ IsKindOfClassArgs)
+ .getScalarVal();
+ llvm::Constant *StaticData[] = {
+ EmitCheckSourceLocation(S.getBeginLoc()),
+ EmitCheckTypeDescriptor(QualType(InterfaceTy, 0))};
+ EmitCheck({{IsClass, SanitizerKind::ObjCCast}},
+ SanitizerHandler::InvalidObjCCast,
+ ArrayRef<llvm::Constant *>(StaticData), CurrentItem);
+ }
+ }
+
// Cast that value to the right type.
CurrentItem = Builder.CreateBitCast(CurrentItem, convertedElementType,
"currentitem");
@@ -2160,7 +2193,8 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value,
if (!mandatory && isa<llvm::Instruction>(result)) {
llvm::CallInst *call
= cast<llvm::CallInst>(result->stripPointerCasts());
- assert(call->getCalledValue() == CGM.getObjCEntrypoints().objc_retainBlock);
+ assert(call->getCalledOperand() ==
+ CGM.getObjCEntrypoints().objc_retainBlock);
call->setMetadata("clang.arc.copy_on_escape",
llvm::MDNode::get(Builder.getContext(), None));
@@ -3255,7 +3289,6 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF,
llvm::Value *CodeGenFunction::EmitARCRetainScalarExpr(const Expr *e) {
// The retain needs to happen within the full-expression.
if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) {
- enterFullExpression(cleanups);
RunCleanupsScope scope(*this);
return EmitARCRetainScalarExpr(cleanups->getSubExpr());
}
@@ -3271,7 +3304,6 @@ llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseScalarExpr(const Expr *e) {
// The retain needs to happen within the full-expression.
if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) {
- enterFullExpression(cleanups);
RunCleanupsScope scope(*this);
return EmitARCRetainAutoreleaseScalarExpr(cleanups->getSubExpr());
}
@@ -3382,7 +3414,6 @@ static llvm::Value *emitARCUnsafeUnretainedScalarExpr(CodeGenFunction &CGF,
llvm::Value *CodeGenFunction::EmitARCUnsafeUnretainedScalarExpr(const Expr *e) {
// Look through full-expressions.
if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) {
- enterFullExpression(cleanups);
RunCleanupsScope scope(*this);
return emitARCUnsafeUnretainedScalarExpr(*this, cleanups->getSubExpr());
}
@@ -3505,7 +3536,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
if (!Ty->isRecordType())
return nullptr;
const ObjCPropertyDecl *PD = PID->getPropertyDecl();
- if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
+ if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic)))
return nullptr;
llvm::Constant *HelperFn = nullptr;
if (hasTrivialSetExpr(PID))
@@ -3555,21 +3586,21 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
StartFunction(FD, ReturnTy, Fn, FI, args);
- DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue,
- SourceLocation());
- UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation(), false);
+ DeclRefExpr DstExpr(C, &DstDecl, false, DestTy, VK_RValue, SourceLocation());
+ UnaryOperator *DST = UnaryOperator::Create(
+ C, &DstExpr, UO_Deref, DestTy->getPointeeType(), VK_LValue, OK_Ordinary,
+ SourceLocation(), false, FPOptionsOverride());
- DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue,
- SourceLocation());
- UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation(), false);
+ DeclRefExpr SrcExpr(C, &SrcDecl, false, SrcTy, VK_RValue, SourceLocation());
+ UnaryOperator *SRC = UnaryOperator::Create(
+ C, &SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary,
+ SourceLocation(), false, FPOptionsOverride());
- Expr *Args[2] = { &DST, &SRC };
+ Expr *Args[2] = {DST, SRC};
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create(
C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(),
- VK_LValue, SourceLocation(), FPOptions());
+ VK_LValue, SourceLocation(), FPOptionsOverride());
EmitStmt(TheCall);
@@ -3589,7 +3620,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
QualType Ty = PD->getType();
if (!Ty->isRecordType())
return nullptr;
- if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
+ if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic)))
return nullptr;
llvm::Constant *HelperFn = nullptr;
if (hasTrivialGetExpr(PID))
@@ -3641,14 +3672,15 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue,
SourceLocation());
- UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
- VK_LValue, OK_Ordinary, SourceLocation(), false);
+ UnaryOperator *SRC = UnaryOperator::Create(
+ C, &SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary,
+ SourceLocation(), false, FPOptionsOverride());
CXXConstructExpr *CXXConstExpr =
cast<CXXConstructExpr>(PID->getGetterCXXConstructor());
SmallVector<Expr*, 4> ConstructorArgs;
- ConstructorArgs.push_back(&SRC);
+ ConstructorArgs.push_back(SRC);
ConstructorArgs.append(std::next(CXXConstExpr->arg_begin()),
CXXConstExpr->arg_end());
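The recurring pattern in this file: AST nodes that used to be built on the stack (BinaryOperator, UnaryOperator, CXXOperatorCallExpr) are now created through ASTContext-allocating factories, and FPOptions becomes FPOptionsOverride. The nodes can carry trailing storage for floating-point state, which a stack object cannot provide. A condensed sketch of the new idiom (C is the ASTContext; LHS, RHS, and Ty are assumed to exist):

    // Heap-allocate the node so any trailing FP-state storage can be
    // laid out alongside it.
    BinaryOperator *Assign = BinaryOperator::Create(
        C, LHS, RHS, BO_Assign, Ty, VK_RValue, OK_Ordinary,
        SourceLocation(), FPOptionsOverride());
    EmitStmt(Assign);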
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp
index a27b6d4ed637..bb9c494ae68e 100644
--- a/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -203,7 +203,8 @@ protected:
/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.
llvm::Constant *MakeConstantString(StringRef Str, const char *Name = "") {
- ConstantAddress Array = CGM.GetAddrOfConstantCString(Str, Name);
+ ConstantAddress Array =
+ CGM.GetAddrOfConstantCString(std::string(Str), Name);
return llvm::ConstantExpr::getGetElementPtr(Array.getElementType(),
Array.getPointer(), Zeros);
}
@@ -254,11 +255,11 @@ protected:
isDynamic=true) {
int attrs = property->getPropertyAttributes();
// For read-only properties, clear the copy and retain flags
- if (attrs & ObjCPropertyDecl::OBJC_PR_readonly) {
- attrs &= ~ObjCPropertyDecl::OBJC_PR_copy;
- attrs &= ~ObjCPropertyDecl::OBJC_PR_retain;
- attrs &= ~ObjCPropertyDecl::OBJC_PR_weak;
- attrs &= ~ObjCPropertyDecl::OBJC_PR_strong;
+ if (attrs & ObjCPropertyAttribute::kind_readonly) {
+ attrs &= ~ObjCPropertyAttribute::kind_copy;
+ attrs &= ~ObjCPropertyAttribute::kind_retain;
+ attrs &= ~ObjCPropertyAttribute::kind_weak;
+ attrs &= ~ObjCPropertyAttribute::kind_strong;
}
// The first flags field has the same attribute values as clang uses internally
Fields.addInt(Int8Ty, attrs & 0xff);
@@ -616,6 +617,13 @@ public:
llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
const ObjCProtocolDecl *PD) override;
void GenerateProtocol(const ObjCProtocolDecl *PD) override;
+
+ virtual llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD);
+
+ llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override {
+ return GenerateProtocolRef(PD);
+ }
+
llvm::Function *ModuleInitFunction() override;
llvm::FunctionCallee GetPropertyGetFunction() override;
llvm::FunctionCallee GetPropertySetFunction() override;
@@ -820,7 +828,7 @@ class CGObjCGNUstep : public CGObjCGNU {
// Slot_t objc_slot_lookup_super(struct objc_super*, SEL);
SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
PtrToObjCSuperTy, SelectorTy);
- // If we're in ObjC++ mode, then we want to make
+ // If we're in ObjC++ mode, then we want to make
if (usesSEHExceptions) {
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
// void objc_exception_rethrow(void)
@@ -1347,7 +1355,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
void GenerateProtocol(const ObjCProtocolDecl *PD) override {
// Do nothing - we only emit referenced protocols.
}
- llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) {
+ llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) override {
std::string ProtocolName = PD->getNameAsString();
auto *&Protocol = ExistingProtocols[ProtocolName];
if (Protocol)
@@ -1433,7 +1441,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) {
if (TypeEncoding.empty())
return NULLPtr;
- std::string MangledTypes = TypeEncoding;
+ std::string MangledTypes = std::string(TypeEncoding);
std::replace(MangledTypes.begin(), MangledTypes.end(),
'@', '\1');
std::string TypesVarName = ".objc_sel_types_" + MangledTypes;
@@ -1556,7 +1564,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
// We have to do this by hand, rather than with @llvm.ctors, so that the
// linker can remove the duplicate invocations.
auto *InitVar = new llvm::GlobalVariable(TheModule, LoadFunction->getType(),
- /*isConstant*/true, llvm::GlobalValue::LinkOnceAnyLinkage,
+ /*isConstant*/false, llvm::GlobalValue::LinkOnceAnyLinkage,
LoadFunction, ".objc_ctor");
// Check that this hasn't been renamed. This shouldn't happen, because
// this function should be called precisely once.
@@ -1647,14 +1655,16 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
for (const auto &lateInit : EarlyInitList) {
auto *global = TheModule.getGlobalVariable(lateInit.first);
if (global) {
- b.CreateAlignedStore(global,
- b.CreateStructGEP(lateInit.second.first, lateInit.second.second), CGM.getPointerAlign().getQuantity());
+ b.CreateAlignedStore(
+ global,
+ b.CreateStructGEP(lateInit.second.first, lateInit.second.second),
+ CGM.getPointerAlign().getAsAlign());
}
}
b.CreateRetVoid();
// We can't use the normal LLVM global initialisation array, because we
// need to specify that this runs early in library initialisation.
- auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
+ auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
/*isConstant*/true, llvm::GlobalValue::InternalLinkage,
Init, ".objc_early_init_ptr");
InitVar->setSection(".CRT$XCLb");
@@ -1943,7 +1953,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
if (SuperClass) {
std::pair<llvm::Constant*, int> v{classStruct, 1};
- EarlyInitList.emplace_back(SuperClass->getName(), std::move(v));
+ EarlyInitList.emplace_back(std::string(SuperClass->getName()),
+ std::move(v));
}
}
@@ -2410,7 +2421,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
assert(PT && "Invalid @catch type.");
const ObjCInterfaceType *IT = PT->getInterfaceType();
assert(IT && "Invalid @catch type.");
- std::string className = IT->getDecl()->getIdentifier()->getName();
+ std::string className =
+ std::string(IT->getDecl()->getIdentifier()->getName());
std::string typeinfoName = "__objc_eh_typeinfo_" + className;
@@ -3034,13 +3046,18 @@ CGObjCGNU::GenerateProtocolList(ArrayRef<std::string> Protocols) {
llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF,
const ObjCProtocolDecl *PD) {
+ auto protocol = GenerateProtocolRef(PD);
+ llvm::Type *T =
+ CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType());
+ return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T));
+}
+
+llvm::Constant *CGObjCGNU::GenerateProtocolRef(const ObjCProtocolDecl *PD) {
llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()];
if (!protocol)
GenerateProtocol(PD);
assert(protocol && "Unknown protocol");
- llvm::Type *T =
- CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType());
- return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T));
+ return protocol;
}
llvm::Constant *
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index f36c28a85a68..1d0379afb4b5 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -1107,11 +1107,6 @@ public:
void GenerateProtocol(const ObjCProtocolDecl *PD) override;
- /// GetOrEmitProtocol - Get the protocol object for the given
- /// declaration, emitting it if necessary. The return value has type
- /// ProtocolPtrTy.
- virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD)=0;
-
/// GetOrEmitProtocolRef - Get a forward reference to the protocol
/// object for the given declaration, emitting it if needed. These
/// forward references will be filled in with empty bodies if no
@@ -2035,7 +2030,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) {
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
- GV->setAlignment(llvm::Align::None());
+ GV->setAlignment(llvm::Align(1));
Fields.addBitCast(GV, CGM.Int8PtrTy);
// String length.
@@ -2558,9 +2553,8 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout,
}
if (FQT->isRecordType() && ElCount) {
int OldIndex = RunSkipBlockVars.size() - 1;
- const RecordType *RT = FQT->getAs<RecordType>();
- BuildRCBlockVarRecordLayout(RT, BytePos + FieldOffset,
- HasUnion);
+ auto *RT = FQT->castAs<RecordType>();
+ BuildRCBlockVarRecordLayout(RT, BytePos + FieldOffset, HasUnion);
// Replicate layout information for each array element. Note that
// one element is already done.
@@ -3047,9 +3041,10 @@ llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime(
ObjCCommonTypesHelper &ObjCTypes) {
llvm::FunctionCallee lookUpClassFn = ObjCTypes.getLookUpClassFn();
- llvm::Value *className =
- CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString())
- .getPointer();
+ llvm::Value *className = CGF.CGM
+ .GetAddrOfConstantCString(std::string(
+ ID->getObjCRuntimeNameAsString()))
+ .getPointer();
ASTContext &ctx = CGF.CGM.getContext();
className =
CGF.Builder.CreateBitCast(className,
@@ -3291,6 +3286,8 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
for (auto *PD : ClassExt->properties()) {
if (IsClassProperty != PD->isClassProperty())
continue;
+ if (PD->isDirectProperty())
+ continue;
PropertySet.insert(PD->getIdentifier());
Properties.push_back(PD);
}
@@ -3302,6 +3299,8 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
// class extension.
if (!PropertySet.insert(PD->getIdentifier()).second)
continue;
+ if (PD->isDirectProperty())
+ continue;
Properties.push_back(PD);
}
@@ -3327,8 +3326,6 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
values.addInt(ObjCTypes.IntTy, Properties.size());
auto propertiesArray = values.beginArray(ObjCTypes.PropertyTy);
for (auto PD : Properties) {
- if (PD->isDirectProperty())
- continue;
auto property = propertiesArray.beginStruct(ObjCTypes.PropertyTy);
property.add(GetPropertyName(PD->getIdentifier()));
property.add(GetPropertyTypeString(PD, Container));
@@ -3637,7 +3634,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) {
// Check for a forward reference.
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
if (GV) {
- assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
+ assert(GV->getValueType() == ObjCTypes.ClassTy &&
"Forward metaclass reference has incorrect type.");
values.finishAndSetAsInitializer(GV);
GV->setSection(Section);
@@ -3700,7 +3697,7 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID,
// Check for a forward reference.
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
if (GV) {
- assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
+ assert(GV->getValueType() == ObjCTypes.ClassTy &&
"Forward metaclass reference has incorrect type.");
values.finishAndSetAsInitializer(GV);
} else {
@@ -3731,7 +3728,7 @@ llvm::Constant *CGObjCMac::EmitMetaClassRef(const ObjCInterfaceDecl *ID) {
llvm::GlobalValue::PrivateLinkage, nullptr,
Name);
- assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
+ assert(GV->getValueType() == ObjCTypes.ClassTy &&
"Forward metaclass reference has incorrect type.");
return GV;
}
@@ -3745,7 +3742,7 @@ llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) {
llvm::GlobalValue::PrivateLinkage, nullptr,
Name);
- assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
+ assert(GV->getValueType() == ObjCTypes.ClassTy &&
"Forward class metadata reference has incorrect type.");
return GV;
}
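The assert rewrites in the surrounding hunks (GV->getType()->getElementType() to GV->getValueType()) all make the same substitution: ask the global for its declared value type instead of digging it out of the pointer type. Both spellings agree today; only the latter survives LLVM's move toward opaque pointers. A sketch:

    // Equivalent with typed pointers, but only getValueType() keeps
    // working once pointer types stop carrying a pointee type.
    llvm::Type *Legacy  = GV->getType()->getElementType();
    llvm::Type *Durable = GV->getValueType();
    assert(Legacy == Durable);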
@@ -4029,22 +4026,49 @@ llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD,
llvm::Function *
CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD,
const ObjCContainerDecl *CD) {
- auto I = DirectMethodDefinitions.find(OMD->getCanonicalDecl());
- if (I != DirectMethodDefinitions.end())
- return I->second;
+ auto *COMD = OMD->getCanonicalDecl();
+ auto I = DirectMethodDefinitions.find(COMD);
+ llvm::Function *OldFn = nullptr, *Fn = nullptr;
- SmallString<256> Name;
- GetNameForMethod(OMD, CD, Name, /*ignoreCategoryNamespace*/true);
+ if (I != DirectMethodDefinitions.end()) {
+ // Objective-C allows for the declaration and implementation types
+ // to differ slightly.
+ //
+    // If we're being asked for the Function associated with a method
+ // implementation, a previous value might have been cached
+ // based on the type of the canonical declaration.
+ //
+ // If these do not match, then we'll replace this function with
+ // a new one that has the proper type below.
+ if (!OMD->getBody() || COMD->getReturnType() == OMD->getReturnType())
+ return I->second;
+ OldFn = I->second;
+ }
CodeGenTypes &Types = CGM.getTypes();
llvm::FunctionType *MethodTy =
Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD));
- llvm::Function *Method =
- llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
- Name.str(), &CGM.getModule());
- DirectMethodDefinitions.insert(std::make_pair(OMD->getCanonicalDecl(), Method));
- return Method;
+ if (OldFn) {
+ Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
+ "", &CGM.getModule());
+ Fn->takeName(OldFn);
+ OldFn->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(Fn, OldFn->getType()));
+ OldFn->eraseFromParent();
+
+ // Replace the cached function in the map.
+ I->second = Fn;
+ } else {
+ SmallString<256> Name;
+ GetNameForMethod(OMD, CD, Name, /*ignoreCategoryNamespace*/ true);
+
+ Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
+ Name.str(), &CGM.getModule());
+ DirectMethodDefinitions.insert(std::make_pair(COMD, Fn));
+ }
+
+ return Fn;
}
void CGObjCCommonMac::GenerateDirectMethodPrologue(
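When GenerateDirectMethod finds a cached llvm::Function whose type no longer matches the implementation being emitted, it swaps it out using the standard LLVM replacement dance: create the new function, steal the old one's name, redirect all uses through a bitcast, and erase the old function. The same steps in isolation (NewTy and M assumed):

    llvm::Function *NewFn = llvm::Function::Create(
        NewTy, llvm::GlobalValue::ExternalLinkage, "", &M);
    NewFn->takeName(OldFn);                  // keep the original symbol name
    OldFn->replaceAllUsesWith(               // old call sites now see a cast
        llvm::ConstantExpr::getBitCast(NewFn, OldFn->getType()));
    OldFn->eraseFromParent();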
@@ -4195,7 +4219,8 @@ CGObjCCommonMac::CreateCStringLiteral(StringRef Name, ObjCLabelType Type,
: "__TEXT,__cstring,cstring_literals";
break;
case ObjCLabelType::PropertyName:
- Section = "__TEXT,__cstring,cstring_literals";
+ Section = NonFragile ? "__TEXT,__objc_methname,cstring_literals"
+ : "__TEXT,__cstring,cstring_literals";
break;
}
@@ -5128,15 +5153,18 @@ void CGObjCCommonMac::EmitImageInfo() {
Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Section",
llvm::MDString::get(VMContext, Section));
+ auto Int8Ty = llvm::Type::getInt8Ty(VMContext);
if (CGM.getLangOpts().getGC() == LangOptions::NonGC) {
// Non-GC overrides those files which specify GC.
- Mod.addModuleFlag(llvm::Module::Override,
- "Objective-C Garbage Collection", (uint32_t)0);
+ Mod.addModuleFlag(llvm::Module::Error,
+ "Objective-C Garbage Collection",
+ llvm::ConstantInt::get(Int8Ty,0));
} else {
// Add the ObjC garbage collection value.
Mod.addModuleFlag(llvm::Module::Error,
"Objective-C Garbage Collection",
- eImageInfo_GarbageCollected);
+ llvm::ConstantInt::get(Int8Ty,
+ (uint8_t)eImageInfo_GarbageCollected));
if (CGM.getLangOpts().getGC() == LangOptions::GCOnly) {
// Add the ObjC GC Only value.
@@ -5147,7 +5175,7 @@ void CGObjCCommonMac::EmitImageInfo() {
llvm::Metadata *Ops[2] = {
llvm::MDString::get(VMContext, "Objective-C Garbage Collection"),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- llvm::Type::getInt32Ty(VMContext), eImageInfo_GarbageCollected))};
+ Int8Ty, eImageInfo_GarbageCollected))};
Mod.addModuleFlag(llvm::Module::Require, "Objective-C GC Only",
llvm::MDNode::get(VMContext, Ops));
}
@@ -5423,7 +5451,7 @@ llvm::Constant *IvarLayoutBuilder::buildBitmap(CGObjCCommonMac &CGObjC,
// This isn't a stable sort, but our algorithm should handle it fine.
llvm::array_pod_sort(IvarsInfo.begin(), IvarsInfo.end());
} else {
- assert(std::is_sorted(IvarsInfo.begin(), IvarsInfo.end()));
+ assert(llvm::is_sorted(IvarsInfo));
}
assert(IvarsInfo.back().Offset < InstanceEnd);
@@ -6217,11 +6245,9 @@ void CGObjCNonFragileABIMac::AddModuleClassList(
assert((!CGM.getTriple().isOSBinFormatMachO() ||
SectionName.startswith("__DATA")) &&
"SectionName expected to start with __DATA on MachO");
- llvm::GlobalValue::LinkageTypes LT =
- getLinkageTypeForObjCMetadata(CGM, SectionName);
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, LT, Init,
- SymbolName);
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ CGM.getModule(), Init->getType(), false,
+ llvm::GlobalValue::PrivateLinkage, Init, SymbolName);
GV->setAlignment(
llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType())));
GV->setSection(SectionName);
@@ -6350,7 +6376,7 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer(
unsigned InstanceStart,
unsigned InstanceSize,
const ObjCImplementationDecl *ID) {
- std::string ClassName = ID->getObjCRuntimeNameAsString();
+ std::string ClassName = std::string(ID->getObjCRuntimeNameAsString());
CharUnits beginInstance = CharUnits::fromQuantity(InstanceStart);
CharUnits endInstance = CharUnits::fromQuantity(InstanceSize);
@@ -7509,10 +7535,9 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF,
llvm::Constant *ClassGV = GetClassGlobalForClassRef(ID);
std::string SectionName =
GetSectionName("__objc_superrefs", "regular,no_dead_strip");
- Entry = new llvm::GlobalVariable(
- CGM.getModule(), ClassGV->getType(), false,
- getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV,
- "OBJC_CLASSLIST_SUP_REFS_$_");
+ Entry = new llvm::GlobalVariable(CGM.getModule(), ClassGV->getType(), false,
+ llvm::GlobalValue::PrivateLinkage, ClassGV,
+ "OBJC_CLASSLIST_SUP_REFS_$_");
Entry->setAlignment(CGF.getPointerAlign().getAsAlign());
Entry->setSection(SectionName);
CGM.addCompilerUsedGlobal(Entry);
@@ -7533,10 +7558,9 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF,
auto MetaClassGV = GetClassGlobal(ID, /*metaclass*/ true, NotForDefinition);
std::string SectionName =
GetSectionName("__objc_superrefs", "regular,no_dead_strip");
- Entry = new llvm::GlobalVariable(
- CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false,
- getLinkageTypeForObjCMetadata(CGM, SectionName), MetaClassGV,
- "OBJC_CLASSLIST_SUP_REFS_$_");
+ Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy,
+ false, llvm::GlobalValue::PrivateLinkage,
+ MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_");
Entry->setAlignment(Align.getAsAlign());
Entry->setSection(SectionName);
CGM.addCompilerUsedGlobal(Entry);
diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp
index f8b831d0e9be..39efe040302d 100644
--- a/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "CGObjCRuntime.h"
-#include "CGCleanup.h"
#include "CGCXXABI.h"
+#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtObjC.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/CodeGen/CodeGenABITypes.h"
#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
@@ -211,7 +212,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CGF.pushSEHCleanup(NormalAndEHCleanup, FinallyFunc);
}
-
+
// Emit the try body.
CGF.EmitStmt(S.getTryBody());
@@ -271,7 +272,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
cleanups.ForceCleanup();
CGF.EmitBranchThroughCleanup(Cont);
- }
+ }
// Go back to the try-statement fallthrough.
CGF.Builder.restoreIP(SavedIP);
@@ -383,3 +384,9 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method,
CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo();
return MessageSendInfo(argsInfo, signatureType);
}
+
+llvm::Constant *
+clang::CodeGen::emitObjCProtocolObject(CodeGenModule &CGM,
+ const ObjCProtocolDecl *protocol) {
+ return CGM.getObjCRuntime().GetOrEmitProtocol(protocol);
+}
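emitObjCProtocolObject gives external consumers of clang's CodeGen library (its declaration lands in clang/CodeGen/CodeGenABITypes.h, newly included above) a way to force emission of a protocol's metadata and obtain the resulting constant, backed by the GetOrEmitProtocol hook each runtime now implements. A hypothetical client call:

    // Hypothetical out-of-tree client: emit (or reuse) the protocol object.
    llvm::Constant *Proto =
        clang::CodeGen::emitObjCProtocolObject(CGM, ProtoDecl);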
diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h
index f0b3525cfde2..a2c189585f7b 100644
--- a/clang/lib/CodeGen/CGObjCRuntime.h
+++ b/clang/lib/CodeGen/CGObjCRuntime.h
@@ -211,6 +211,11 @@ public:
/// implementations.
virtual void GenerateProtocol(const ObjCProtocolDecl *OPD) = 0;
+ /// GetOrEmitProtocol - Get the protocol object for the given
+ /// declaration, emitting it if necessary. The return value has type
+ /// ProtocolPtrTy.
+ virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) = 0;
+
/// Generate a function preamble for a method with the specified
/// types.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 97b17799a03e..43cbe9c720ea 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -21,17 +21,24 @@
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/OpenMPKinds.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <numeric>
using namespace clang;
using namespace CodeGen;
@@ -562,205 +569,6 @@ enum OpenMPSchedType {
OMP_sch_modifier_nonmonotonic = (1 << 30),
};
-enum OpenMPRTLFunction {
- /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
- /// kmpc_micro microtask, ...);
- OMPRTL__kmpc_fork_call,
- /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
- /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
- OMPRTL__kmpc_threadprivate_cached,
- /// Call to void __kmpc_threadprivate_register( ident_t *,
- /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
- OMPRTL__kmpc_threadprivate_register,
- // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
- OMPRTL__kmpc_global_thread_num,
- // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit);
- OMPRTL__kmpc_critical,
- // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
- // global_tid, kmp_critical_name *crit, uintptr_t hint);
- OMPRTL__kmpc_critical_with_hint,
- // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit);
- OMPRTL__kmpc_end_critical,
- // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_cancel_barrier,
- // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_barrier,
- // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_for_static_fini,
- // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_serialized_parallel,
- // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_end_serialized_parallel,
- // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_threads);
- OMPRTL__kmpc_push_num_threads,
- // Call to void __kmpc_flush(ident_t *loc);
- OMPRTL__kmpc_flush,
- // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_master,
- // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_end_master,
- // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
- // int end_part);
- OMPRTL__kmpc_omp_taskyield,
- // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_single,
- // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_end_single,
- // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
- // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- // kmp_routine_entry_t *task_entry);
- OMPRTL__kmpc_omp_task_alloc,
- // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
- // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
- // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
- // kmp_int64 device_id);
- OMPRTL__kmpc_omp_target_task_alloc,
- // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
- // new_task);
- OMPRTL__kmpc_omp_task,
- // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
- // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
- // kmp_int32 didit);
- OMPRTL__kmpc_copyprivate,
- // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
- // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
- OMPRTL__kmpc_reduce,
- // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
- // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
- // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
- // *lck);
- OMPRTL__kmpc_reduce_nowait,
- // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *lck);
- OMPRTL__kmpc_end_reduce,
- // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *lck);
- OMPRTL__kmpc_end_reduce_nowait,
- // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
- // kmp_task_t * new_task);
- OMPRTL__kmpc_omp_task_begin_if0,
- // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
- // kmp_task_t * new_task);
- OMPRTL__kmpc_omp_task_complete_if0,
- // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_ordered,
- // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_end_ordered,
- // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_omp_taskwait,
- // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_taskgroup,
- // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_end_taskgroup,
- // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
- // int proc_bind);
- OMPRTL__kmpc_push_proc_bind,
- // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
- // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
- // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
- OMPRTL__kmpc_omp_task_with_deps,
- // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
- // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
- // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
- OMPRTL__kmpc_omp_wait_deps,
- // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
- // global_tid, kmp_int32 cncl_kind);
- OMPRTL__kmpc_cancellationpoint,
- // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 cncl_kind);
- OMPRTL__kmpc_cancel,
- // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_teams, kmp_int32 thread_limit);
- OMPRTL__kmpc_push_num_teams,
- // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
- // microtask, ...);
- OMPRTL__kmpc_fork_teams,
- // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
- // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
- // sched, kmp_uint64 grainsize, void *task_dup);
- OMPRTL__kmpc_taskloop,
- // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
- // num_dims, struct kmp_dim *dims);
- OMPRTL__kmpc_doacross_init,
- // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
- OMPRTL__kmpc_doacross_fini,
- // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
- // *vec);
- OMPRTL__kmpc_doacross_post,
- // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
- // *vec);
- OMPRTL__kmpc_doacross_wait,
- // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
- // *data);
- OMPRTL__kmpc_task_reduction_init,
- // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
- // *d);
- OMPRTL__kmpc_task_reduction_get_th_data,
- // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
- OMPRTL__kmpc_alloc,
- // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
- OMPRTL__kmpc_free,
-
- //
- // Offloading related calls
- //
- // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
- // size);
- OMPRTL__kmpc_push_target_tripcount,
- // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- OMPRTL__tgt_target,
- // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
- // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- OMPRTL__tgt_target_nowait,
- // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
- // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types, int32_t num_teams, int32_t thread_limit);
- OMPRTL__tgt_target_teams,
- // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
- // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
- // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
- OMPRTL__tgt_target_teams_nowait,
- // Call to void __tgt_register_requires(int64_t flags);
- OMPRTL__tgt_register_requires,
- // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
- // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
- OMPRTL__tgt_target_data_begin,
- // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- OMPRTL__tgt_target_data_begin_nowait,
- // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
- OMPRTL__tgt_target_data_end,
- // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- OMPRTL__tgt_target_data_end_nowait,
- // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
- // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
- OMPRTL__tgt_target_data_update,
- // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- OMPRTL__tgt_target_data_update_nowait,
- // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
- OMPRTL__tgt_mapper_num_components,
- // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
- // *base, void *begin, int64_t size, int64_t type);
- OMPRTL__tgt_push_mapper_component,
-};
-
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
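This deletion (together with the createRuntimeFunction switch removed further down) retires the hand-maintained runtime-function enum: the declarations now come from llvm/Frontend/OpenMP/OMPKinds.def via OpenMPIRBuilder, and call sites request entry points by OMPRTL___* ID. The new pattern, as it appears in later hunks:

    // Declare-on-demand lookup replaces the local enum + switch.
    llvm::FunctionCallee Fn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_global_thread_num);
    CGF.EmitRuntimeCall(Fn, emitUpdateLocation(CGF, Loc));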
@@ -971,27 +779,37 @@ void ReductionCodeGen::emitAggregateInitialization(
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+ ArrayRef<const Expr *> Origs,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> ReductionOps) {
ClausesData.reserve(Shareds.size());
SharedAddresses.reserve(Shareds.size());
Sizes.reserve(Shareds.size());
BaseDecls.reserve(Shareds.size());
- auto IPriv = Privates.begin();
- auto IRed = ReductionOps.begin();
+ const auto *IOrig = Origs.begin();
+ const auto *IPriv = Privates.begin();
+ const auto *IRed = ReductionOps.begin();
for (const Expr *Ref : Shareds) {
- ClausesData.emplace_back(Ref, *IPriv, *IRed);
+ ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
+ std::advance(IOrig, 1);
std::advance(IPriv, 1);
std::advance(IRed, 1);
}
}
-void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
- assert(SharedAddresses.size() == N &&
+void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
+ assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
"Number of generated lvalues must be exactly N.");
- LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
- LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
+ LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
+ LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
SharedAddresses.emplace_back(First, Second);
+ if (ClausesData[N].Shared == ClausesData[N].Ref) {
+ OrigAddresses.emplace_back(First, Second);
+ } else {
+ LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
+ LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
+ OrigAddresses.emplace_back(First, Second);
+ }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
@@ -1001,26 +819,25 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
- CGF.getTypeSize(
- SharedAddresses[N].first.getType().getNonReferenceType()),
+ CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
nullptr);
return;
}
llvm::Value *Size;
llvm::Value *SizeInChars;
- auto *ElemType = cast<llvm::PointerType>(
- SharedAddresses[N].first.getPointer(CGF)->getType())
- ->getElementType();
+ auto *ElemType =
+ cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
+ ->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
- Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
- SharedAddresses[N].first.getPointer(CGF));
+ Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
+ OrigAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
} else {
- SizeInChars = CGF.getTypeSize(
- SharedAddresses[N].first.getType().getNonReferenceType());
+ SizeInChars =
+ CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
}
Sizes.emplace_back(SizeInChars, Size);
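For an array section the reduction size is derived from the bounds pointers: the element count is (UB - LB) + 1 and the byte size is count * sizeof(elem); for plain variables the division runs the other way. The arithmetic in IRBuilder terms (First, Last, ElemTy, and builder B assumed):

    // count = (last - first) + 1 ; bytes = count * sizeof(ElemTy)
    llvm::Value *Count = B.CreatePtrDiff(Last, First);
    Count = B.CreateNUWAdd(Count, llvm::ConstantInt::get(Count->getType(), 1));
    llvm::Value *Bytes =
        B.CreateNUWMul(Count, llvm::ConstantExpr::getSizeOf(ElemTy));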
@@ -1243,7 +1060,7 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
StringRef Separator)
: CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
- OffloadEntriesInfoManager(CGM) {
+ OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
ASTContext &C = CGM.getContext();
RecordDecl *RD = C.buildImplicitRecord("ident_t");
QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
@@ -1263,55 +1080,11 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
+ // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
+ OMPBuilder.initialize();
loadOffloadInfoMetadata();
}
-bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
- const GlobalDecl &OldGD,
- llvm::GlobalValue *OrigAddr,
- bool IsForDefinition) {
- // Emit at least a definition for the aliasee if the the address of the
- // original function is requested.
- if (IsForDefinition || OrigAddr)
- (void)CGM.GetAddrOfGlobal(NewGD);
- StringRef NewMangledName = CGM.getMangledName(NewGD);
- llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
- if (Addr && !Addr->isDeclaration()) {
- const auto *D = cast<FunctionDecl>(OldGD.getDecl());
- const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
- llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
-
- // Create a reference to the named value. This ensures that it is emitted
- // if a deferred decl.
- llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
-
- // Create the new alias itself, but don't set a name yet.
- auto *GA =
- llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
-
- if (OrigAddr) {
- assert(OrigAddr->isDeclaration() && "Expected declaration");
-
- GA->takeName(OrigAddr);
- OrigAddr->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
- OrigAddr->eraseFromParent();
- } else {
- GA->setName(CGM.getMangledName(OldGD));
- }
-
- // Set attributes which are particular to an alias; this is a
- // specialization of the attributes which may be set on a global function.
- if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
- D->isWeakImported())
- GA->setLinkage(llvm::Function::WeakAnyLinkage);
-
- CGM.SetCommonAttributes(OldGD, GA);
- return true;
- }
- return false;
-}
-
void CGOpenMPRuntime::clear() {
InternalVars.clear();
// Clean non-target variable declarations possibly used only in debug info.
@@ -1325,14 +1098,6 @@ void CGOpenMPRuntime::clear() {
continue;
GV->eraseFromParent();
}
- // Emit aliases for the deferred aliasees.
- for (const auto &Pair : DeferredVariantFunction) {
- StringRef MangledName = CGM.getMangledName(Pair.second.second);
- llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
- // If not able to emit alias, just emit original declaration.
- (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
- /*IsForDefinition=*/false);
- }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
@@ -1343,7 +1108,7 @@ std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
OS << Sep << Part;
Sep = Separator;
}
- return OS.str();
+ return std::string(OS.str());
}
static llvm::Function *
@@ -1494,6 +1259,8 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
bool HasCancel = false;
if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
HasCancel = OPD->hasCancel();
+ else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
+ HasCancel = OPD->hasCancel();
else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
HasCancel = OPSD->hasCancel();
else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
@@ -1511,12 +1278,12 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
// TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
// parallel region to make cancellation barriers work properly.
- llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
- PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
+ return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
@@ -1549,7 +1316,9 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
TaskTVar->getType()->castAs<PointerType>())
.getPointer(CGF)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_task),
+ TaskArgs);
};
CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
UntiedCodeGen);
@@ -1560,11 +1329,19 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
: OMPD_task;
const CapturedStmt *CS = D.getCapturedStmt(Region);
- const auto *TD = dyn_cast<OMPTaskDirective>(&D);
+ bool HasCancel = false;
+ if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
+ HasCancel = TD->hasCancel();
+ else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
+ HasCancel = TD->hasCancel();
+ else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
+ HasCancel = TD->hasCancel();
+ else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
+ HasCancel = TD->hasCancel();
+
CodeGenFunction CGF(CGM, true);
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
- InnermostKind,
- TD ? TD->hasCancel() : false, Action);
+ InnermostKind, HasCancel, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
if (!Tied)
@@ -1786,7 +1563,8 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::CallInst *Call = CGF.Builder.CreateCall(
- createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
Elem.second.ThreadID = Call;
@@ -1800,16 +1578,17 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
OpenMPLocThreadIDMap.erase(CGF.CurFn);
}
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
- for(auto *D : FunctionUDRMap[CGF.CurFn])
+ for(const auto *D : FunctionUDRMap[CGF.CurFn])
UDRMap.erase(D);
FunctionUDRMap.erase(CGF.CurFn);
}
auto I = FunctionUDMMap.find(CGF.CurFn);
if (I != FunctionUDMMap.end()) {
- for(auto *D : I->second)
+ for(const auto *D : I->second)
UDMMap.erase(D);
FunctionUDMMap.erase(I);
}
+ LastprivateConditionalToTypes.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@@ -1826,766 +1605,6 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
-llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
- llvm::FunctionCallee RTLFn = nullptr;
- switch (static_cast<OpenMPRTLFunction>(Function)) {
- case OMPRTL__kmpc_fork_call: {
- // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
- // microtask, ...);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- getKmpc_MicroPointerTy()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
- if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
- if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
- llvm::LLVMContext &Ctx = F->getContext();
- llvm::MDBuilder MDB(Ctx);
- // Annotate the callback behavior of the __kmpc_fork_call:
- // - The callback callee is argument number 2 (microtask).
- // - The first two arguments of the callback callee are unknown (-1).
- // - All variadic arguments to the __kmpc_fork_call are passed to the
- // callback callee.
- F->addMetadata(
- llvm::LLVMContext::MD_callback,
- *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
- 2, {-1, -1},
- /* VarArgsArePassed */ true)}));
- }
- }
- break;
- }
- case OMPRTL__kmpc_global_thread_num: {
- // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
- break;
- }
- case OMPRTL__kmpc_threadprivate_cached: {
- // Build void *__kmpc_threadprivate_cached(ident_t *loc,
- // kmp_int32 global_tid, void *data, size_t size, void ***cache);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.VoidPtrTy, CGM.SizeTy,
- CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
- break;
- }
- case OMPRTL__kmpc_critical: {
- // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty,
- llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
- break;
- }
- case OMPRTL__kmpc_critical_with_hint: {
- // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit, uintptr_t hint);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- llvm::PointerType::getUnqual(KmpCriticalNameTy),
- CGM.IntPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
- break;
- }
- case OMPRTL__kmpc_threadprivate_register: {
- // Build void __kmpc_threadprivate_register(ident_t *, void *data,
- // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
- // typedef void *(*kmpc_ctor)(void *);
- auto *KmpcCtorTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
- /*isVarArg*/ false)->getPointerTo();
- // typedef void *(*kmpc_cctor)(void *, void *);
- llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto *KmpcCopyCtorTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
- /*isVarArg*/ false)
- ->getPointerTo();
- // typedef void (*kmpc_dtor)(void *);
- auto *KmpcDtorTy =
- llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
- ->getPointerTo();
- llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
- KmpcCopyCtorTy, KmpcDtorTy};
- auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
- /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
- break;
- }
- case OMPRTL__kmpc_end_critical: {
- // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty,
- llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
- break;
- }
- case OMPRTL__kmpc_cancel_barrier: {
- // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
- break;
- }
- case OMPRTL__kmpc_barrier: {
- // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
- break;
- }
- case OMPRTL__kmpc_for_static_fini: {
- // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
- break;
- }
- case OMPRTL__kmpc_push_num_threads: {
- // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_threads)
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
- break;
- }
- case OMPRTL__kmpc_serialized_parallel: {
- // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
- break;
- }
- case OMPRTL__kmpc_end_serialized_parallel: {
- // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
- break;
- }
- case OMPRTL__kmpc_flush: {
- // Build void __kmpc_flush(ident_t *loc);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
- break;
- }
- case OMPRTL__kmpc_master: {
- // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
- break;
- }
- case OMPRTL__kmpc_end_master: {
- // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
- break;
- }
- case OMPRTL__kmpc_omp_taskyield: {
- // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
- // int end_part);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
- break;
- }
- case OMPRTL__kmpc_single: {
- // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
- break;
- }
- case OMPRTL__kmpc_end_single: {
- // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
- break;
- }
- case OMPRTL__kmpc_omp_task_alloc: {
- // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
- // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- // kmp_routine_entry_t *task_entry);
- assert(KmpRoutineEntryPtrTy != nullptr &&
- "Type kmp_routine_entry_t must be created.");
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
- CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
- // Return void * and then cast to particular kmp_task_t type.
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
- break;
- }
- case OMPRTL__kmpc_omp_target_task_alloc: {
- // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
- // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
- assert(KmpRoutineEntryPtrTy != nullptr &&
- "Type kmp_routine_entry_t must be created.");
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
- CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
- CGM.Int64Ty};
- // Return void * and then cast to particular kmp_task_t type.
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
- break;
- }
- case OMPRTL__kmpc_omp_task: {
- // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
- // *new_task);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
- break;
- }
- case OMPRTL__kmpc_copyprivate: {
- // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
- // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
- // kmp_int32 didit);
- llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto *CpyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
- CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
- CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
- break;
- }
- case OMPRTL__kmpc_reduce: {
- // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
- // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
- llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
- CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
- llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
- break;
- }
- case OMPRTL__kmpc_reduce_nowait: {
- // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
- // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
- // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
- // *lck);
- llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
- CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
- llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
- break;
- }
- case OMPRTL__kmpc_end_reduce: {
- // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *lck);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty,
- llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
- break;
- }
- case OMPRTL__kmpc_end_reduce_nowait: {
- // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *lck);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty,
- llvm::PointerType::getUnqual(KmpCriticalNameTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
- break;
- }
- case OMPRTL__kmpc_omp_task_begin_if0: {
- // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
- // *new_task);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
- break;
- }
- case OMPRTL__kmpc_omp_task_complete_if0: {
- // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
- // *new_task);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy,
- /*Name=*/"__kmpc_omp_task_complete_if0");
- break;
- }
- case OMPRTL__kmpc_ordered: {
- // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
- break;
- }
- case OMPRTL__kmpc_end_ordered: {
- // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
- break;
- }
- case OMPRTL__kmpc_omp_taskwait: {
- // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
- break;
- }
- case OMPRTL__kmpc_taskgroup: {
- // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
- break;
- }
- case OMPRTL__kmpc_end_taskgroup: {
- // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
- break;
- }
- case OMPRTL__kmpc_push_proc_bind: {
- // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
- // int proc_bind)
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
- break;
- }
- case OMPRTL__kmpc_omp_task_with_deps: {
- // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
- // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
- CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
- break;
- }
- case OMPRTL__kmpc_omp_wait_deps: {
- // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
- // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
- // kmp_depend_info_t *noalias_dep_list);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.Int32Ty, CGM.VoidPtrTy,
- CGM.Int32Ty, CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
- break;
- }
- case OMPRTL__kmpc_cancellationpoint: {
- // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
- // global_tid, kmp_int32 cncl_kind)
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
- break;
- }
- case OMPRTL__kmpc_cancel: {
- // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 cncl_kind)
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
- break;
- }
- case OMPRTL__kmpc_push_num_teams: {
- // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
- // kmp_int32 num_teams, kmp_int32 num_threads)
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
- CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
- break;
- }
- case OMPRTL__kmpc_fork_teams: {
- // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
- // microtask, ...);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- getKmpc_MicroPointerTy()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
- if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
- if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
- llvm::LLVMContext &Ctx = F->getContext();
- llvm::MDBuilder MDB(Ctx);
- // Annotate the callback behavior of the __kmpc_fork_teams:
- // - The callback callee is argument number 2 (microtask).
- // - The first two arguments of the callback callee are unknown (-1).
- // - All variadic arguments to the __kmpc_fork_teams are passed to the
- // callback callee.
- F->addMetadata(
- llvm::LLVMContext::MD_callback,
- *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
- 2, {-1, -1},
- /* VarArgsArePassed */ true)}));
- }
- }
- break;
- }
- case OMPRTL__kmpc_taskloop: {
- // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
- // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
- // sched, kmp_uint64 grainsize, void *task_dup);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
- CGM.IntTy,
- CGM.VoidPtrTy,
- CGM.IntTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty,
- CGM.IntTy,
- CGM.IntTy,
- CGM.Int64Ty,
- CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
- break;
- }
- case OMPRTL__kmpc_doacross_init: {
- // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
- // num_dims, struct kmp_dim *dims);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
- CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
- break;
- }
- case OMPRTL__kmpc_doacross_fini: {
- // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
- break;
- }
- case OMPRTL__kmpc_doacross_post: {
- // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
- // *vec);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
- break;
- }
- case OMPRTL__kmpc_doacross_wait: {
- // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
- // *vec);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
- break;
- }
- case OMPRTL__kmpc_task_reduction_init: {
- // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
- // *data);
- llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
- break;
- }
- case OMPRTL__kmpc_task_reduction_get_th_data: {
- // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
- // *d);
- llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
- break;
- }
- case OMPRTL__kmpc_alloc: {
- // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
- // al); omp_allocator_handle_t type is void *.
- llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
- break;
- }
- case OMPRTL__kmpc_free: {
- // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
- // al); omp_allocator_handle_t type is void *.
- llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
- break;
- }
- case OMPRTL__kmpc_push_target_tripcount: {
- // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
- // size);
- llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
- llvm::FunctionType *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
- break;
- }
- case OMPRTL__tgt_target: {
- // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.VoidPtrTy,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
- break;
- }
- case OMPRTL__tgt_target_nowait: {
- // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
- // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
- // int64_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.VoidPtrTy,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
- break;
- }
- case OMPRTL__tgt_target_teams: {
- // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
- // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
- // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.VoidPtrTy,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo(),
- CGM.Int32Ty,
- CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
- break;
- }
- case OMPRTL__tgt_target_teams_nowait: {
- // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
- // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
- // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.VoidPtrTy,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo(),
- CGM.Int32Ty,
- CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
- break;
- }
- case OMPRTL__tgt_register_requires: {
- // Build void __tgt_register_requires(int64_t flags);
- llvm::Type *TypeParams[] = {CGM.Int64Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
- break;
- }
- case OMPRTL__tgt_target_data_begin: {
- // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
- // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
- break;
- }
- case OMPRTL__tgt_target_data_begin_nowait: {
- // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
- break;
- }
- case OMPRTL__tgt_target_data_end: {
- // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
- // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
- break;
- }
- case OMPRTL__tgt_target_data_end_nowait: {
- // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
- break;
- }
- case OMPRTL__tgt_target_data_update: {
- // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
- // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
- break;
- }
- case OMPRTL__tgt_target_data_update_nowait: {
- // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
- // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
- // *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int64Ty,
- CGM.Int32Ty,
- CGM.VoidPtrPtrTy,
- CGM.VoidPtrPtrTy,
- CGM.Int64Ty->getPointerTo(),
- CGM.Int64Ty->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
- break;
- }
- case OMPRTL__tgt_mapper_num_components: {
- // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
- llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
- break;
- }
- case OMPRTL__tgt_push_mapper_component: {
- // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
- // *base, void *begin, int64_t size, int64_t type);
- llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
- CGM.Int64Ty, CGM.Int64Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
- break;
- }
- }
- assert(RTLFn && "Unable to find OpenMP runtime function");
- return RTLFn;
-}
-
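The roughly 750 lines deleted above were a hand-maintained enum plus switch that spelled out every __kmpc_*/__tgt_* signature by hand. The hunks that follow replace each call site with the shared OpenMPIRBuilder, which materializes the same declarations from the OMP_RTL entries in llvm/Frontend/OpenMP/OMPKinds.def. A minimal before/after sketch of one call site, using names as they appear elsewhere in this diff:

    // Before: local enum value, signature assembled in the switch above.
    llvm::FunctionCallee OldFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    // After: declaration comes from OMPKinds.def via the shared IR builder.
    llvm::FunctionCallee NewFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_fork_call);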
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
@@ -2764,7 +1783,9 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
getOrCreateThreadPrivateCache(VD)};
return Address(CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
+ Args),
VDAddr.getAlignment());
}
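The entry point used here keeps the signature documented in the deleted switch: void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, void *data, size_t size, void ***cache). A rough sketch of the access this emits for a threadprivate variable (editorial, not from the patch):

    // 'Cache' is the per-variable global returned by
    // getOrCreateThreadPrivateCache(VD).
    void *Addr = __kmpc_threadprivate_cached(&Loc, GTid, (void *)&Var,
                                             sizeof(Var), &Cache);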
@@ -2774,7 +1795,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
// library.
llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_global_thread_num),
OMPLoc);
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
// to register constructor/destructor for variable.
@@ -2782,7 +1804,9 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
Ctor, CopyCtor, Dtor};
CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
+ Args);
}
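The registration emitted here likewise matches the deleted declaration of __kmpc_threadprivate_register and its constructor/destructor typedefs. As a sketch, with the typedefs copied from the removed case above:

    typedef void *(*kmpc_ctor)(void *);
    typedef void *(*kmpc_cctor)(void *, void *);
    typedef void (*kmpc_dtor)(void *);
    // Emitted once per threadprivate variable with a dynamic initializer:
    // __kmpc_threadprivate_register(&Loc, &Var, Ctor, CopyCtor, Dtor);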
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
@@ -2813,7 +1837,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
std::string Name = getName({"__kmpc_global_ctor_", ""});
llvm::Function *Fn =
- CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
+ CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
Args, Loc, Loc);
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
@@ -2846,7 +1870,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
std::string Name = getName({"__kmpc_global_dtor_", ""});
llvm::Function *Fn =
- CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
+ CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
Loc, Loc);
@@ -2889,7 +1913,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
auto *InitFunctionTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
std::string Name = getName({"__omp_threadprivate_init_", ""});
- llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
+ llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
CodeGenFunction InitCGF(CGM);
FunctionArgList ArgList;
@@ -2918,12 +1942,14 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
HasRequiresUnifiedSharedMemory))
return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
- if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
+ assert(VD && "Unknown VarDecl");
+
+ if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
return CGM.getLangOpts().OpenMPIsDevice;
QualType ASTTy = VD->getType();
-
SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
+
// Produce the unique prefix to identify the new target regions. We use
// the source location of the variable declaration which we know to not
// conflict with any target region.
@@ -2949,7 +1975,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
+ llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_ctor"), FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
@@ -2987,7 +2013,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
+ llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_dtor"), FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
@@ -3042,7 +2068,9 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
return Address(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
+ Args),
VarLVType->getPointerTo(/*AddrSpace=*/0)),
CGM.getContext().getTypeAlignInChars(VarType));
}
@@ -3093,8 +2121,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
if (!CGF.HaveInsertPoint())
return;
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
- auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &M = CGM.getModule();
+ auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
+ this](CodeGenFunction &CGF, PrePostActionTy &) {
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
@@ -3106,18 +2135,19 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
llvm::FunctionCallee RTLFn =
- RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
};
- auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
+ this](CodeGenFunction &CGF, PrePostActionTy &) {
CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
// Build calls:
// __kmpc_serialized_parallel(&Loc, GTid);
llvm::Value *Args[] = {RTLoc, ThreadID};
- CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_serialized_parallel),
+ Args);
// OutlinedFn(&GTid, &zero_bound, CapturedStruct);
Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
@@ -3134,9 +2164,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
// __kmpc_end_serialized_parallel(&Loc, GTid);
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
- CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
- EndArgs);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_end_serialized_parallel),
+ EndArgs);
};
if (IfCond) {
emitIfClause(CGF, IfCond, ThenGen, ElseGen);
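Putting ThenGen and ElseGen together, the lowering of '#pragma omp parallel if (cond)' is, in pseudo-C (a sketch assembled from the comments above, not code from the patch):

    if (cond) {
      __kmpc_fork_call(&loc, n, microtask, var1, ..., varn);
    } else {
      __kmpc_serialized_parallel(&loc, gtid);
      OutlinedFn(&gtid, &zero_bound, CapturedStruct);
      __kmpc_end_serialized_parallel(&loc, gtid);
    }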
@@ -3250,12 +2280,16 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
std::end(Args));
if (Hint) {
EnterArgs.push_back(CGF.Builder.CreateIntCast(
- CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
+ CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
}
CommonActionTy Action(
- createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
- : OMPRTL__kmpc_critical),
- EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(),
+ Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
+ EnterArgs,
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___kmpc_end_critical),
+ Args);
CriticalOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
@@ -3271,8 +2305,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
// }
// Prepare arguments and build a call to __kmpc_master
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
- createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
+ CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_master),
+ Args,
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_end_master),
+ Args,
/*Conditional=*/true);
MasterOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
@@ -3283,11 +2321,18 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
- // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
- llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
- llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
+ if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
+ OMPBuilder.CreateTaskyield(CGF.Builder);
+ } else {
+ // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
+ Args);
+ }
+
if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
Region->emitUntiedSwitch(CGF);
}
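This is the gating pattern the patch applies to the simple standalone constructs (taskyield here, flush and barrier below): when LangOpts.OpenMPIRBuilder is set (driven by the -fopenmp-enable-irbuilder cc1 flag), emission is delegated to the shared builder, otherwise the classic runtime call is kept. Condensed:

    if (CGF.CGM.getLangOpts().OpenMPIRBuilder)
      OMPBuilder.CreateTaskyield(CGF.Builder); // shared IR-builder path
    else                                       // classic path, as above
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                          Args);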
@@ -3302,8 +2347,11 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
// __kmpc_end_taskgroup(ident_t *, gtid);
// Prepare arguments and build a call to __kmpc_taskgroup
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
- createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
+ CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskgroup),
+ Args,
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
Args);
TaskgroupOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
@@ -3409,8 +2457,12 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
}
// Prepare arguments and build a call to __kmpc_single
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
- createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
+ CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_single),
+ Args,
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_end_single),
+ Args,
/*Conditional=*/true);
SingleOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
@@ -3455,7 +2507,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
CpyFn, // void (*) (void *, void *) <copy_func>
DidItVal // i32 did_it
};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_copyprivate),
+ Args);
}
}
@@ -3470,8 +2524,11 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
// Prepare arguments and build a call to __kmpc_ordered
if (IsThreads) {
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
- createRuntimeFunction(OMPRTL__kmpc_end_ordered),
+ CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_ordered),
+ Args,
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_end_ordered),
Args);
OrderedOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
@@ -3519,9 +2576,8 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
// Check if we should use the OMPBuilder
auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
- llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
- if (OMPBuilder) {
- CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
+ if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
+ CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
return;
}
@@ -3538,7 +2594,9 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
if (OMPRegionInfo) {
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
llvm::Value *Result = CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___kmpc_cancel_barrier),
+ Args);
if (EmitChecks) {
// if (__kmpc_cancel_barrier()) {
// exit from construct;
@@ -3557,7 +2615,9 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
return;
}
}
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_barrier),
+ Args);
}
/// Map the OpenMP loop schedule to the runtime enumeration.
@@ -3771,6 +2831,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
@@ -3805,7 +2866,9 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
? OMP_IDENT_WORK_LOOP
: OMP_IDENT_WORK_SECTIONS),
getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_for_static_fini),
Args);
}
@@ -3853,7 +2916,8 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_push_num_threads),
Args);
}
@@ -3867,16 +2931,23 @@ void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
+ Args);
}
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
- SourceLocation Loc) {
- if (!CGF.HaveInsertPoint())
- return;
- // Build call void __kmpc_flush(ident_t *loc)
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
- emitUpdateLocation(CGF, Loc));
+ SourceLocation Loc, llvm::AtomicOrdering AO) {
+ if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
+ OMPBuilder.CreateFlush(CGF.Builder);
+ } else {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Build call void __kmpc_flush(ident_t *loc)
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_flush),
+ emitUpdateLocation(CGF, Loc));
+ }
}
namespace {
@@ -4358,13 +3429,14 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
namespace {
struct PrivateHelpersTy {
- PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
- const VarDecl *PrivateElemInit)
- : Original(Original), PrivateCopy(PrivateCopy),
+ PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
+ const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
+ : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
PrivateElemInit(PrivateElemInit) {}
- const VarDecl *Original;
- const VarDecl *PrivateCopy;
- const VarDecl *PrivateElemInit;
+ const Expr *OriginalRef = nullptr;
+ const VarDecl *Original = nullptr;
+ const VarDecl *PrivateCopy = nullptr;
+ const VarDecl *PrivateElemInit = nullptr;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
@@ -4744,7 +3816,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// For target-based directives skip 3 firstprivate arrays BasePointersArray,
// PointersArray and SizesArray. The original variables for these arrays are
// not captured and we get their addresses explicitly.
- if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
+ if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
(IsTargetTask && KmpTaskSharedsPtr.isValid())) {
SrcBase = CGF.MakeAddrLValue(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
@@ -4776,13 +3848,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
"Expected artificial target data variable.");
SharedRefLValue =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
- } else {
+ } else if (ForDup) {
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(CGF),
C.getDeclAlign(OriginalVD)),
SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
SharedRefLValue.getTBAAInfo());
+ } else if (CGF.LambdaCaptureFields.count(
+ Pair.second.Original->getCanonicalDecl()) > 0 ||
+ dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
+ SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
+ } else {
+ // Processing for implicitly captured variables.
+ InlinedOpenMPRegionRAII Region(
+ CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
+ /*HasCancel=*/false);
+ SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
}
if (Type->isArrayType()) {
// Initialize firstprivate array.
@@ -4915,7 +3997,7 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
Base, *std::next(KmpTaskTQTyRD->field_begin(),
KmpTaskTShareds)),
Loc),
- CGF.getNaturalTypeAlignment(SharedsTy));
+ CGM.getNaturalTypeAlignment(SharedsTy));
}
emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
@@ -4938,6 +4020,135 @@ checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
return NeedsCleanup;
}
+namespace {
+/// Loop generator for OpenMP iterator expression.
+class OMPIteratorGeneratorScope final
+ : public CodeGenFunction::OMPPrivateScope {
+ CodeGenFunction &CGF;
+ const OMPIteratorExpr *E = nullptr;
+ SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
+ SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
+ OMPIteratorGeneratorScope() = delete;
+ OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
+
+public:
+ OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
+ : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
+ if (!E)
+ return;
+ SmallVector<llvm::Value *, 4> Uppers;
+ for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
+ Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
+ const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
+ addPrivate(VD, [&CGF, VD]() {
+ return CGF.CreateMemTemp(VD->getType(), VD->getName());
+ });
+ const OMPIteratorHelperData &HelperData = E->getHelper(I);
+ addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
+ return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
+ "counter.addr");
+ });
+ }
+ Privatize();
+
+ for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
+ const OMPIteratorHelperData &HelperData = E->getHelper(I);
+ LValue CLVal =
+ CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
+ HelperData.CounterVD->getType());
+ // Counter = 0;
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
+ CLVal);
+ CodeGenFunction::JumpDest &ContDest =
+ ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
+ CodeGenFunction::JumpDest &ExitDest =
+ ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
+ // N = <number-of_iterations>;
+ llvm::Value *N = Uppers[I];
+ // cont:
+ // if (Counter < N) goto body; else goto exit;
+ CGF.EmitBlock(ContDest.getBlock());
+ auto *CVal =
+ CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
+ llvm::Value *Cmp =
+ HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
+ ? CGF.Builder.CreateICmpSLT(CVal, N)
+ : CGF.Builder.CreateICmpULT(CVal, N);
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
+ CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
+ // body:
+ CGF.EmitBlock(BodyBB);
+ // Iteri = Begini + Counter * Stepi;
+ CGF.EmitIgnoredExpr(HelperData.Update);
+ }
+ }
+ ~OMPIteratorGeneratorScope() {
+ if (!E)
+ return;
+ for (unsigned I = E->numOfIterators(); I > 0; --I) {
+ // Counter = Counter + 1;
+ const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
+ CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
+ // goto cont;
+ CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
+ // exit:
+ CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
+ }
+ }
+};
+} // namespace
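Net effect of the scope: the constructor opens one counter-guarded loop per iterator of an OpenMP 5.0 'iterator(...)' modifier, and the destructor closes them in reverse order. An illustrative, compilable shape for two iterators (counter names and bounds are invented for the example):

    void iteratorLoops(unsigned long n0, unsigned long n1) {
      for (unsigned long c0 = 0; c0 < n0; ++c0) {   // iter0 = begin0 + c0 * step0;
        for (unsigned long c1 = 0; c1 < n1; ++c1) { // iter1 = begin1 + c1 * step1;
          /* body emitted between constructor and destructor */
        }
      }
    }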
+
+static std::pair<llvm::Value *, llvm::Value *>
+getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
+ const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
+ llvm::Value *Addr;
+ if (OASE) {
+ const Expr *Base = OASE->getBase();
+ Addr = CGF.EmitScalarExpr(Base);
+ } else {
+ Addr = CGF.EmitLValue(E).getPointer(CGF);
+ }
+ llvm::Value *SizeVal;
+ QualType Ty = E->getType();
+ if (OASE) {
+ SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
+ for (const Expr *SE : OASE->getDimensions()) {
+ llvm::Value *Sz = CGF.EmitScalarExpr(SE);
+ Sz = CGF.EmitScalarConversion(
+ Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
+ SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
+ }
+ } else if (const auto *ASE =
+ dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
+ LValue UpAddrLVal =
+ CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
+ llvm::Value *UpAddr =
+ CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
+ llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
+ llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
+ SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
+ } else {
+ SizeVal = CGF.getTypeSize(Ty);
+ }
+ return std::make_pair(Addr, SizeVal);
+}
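The three size paths above, worked through on small examples (editorial sketch, not in the patch):

    // ([n][m])p with element type int -> SizeVal = sizeof(int) * n * m
    //                                    (the CreateNUWMul chain)
    // section a[l:len]                -> SizeVal = (char *)(&a[l+len-1] + 1)
    //                                            - (char *)&a[l]
    // plain lvalue E                  -> SizeVal = size of E's type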
+
+/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
+static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
+ QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
+ if (KmpTaskAffinityInfoTy.isNull()) {
+ RecordDecl *KmpAffinityInfoRD =
+ C.buildImplicitRecord("kmp_task_affinity_info_t");
+ KmpAffinityInfoRD->startDefinition();
+ addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
+ addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
+ addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
+ KmpAffinityInfoRD->completeDefinition();
+ KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
+ }
+}
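A C view of the record built here; field order and widths follow the addFieldToRecordDecl calls, and mirror the runtime's kmp_task_affinity_info_t modulo its exact flag layout:

    #include <cstddef>
    #include <cstdint>
    struct kmp_task_affinity_info_t {
      std::intptr_t base_addr; // start address of the affinity range
      std::size_t len;         // length in bytes
      std::uint32_t flags;     // 32-bit unsigned flags word
    };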
+
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
@@ -4946,23 +4157,23 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
ASTContext &C = CGM.getContext();
llvm::SmallVector<PrivateDataTy, 4> Privates;
// Aggregate privates and sort them by the alignment.
- auto I = Data.PrivateCopies.begin();
+ const auto *I = Data.PrivateCopies.begin();
for (const Expr *E : Data.PrivateVars) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Privates.emplace_back(
C.getDeclAlign(VD),
- PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+ PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
/*PrivateElemInit=*/nullptr));
++I;
}
I = Data.FirstprivateCopies.begin();
- auto IElemInitRef = Data.FirstprivateInits.begin();
+ const auto *IElemInitRef = Data.FirstprivateInits.begin();
for (const Expr *E : Data.FirstprivateVars) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Privates.emplace_back(
C.getDeclAlign(VD),
PrivateHelpersTy(
- VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+ E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
++I;
++IElemInitRef;
@@ -4972,7 +4183,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Privates.emplace_back(
C.getDeclAlign(VD),
- PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+ PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
/*PrivateElemInit=*/nullptr));
++I;
}
@@ -5046,7 +4257,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
TiedFlag = 0x1,
FinalFlag = 0x2,
DestructorsFlag = 0x8,
- PriorityFlag = 0x20
+ PriorityFlag = 0x20,
+ DetachableFlag = 0x40,
};
unsigned Flags = Data.Tied ? TiedFlag : 0;
bool NeedsCleanup = false;
@@ -5057,6 +4269,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
}
if (Data.Priority.getInt())
Flags = Flags | PriorityFlag;
+ if (D.hasClausesOfKind<OMPDetachClause>())
+ Flags = Flags | DetachableFlag;
llvm::Value *TaskFlags =
Data.Final.getPointer()
? CGF.Builder.CreateSelect(Data.Final.getPointer(),
@@ -5084,10 +4298,170 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
AllocArgs.push_back(DeviceID);
NewTask = CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
+ AllocArgs);
} else {
- NewTask = CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
+ NewTask =
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
+ AllocArgs);
+ }
+ // Emit detach clause initialization.
+ // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
+ // task_descriptor);
+ if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
+ const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
+ LValue EvtLVal = CGF.EmitLValue(Evt);
+
+ // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
+ // int gtid, kmp_task_t *task);
+ llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
+ llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
+ Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
+ llvm::Value *EvtVal = CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
+ {Loc, Tid, NewTask});
+ EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
+ Evt->getExprLoc());
+ CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
+ }
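Source-level trigger for the block above, for orientation (sketch; 'evt' must have type omp_event_handle_t per OpenMP 5.0):

    omp_event_handle_t evt;
    #pragma omp task detach(evt)
    { /* ... */ }
    // 'evt' receives the kmp_event_t * returned by
    // __kmpc_task_allow_completion_event(&loc, gtid, new_task),
    // converted to the handle type by the EmitScalarConversion above.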
+ // Process affinity clauses.
+ if (D.hasClausesOfKind<OMPAffinityClause>()) {
+ // Process list of affinity data.
+ ASTContext &C = CGM.getContext();
+ Address AffinitiesArray = Address::invalid();
+ // Calculate number of elements to form the array of affinity data.
+ llvm::Value *NumOfElements = nullptr;
+ unsigned NumAffinities = 0;
+ for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
+ if (const Expr *Modifier = C->getModifier()) {
+ const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
+ for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
+ llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
+ Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
+ NumOfElements =
+ NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
+ }
+ } else {
+ NumAffinities += C->varlist_size();
+ }
+ }
+ getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
+ // Fields ids in kmp_task_affinity_info record.
+ enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
+
+ QualType KmpTaskAffinityInfoArrayTy;
+ if (NumOfElements) {
+ NumOfElements = CGF.Builder.CreateNUWAdd(
+ llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
+ OpaqueValueExpr OVE(
+ Loc,
+ C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
+ VK_RValue);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
+ RValue::get(NumOfElements));
+ KmpTaskAffinityInfoArrayTy =
+ C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
+ /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
+ // Properly emit variable-sized array.
+ auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
+ ImplicitParamDecl::Other);
+ CGF.EmitVarDecl(*PD);
+ AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
+ NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
+ /*isSigned=*/false);
+ } else {
+ KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
+ KmpTaskAffinityInfoTy,
+ llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
+ ArrayType::Normal, /*IndexTypeQuals=*/0);
+ AffinitiesArray =
+ CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
+ AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
+ NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
+ /*isSigned=*/false);
+ }
+
+ const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
+    // Fill the array with elements that have no iterator modifier.
+ unsigned Pos = 0;
+ bool HasIterator = false;
+ for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
+ if (C->getModifier()) {
+ HasIterator = true;
+ continue;
+ }
+ for (const Expr *E : C->varlists()) {
+ llvm::Value *Addr;
+ llvm::Value *Size;
+ std::tie(Addr, Size) = getPointerAndSize(CGF, E);
+ LValue Base =
+ CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
+ KmpTaskAffinityInfoTy);
+ // affs[i].base_addr = &<Affinities[i].second>;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
+ CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
+ BaseAddrLVal);
+ // affs[i].len = sizeof(<Affinities[i].second>);
+ LValue LenLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
+ CGF.EmitStoreOfScalar(Size, LenLVal);
+ ++Pos;
+ }
+ }
+ LValue PosLVal;
+ if (HasIterator) {
+ PosLVal = CGF.MakeAddrLValue(
+ CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
+ C.getSizeType());
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
+ }
+ // Process elements with iterators.
+ for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
+ const Expr *Modifier = C->getModifier();
+ if (!Modifier)
+ continue;
+ OMPIteratorGeneratorScope IteratorScope(
+ CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
+ for (const Expr *E : C->varlists()) {
+ llvm::Value *Addr;
+ llvm::Value *Size;
+ std::tie(Addr, Size) = getPointerAndSize(CGF, E);
+ llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
+ LValue Base = CGF.MakeAddrLValue(
+ Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
+ AffinitiesArray.getAlignment()),
+ KmpTaskAffinityInfoTy);
+ // affs[i].base_addr = &<Affinities[i].second>;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
+ CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
+ BaseAddrLVal);
+ // affs[i].len = sizeof(<Affinities[i].second>);
+ LValue LenLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
+ CGF.EmitStoreOfScalar(Size, LenLVal);
+ Idx = CGF.Builder.CreateNUWAdd(
+ Idx, llvm::ConstantInt::get(Idx->getType(), 1));
+ CGF.EmitStoreOfScalar(Idx, PosLVal);
+ }
+ }
+ // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
+ // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
+ // naffins, kmp_task_affinity_info_t *affin_list);
+ llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = getThreadID(CGF, Loc);
+ llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ AffinitiesArray.getPointer(), CGM.VoidPtrTy);
+    // FIXME: Emit the function and ignore its result for now until the
+    // runtime function is properly implemented.
+ (void)CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
+ {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
}
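+  // E.g., for
+  //   #pragma omp task affinity(iterator(i = 0:n): a[i])
+  // the loop above fills one kmp_task_affinity_info record per iteration
+  // (base_addr = &a[i], len = sizeof(a[i])) and then emits (sketch):
+  //   __kmpc_omp_reg_task_with_affinity(&loc, gtid, new_task, n, affs);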
llvm::Value *NewTaskNewTaskTTy =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
@@ -5106,7 +4480,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
KmpTaskTShareds)),
Loc),
- CGF.getNaturalTypeAlignment(SharedsTy));
+ CGM.getNaturalTypeAlignment(SharedsTy));
LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
@@ -5158,6 +4532,540 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
return Result;
}
+namespace {
+/// Dependence kind for RTL.
+enum RTLDependenceKindTy {
+ DepIn = 0x01,
+ DepInOut = 0x3,
+ DepMutexInOutSet = 0x4
+};
+/// Field ids in the kmp_depend_info record.
+enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
+} // namespace
+
+/// Translates internal dependency kind into the runtime kind.
+static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
+ RTLDependenceKindTy DepKind;
+ switch (K) {
+ case OMPC_DEPEND_in:
+ DepKind = DepIn;
+ break;
+ // Out and InOut dependencies must use the same code.
+ case OMPC_DEPEND_out:
+ case OMPC_DEPEND_inout:
+ DepKind = DepInOut;
+ break;
+ case OMPC_DEPEND_mutexinoutset:
+ DepKind = DepMutexInOutSet;
+ break;
+ case OMPC_DEPEND_source:
+ case OMPC_DEPEND_sink:
+ case OMPC_DEPEND_depobj:
+ case OMPC_DEPEND_unknown:
+ llvm_unreachable("Unknown task dependence type");
+ }
+ return DepKind;
+}
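+// E.g., depend(in: x) lowers to flags 0x1, while depend(out: x) and
+// depend(inout: x) both lower to flags 0x3, and depend(mutexinoutset: x)
+// lowers to flags 0x4 in the emitted kmp_depend_info records.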
+
+/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
+static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
+ QualType &FlagsTy) {
+ FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
+ if (KmpDependInfoTy.isNull()) {
+ RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
+ KmpDependInfoRD->startDefinition();
+ addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
+ addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
+ addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
+ KmpDependInfoRD->completeDefinition();
+ KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
+ }
+}
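+// The implicit record built above corresponds roughly to the runtime-side
+// type (sketch; the actual runtime declaration may differ in detail):
+//   typedef struct kmp_depend_info {
+//     kmp_intptr_t base_addr;
+//     size_t len;
+//     unsigned char flags; // bool-sized unsigned integer, see FlagsTy above
+//   } kmp_depend_info_t;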
+
+std::pair<llvm::Value *, LValue>
+CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
+ SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ RecordDecl *KmpDependInfoRD =
+ cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+ LValue Base = CGF.EmitLoadOfPointerLValue(
+ DepobjLVal.getAddress(CGF),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
+ Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
+ Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
+ Base.getTBAAInfo());
+ llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
+ Addr.getPointer(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
+ LValue NumDepsBase = CGF.MakeAddrLValue(
+ Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
+ Base.getBaseInfo(), Base.getTBAAInfo());
+ // NumDeps = deps[i].base_addr;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
+ return std::make_pair(NumDeps, Base);
+}
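+// Assumed depobj memory layout (sketch):
+//   [ count record | dep 0 | dep 1 | ... ]
+// The stored void* points at dep 0; the record at index -1 keeps the number
+// of dependences in its base_addr field, which is what the load above reads.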
+
+static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ llvm::PointerUnion<unsigned *, LValue *> Pos,
+ const OMPTaskDataTy::DependData &Data,
+ Address DependenciesArray) {
+ CodeGenModule &CGM = CGF.CGM;
+ ASTContext &C = CGM.getContext();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ RecordDecl *KmpDependInfoRD =
+ cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+ llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
+
+ OMPIteratorGeneratorScope IteratorScope(
+ CGF, cast_or_null<OMPIteratorExpr>(
+ Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
+ : nullptr));
+ for (const Expr *E : Data.DepExprs) {
+ llvm::Value *Addr;
+ llvm::Value *Size;
+ std::tie(Addr, Size) = getPointerAndSize(CGF, E);
+ LValue Base;
+ if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
+ Base = CGF.MakeAddrLValue(
+ CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
+ } else {
+ LValue &PosLVal = *Pos.get<LValue *>();
+ llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
+ Base = CGF.MakeAddrLValue(
+ Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
+ DependenciesArray.getAlignment()),
+ KmpDependInfoTy);
+ }
+ // deps[i].base_addr = &<Dependencies[i].second>;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
+ BaseAddrLVal);
+ // deps[i].len = sizeof(<Dependencies[i].second>);
+ LValue LenLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpDependInfoRD->field_begin(), Len));
+ CGF.EmitStoreOfScalar(Size, LenLVal);
+ // deps[i].flags = <Dependencies[i].first>;
+ RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
+ LValue FlagsLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
+ FlagsLVal);
+ if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
+ ++(*P);
+ } else {
+ LValue &PosLVal = *Pos.get<LValue *>();
+ llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
+ Idx = CGF.Builder.CreateNUWAdd(Idx,
+ llvm::ConstantInt::get(Idx->getType(), 1));
+ CGF.EmitStoreOfScalar(Idx, PosLVal);
+ }
+ }
+}
+
+static SmallVector<llvm::Value *, 4>
+emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ const OMPTaskDataTy::DependData &Data) {
+ assert(Data.DepKind == OMPC_DEPEND_depobj &&
+ "Expected depobj dependecy kind.");
+ SmallVector<llvm::Value *, 4> Sizes;
+ SmallVector<LValue, 4> SizeLVals;
+ ASTContext &C = CGF.getContext();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ RecordDecl *KmpDependInfoRD =
+ cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+ QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
+ llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
+ {
+ OMPIteratorGeneratorScope IteratorScope(
+ CGF, cast_or_null<OMPIteratorExpr>(
+ Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
+ : nullptr));
+ for (const Expr *E : Data.DepExprs) {
+ LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
+ LValue Base = CGF.EmitLoadOfPointerLValue(
+ DepobjLVal.getAddress(CGF),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Base.getAddress(CGF), KmpDependInfoPtrT);
+ Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
+ Base.getTBAAInfo());
+ llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
+ Addr.getPointer(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
+ LValue NumDepsBase = CGF.MakeAddrLValue(
+ Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
+ Base.getBaseInfo(), Base.getTBAAInfo());
+ // NumDeps = deps[i].base_addr;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ llvm::Value *NumDeps =
+ CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
+ LValue NumLVal = CGF.MakeAddrLValue(
+ CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
+ C.getUIntPtrType());
+ CGF.InitTempAlloca(NumLVal.getAddress(CGF),
+ llvm::ConstantInt::get(CGF.IntPtrTy, 0));
+ llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
+ llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
+ CGF.EmitStoreOfScalar(Add, NumLVal);
+ SizeLVals.push_back(NumLVal);
+ }
+ }
+ for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
+ llvm::Value *Size =
+ CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
+ Sizes.push_back(Size);
+ }
+ return Sizes;
+}
+
+static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ LValue PosLVal,
+ const OMPTaskDataTy::DependData &Data,
+ Address DependenciesArray) {
+ assert(Data.DepKind == OMPC_DEPEND_depobj &&
+ "Expected depobj dependecy kind.");
+ ASTContext &C = CGF.getContext();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ RecordDecl *KmpDependInfoRD =
+ cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+ QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
+ llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
+ llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
+ {
+ OMPIteratorGeneratorScope IteratorScope(
+ CGF, cast_or_null<OMPIteratorExpr>(
+ Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
+ : nullptr));
+ for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
+ const Expr *E = Data.DepExprs[I];
+ LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
+ LValue Base = CGF.EmitLoadOfPointerLValue(
+ DepobjLVal.getAddress(CGF),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Base.getAddress(CGF), KmpDependInfoPtrT);
+ Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
+ Base.getTBAAInfo());
+
+ // Get number of elements in a single depobj.
+ llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
+ Addr.getPointer(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
+ LValue NumDepsBase = CGF.MakeAddrLValue(
+ Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
+ Base.getBaseInfo(), Base.getTBAAInfo());
+ // NumDeps = deps[i].base_addr;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ llvm::Value *NumDeps =
+ CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
+
+      // Memcpy the dependency records from this depobj into the array.
+ llvm::Value *Size = CGF.Builder.CreateNUWMul(
+ ElSize,
+ CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
+ llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
+ Address DepAddr =
+ Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
+ DependenciesArray.getAlignment());
+ CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
+
+      // Advance the position by the number of copied records:
+      // pos += numDeps;
+ llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
+ CGF.EmitStoreOfScalar(Add, PosLVal);
+ }
+ }
+}
+
+std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
+ CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
+ SourceLocation Loc) {
+ if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
+ return D.DepExprs.empty();
+ }))
+ return std::make_pair(nullptr, Address::invalid());
+ // Process list of dependencies.
+ ASTContext &C = CGM.getContext();
+ Address DependenciesArray = Address::invalid();
+ llvm::Value *NumOfElements = nullptr;
+ unsigned NumDependencies = std::accumulate(
+ Dependencies.begin(), Dependencies.end(), 0,
+ [](unsigned V, const OMPTaskDataTy::DependData &D) {
+ return D.DepKind == OMPC_DEPEND_depobj
+ ? V
+ : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
+ });
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ bool HasDepobjDeps = false;
+ bool HasRegularWithIterators = false;
+ llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
+ llvm::Value *NumOfRegularWithIterators =
+ llvm::ConstantInt::get(CGF.IntPtrTy, 1);
+  // Calculate the number of depobj dependencies and regular dependencies
+  // with iterators.
+ for (const OMPTaskDataTy::DependData &D : Dependencies) {
+ if (D.DepKind == OMPC_DEPEND_depobj) {
+ SmallVector<llvm::Value *, 4> Sizes =
+ emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
+ for (llvm::Value *Size : Sizes) {
+ NumOfDepobjElements =
+ CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
+ }
+ HasDepobjDeps = true;
+ continue;
+ }
+ // Include number of iterations, if any.
+ if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
+ for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
+ llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
+ Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
+ NumOfRegularWithIterators =
+ CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
+ }
+ HasRegularWithIterators = true;
+ continue;
+ }
+ }
+
+ QualType KmpDependInfoArrayTy;
+ if (HasDepobjDeps || HasRegularWithIterators) {
+ NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
+ /*isSigned=*/false);
+ if (HasDepobjDeps) {
+ NumOfElements =
+ CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
+ }
+ if (HasRegularWithIterators) {
+ NumOfElements =
+ CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
+ }
+ OpaqueValueExpr OVE(Loc,
+ C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
+ VK_RValue);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
+ RValue::get(NumOfElements));
+ KmpDependInfoArrayTy =
+ C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
+ /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
+ // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
+ // Properly emit variable-sized array.
+ auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
+ ImplicitParamDecl::Other);
+ CGF.EmitVarDecl(*PD);
+ DependenciesArray = CGF.GetAddrOfLocalVar(PD);
+ NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
+ /*isSigned=*/false);
+ } else {
+ KmpDependInfoArrayTy = C.getConstantArrayType(
+ KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
+ ArrayType::Normal, /*IndexTypeQuals=*/0);
+ DependenciesArray =
+ CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
+ DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
+ NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
+ /*isSigned=*/false);
+ }
+ unsigned Pos = 0;
+ for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
+ if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
+ Dependencies[I].IteratorExpr)
+ continue;
+ emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
+ DependenciesArray);
+ }
+  // Copy regular dependencies with iterators.
+ LValue PosLVal = CGF.MakeAddrLValue(
+ CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
+ for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
+ if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
+ !Dependencies[I].IteratorExpr)
+ continue;
+ emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
+ DependenciesArray);
+ }
+  // Finally, copy in the contents of the depobj dependency arrays.
+ if (HasDepobjDeps) {
+ for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
+ if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
+ continue;
+ emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
+ DependenciesArray);
+ }
+ }
+ DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ DependenciesArray, CGF.VoidPtrTy);
+ return std::make_pair(NumOfElements, DependenciesArray);
+}
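+// E.g., for
+//   #pragma omp task depend(in: a) depend(out: b)
+// this returns NumOfElements == 2 and a constant-sized array of two records,
+// roughly {(&a, sizeof(a), 0x1), (&b, sizeof(b), 0x3)}; iterator or depobj
+// dependencies instead take the variable-sized array path above (sketch).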
+
+Address CGOpenMPRuntime::emitDepobjDependClause(
+ CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
+ SourceLocation Loc) {
+ if (Dependencies.DepExprs.empty())
+ return Address::invalid();
+ // Process list of dependencies.
+ ASTContext &C = CGM.getContext();
+ Address DependenciesArray = Address::invalid();
+ unsigned NumDependencies = Dependencies.DepExprs.size();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ RecordDecl *KmpDependInfoRD =
+ cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+
+ llvm::Value *Size;
+ // Define type kmp_depend_info[<Dependencies.size()>];
+  // For depobj, reserve one extra element to store the number of elements.
+  // This is required to handle the depobj(x) update(in) construct.
+ // kmp_depend_info[<Dependencies.size()>] deps;
+ llvm::Value *NumDepsVal;
+ CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
+ if (const auto *IE =
+ cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
+ NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
+ for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
+ llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
+ Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
+ NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
+ }
+ Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
+ NumDepsVal);
+ CharUnits SizeInBytes =
+ C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
+ llvm::Value *RecSize = CGM.getSize(SizeInBytes);
+ Size = CGF.Builder.CreateNUWMul(Size, RecSize);
+ NumDepsVal =
+ CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
+ } else {
+ QualType KmpDependInfoArrayTy = C.getConstantArrayType(
+ KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
+ nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
+ Size = CGM.getSize(Sz.alignTo(Align));
+ NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
+ }
+  // The array has to be allocated in dynamic memory.
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ // Use default allocator.
+ llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+ llvm::Value *Addr =
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_alloc),
+ Args, ".dep.arr.addr");
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
+ DependenciesArray = Address(Addr, Align);
+  // For depobj, write the number of elements into the array's first record.
+  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
+  // deps[0].base_addr = NumDependencies;
+ LValue BaseAddrLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
+ llvm::PointerUnion<unsigned *, LValue *> Pos;
+ unsigned Idx = 1;
+ LValue PosLVal;
+ if (Dependencies.IteratorExpr) {
+ PosLVal = CGF.MakeAddrLValue(
+ CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
+ C.getSizeType());
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
+ /*IsInit=*/true);
+ Pos = &PosLVal;
+ } else {
+ Pos = &Idx;
+ }
+ emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
+ DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
+ return DependenciesArray;
+}
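+// E.g., for
+//   omp_depend_t o;
+//   #pragma omp depobj(o) depend(inout: x)
+// this allocates two records via __kmpc_alloc, stores the count 1 into
+// slot 0 (base_addr), fills slot 1 with (&x, sizeof(x), 0x3), and returns
+// the address of slot 1, which is what gets stored into `o` (sketch).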
+
+void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
+ SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ LValue Base = CGF.EmitLoadOfPointerLValue(
+ DepobjLVal.getAddress(CGF),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
+ Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
+ llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
+ Addr.getPointer(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
+ DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
+ CGF.VoidPtrTy);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ // Use default allocator.
+ llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
+
+  // __kmpc_free(gtid, addr, nullptr);
+ (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_free),
+ Args);
+}
+
+void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
+ OpenMPDependClauseKind NewDepKind,
+ SourceLocation Loc) {
+ ASTContext &C = CGM.getContext();
+ QualType FlagsTy;
+ getDependTypes(C, KmpDependInfoTy, FlagsTy);
+ RecordDecl *KmpDependInfoRD =
+ cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+ llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
+ llvm::Value *NumDeps;
+ LValue Base;
+ std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
+
+ Address Begin = Base.getAddress(CGF);
+  // Compute the end pointer: one past the last kmp_depend_info record.
+  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
+  // The basic structure here is a do-while loop over the records.
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
+ llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
+ llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
+ CGF.EmitBlock(BodyBB);
+ llvm::PHINode *ElementPHI =
+ CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
+ ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
+ Begin = Address(ElementPHI, Begin.getAlignment());
+ Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
+ Base.getTBAAInfo());
+ // deps[i].flags = NewDepKind;
+ RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
+ LValue FlagsLVal = CGF.EmitLValueForField(
+ Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
+ FlagsLVal);
+
+ // Shift the address forward by one element.
+ Address ElementNext =
+ CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
+ ElementPHI->addIncoming(ElementNext.getPointer(),
+ CGF.Builder.GetInsertBlock());
+ llvm::Value *IsEmpty =
+ CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
+ CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+ // Done.
+ CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+}
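+// E.g., #pragma omp depobj(o) update(mutexinoutset) runs the loop above over
+// every record of the depobj and rewrites its flags field to 0x4, leaving
+// base_addr and len untouched (sketch).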
+
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
llvm::Function *TaskFunction,
@@ -5174,94 +5082,11 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
LValue TDBase = Result.TDBase;
const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
- ASTContext &C = CGM.getContext();
// Process list of dependences.
Address DependenciesArray = Address::invalid();
- unsigned NumDependencies = Data.Dependences.size();
- if (NumDependencies) {
- // Dependence kind for RTL.
- enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
- enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
- RecordDecl *KmpDependInfoRD;
- QualType FlagsTy =
- C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
- llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
- if (KmpDependInfoTy.isNull()) {
- KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
- KmpDependInfoRD->startDefinition();
- addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
- addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
- addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
- KmpDependInfoRD->completeDefinition();
- KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
- } else {
- KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
- }
- // Define type kmp_depend_info[<Dependences.size()>];
- QualType KmpDependInfoArrayTy = C.getConstantArrayType(
- KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
- nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
- // kmp_depend_info[<Dependences.size()>] deps;
- DependenciesArray =
- CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
- for (unsigned I = 0; I < NumDependencies; ++I) {
- const Expr *E = Data.Dependences[I].second;
- LValue Addr = CGF.EmitLValue(E);
- llvm::Value *Size;
- QualType Ty = E->getType();
- if (const auto *ASE =
- dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
- LValue UpAddrLVal =
- CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
- llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
- UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
- llvm::Value *LowIntPtr =
- CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
- llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
- Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
- } else {
- Size = CGF.getTypeSize(Ty);
- }
- LValue Base = CGF.MakeAddrLValue(
- CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
- KmpDependInfoTy);
- // deps[i].base_addr = &<Dependences[i].second>;
- LValue BaseAddrLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
- CGF.EmitStoreOfScalar(
- CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
- BaseAddrLVal);
- // deps[i].len = sizeof(<Dependences[i].second>);
- LValue LenLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), Len));
- CGF.EmitStoreOfScalar(Size, LenLVal);
- // deps[i].flags = <Dependences[i].first>;
- RTLDependenceKindTy DepKind;
- switch (Data.Dependences[I].first) {
- case OMPC_DEPEND_in:
- DepKind = DepIn;
- break;
- // Out and InOut dependencies must use the same code.
- case OMPC_DEPEND_out:
- case OMPC_DEPEND_inout:
- DepKind = DepInOut;
- break;
- case OMPC_DEPEND_mutexinoutset:
- DepKind = DepMutexInOutSet;
- break;
- case OMPC_DEPEND_source:
- case OMPC_DEPEND_sink:
- case OMPC_DEPEND_unknown:
- llvm_unreachable("Unknown task dependence type");
- }
- LValue FlagsLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
- CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
- FlagsLVal);
- }
- DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
- }
+ llvm::Value *NumOfElements;
+ std::tie(NumOfElements, DependenciesArray) =
+ emitDependClause(CGF, Data.Dependences, Loc);
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.
@@ -5273,28 +5098,30 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
llvm::Value *DepTaskArgs[7];
- if (NumDependencies) {
+ if (!Data.Dependences.empty()) {
DepTaskArgs[0] = UpLoc;
DepTaskArgs[1] = ThreadID;
DepTaskArgs[2] = NewTask;
- DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
+ DepTaskArgs[3] = NumOfElements;
DepTaskArgs[4] = DependenciesArray.getPointer();
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
- &TaskArgs,
+ auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
}
- if (NumDependencies) {
+ if (!Data.Dependences.empty()) {
CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
+ DepTaskArgs);
} else {
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_task),
TaskArgs);
}
// Check if parent region is untied and build return for untied task;
@@ -5304,26 +5131,27 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
};
llvm::Value *DepWaitTaskArgs[6];
- if (NumDependencies) {
+ if (!Data.Dependences.empty()) {
DepWaitTaskArgs[0] = UpLoc;
DepWaitTaskArgs[1] = ThreadID;
- DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
+ DepWaitTaskArgs[2] = NumOfElements;
DepWaitTaskArgs[3] = DependenciesArray.getPointer();
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
- NumDependencies, &DepWaitTaskArgs,
+ auto &M = CGM.getModule();
+ auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
+ TaskEntry, &Data, &DepWaitTaskArgs,
Loc](CodeGenFunction &CGF, PrePostActionTy &) {
- CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
// is specified.
- if (NumDependencies)
- CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
- DepWaitTaskArgs);
+ if (!Data.Dependences.empty())
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
+ DepWaitTaskArgs);
// Call proxy_task_entry(gtid, new_task);
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -5338,9 +5166,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task);
RegionCodeGenTy RCG(CodeGen);
- CommonActionTy Action(
- RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
- RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
+ CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_omp_task_begin_if0),
+ TaskArgs,
+ OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_omp_task_complete_if0),
+ TaskArgs);
RCG.setAction(Action);
RCG(CGF);
};
@@ -5434,7 +5265,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Result.TaskDupFn, CGF.VoidPtrTy)
: llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskloop),
+ TaskArgs);
}
/// Emit reduction operation for each element of array (required for
@@ -5776,8 +5609,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
Lock // kmp_critical_name *&<lock>
};
llvm::Value *Res = CGF.EmitRuntimeCall(
- createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
- : OMPRTL__kmpc_reduce),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(),
+ WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
Args);
// 5. Build switch(res)
@@ -5818,8 +5652,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
RegionCodeGenTy RCG(CodeGen);
CommonActionTy Action(
nullptr, llvm::None,
- createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
- : OMPRTL__kmpc_end_reduce),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
+ : OMPRTL___kmpc_end_reduce),
EndArgs);
RCG.setAction(Action);
RCG(CGF);
@@ -5942,7 +5777,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
Lock // kmp_critical_name *&<lock>
};
CommonActionTy Action(nullptr, llvm::None,
- createRuntimeFunction(OMPRTL__kmpc_end_reduce),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_end_reduce),
EndArgs);
AtomicRCG.setAction(Action);
AtomicRCG(CGF);
@@ -5969,12 +5805,12 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
{D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
Out << Prefix << Name << "_"
<< D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
- return Out.str();
+ return std::string(Out.str());
}
/// Emits reduction initializer function:
/// \code
-/// void @.red_init(void* %arg) {
+/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
@@ -5984,10 +5820,15 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
SourceLocation Loc,
ReductionCodeGen &RCG, unsigned N) {
ASTContext &C = CGM.getContext();
+ QualType VoidPtrTy = C.VoidPtrTy;
+ VoidPtrTy.addRestrict();
FunctionArgList Args;
- ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.emplace_back(&Param);
+ Args.emplace_back(&ParamOrig);
const auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
@@ -6012,28 +5853,25 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
CGM.getContext().getSizeType(), Loc);
}
RCG.emitAggregateType(CGF, N, Size);
- LValue SharedLVal;
+ LValue OrigLVal;
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
if (RCG.usesReductionInitializer(N)) {
- Address SharedAddr =
- CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
- CGF, CGM.getContext().VoidPtrTy,
- generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
+ Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
SharedAddr = CGF.EmitLoadOfPointer(
SharedAddr,
CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
- SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+ OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
} else {
- SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+ OrigLVal = CGF.MakeNaturalAlignAddrLValue(
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
CGM.getContext().VoidPtrTy);
}
// Emit the initializer:
// %0 = bitcast void* %arg to <type>*
// store <type> <init>, <type>* %0
- RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+ RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
[](CodeGenFunction &) { return false; });
CGF.FinishFunction();
return Fn;
@@ -6173,18 +6011,20 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
return nullptr;
// Build typedef struct:
- // kmp_task_red_input {
+ // kmp_taskred_input {
// void *reduce_shar; // shared reduction item
+ // void *reduce_orig; // original reduction item used for initialization
// size_t reduce_size; // size of data item
// void *reduce_init; // data initialization routine
// void *reduce_fini; // data finalization routine
// void *reduce_comb; // data combiner routine
// kmp_task_red_flags_t flags; // flags for additional info from compiler
- // } kmp_task_red_input_t;
+ // } kmp_taskred_input_t;
ASTContext &C = CGM.getContext();
- RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+ RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
RD->startDefinition();
const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
@@ -6199,8 +6039,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
- ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
- Data.ReductionOps);
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
+ Data.ReductionCopies, Data.ReductionOps);
for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
// kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
@@ -6212,20 +6052,24 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
// ElemLVal.reduce_shar = &Shareds[Cnt];
LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
- RCG.emitSharedLValue(CGF, Cnt);
+ RCG.emitSharedOrigLValue(CGF, Cnt);
llvm::Value *CastedShared =
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
+ // ElemLVal.reduce_orig = &Origs[Cnt];
+ LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
+ llvm::Value *CastedOrig =
+ CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
+ CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
RCG.emitAggregateType(CGF, Cnt);
llvm::Value *SizeValInChars;
llvm::Value *SizeVal;
std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
- // We use delayed creation/initialization for VLAs, array sections and
- // custom reduction initializations. It is required because runtime does not
- // provide the way to pass the sizes of VLAs/array sections to
- // initializer/combiner/finalizer functions and does not pass the pointer to
- // original reduction item to the initializer. Instead threadprivate global
- // variables are used to store these values and use them in the functions.
+    // We use delayed creation/initialization for VLAs and array sections. It
+    // is required because the runtime does not provide a way to pass the
+    // sizes of VLAs/array sections to the initializer/combiner/finalizer
+    // functions. Instead, threadprivate global variables are used to store
+    // these values, and the functions read them from there.
bool DelayedCreation = !!SizeVal;
SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
/*isSigned=*/false);
@@ -6236,7 +6080,6 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
llvm::Value *InitAddr =
CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
CGF.EmitStoreOfScalar(InitAddr, InitLVal);
- DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
// ElemLVal.reduce_fini = fini;
LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
@@ -6260,16 +6103,52 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
FlagsLVal.getType());
}
- // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
- // *data);
+ if (Data.IsReductionWithTaskMod) {
+ // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy, /*isSigned=*/true);
+ llvm::Value *Args[] = {
+ IdentTLoc, GTid,
+ llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
+ /*isSigned=*/true),
+ llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TaskRedInput.getPointer(), CGM.VoidPtrTy)};
+ return CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
+ Args);
+ }
+ // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
llvm::Value *Args[] = {
CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
/*isSigned=*/true),
llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
CGM.VoidPtrTy)};
- return CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
+ return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskred_init),
+ Args);
+}
+
+void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ bool IsWorksharingReduction) {
+  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
+  // gtid, int is_ws);
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy, /*isSigned=*/true);
+ llvm::Value *Args[] = {IdentTLoc, GTid,
+ llvm::ConstantInt::get(CGM.IntTy,
+ IsWorksharingReduction ? 1 : 0,
+ /*isSigned=*/true)};
+ (void)CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
+ Args);
}
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
@@ -6287,16 +6166,6 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
}
- // Store address of the original reduction item if custom initializer is used.
- if (RCG.usesReductionInitializer(N)) {
- Address SharedAddr = getAddrOfArtificialThreadPrivate(
- CGF, CGM.getContext().VoidPtrTy,
- generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
- CGF.Builder.CreateStore(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
- SharedAddr, /*IsVolatile=*/false);
- }
}
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
@@ -6313,7 +6182,9 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
return Address(
CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
+ Args),
SharedLVal.getAlignment());
}
@@ -6321,11 +6192,19 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
- // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- // Ignore return result until untied tasks are supported.
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
+
+ if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
+ OMPBuilder.CreateTaskwait(CGF.Builder);
+ } else {
+ // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+ // Ignore return result until untied tasks are supported.
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
+ Args);
+ }
+
if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
Region->emitUntiedSwitch(CGF);
}
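+// E.g., a bare `#pragma omp taskwait` lowers to
+//   __kmpc_omp_taskwait(&loc, gtid);
+// unless the OpenMPIRBuilder path is enabled (via -fopenmp-enable-irbuilder,
+// assuming that cc1 option spelling), in which case CreateTaskwait emits the
+// equivalent call.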
@@ -6382,7 +6261,9 @@ void CGOpenMPRuntime::emitCancellationPointCall(
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
// Ignore return result until untied tasks are supported.
llvm::Value *Result = CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
+ Args);
// if (__kmpc_cancellationpoint()) {
// exit from construct;
// }
@@ -6407,17 +6288,18 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
return;
// Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind);
+ auto &M = CGM.getModule();
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
- auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&ThenGen = [this, &M, Loc, CancelRegion,
+ OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
// Ignore return result until untied tasks are supported.
llvm::Value *Result = CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
// if (__kmpc_cancel()) {
// exit from construct;
// }
@@ -6442,16 +6324,106 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
}
}
+namespace {
+/// Cleanup action for uses_allocators support.
+class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
+ ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
+
+public:
+ OMPUsesAllocatorsActionTy(
+ ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
+ : Allocators(Allocators) {}
+ void Enter(CodeGenFunction &CGF) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ for (const auto &AllocatorData : Allocators) {
+ CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
+ CGF, AllocatorData.first, AllocatorData.second);
+ }
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ for (const auto &AllocatorData : Allocators) {
+ CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
+ AllocatorData.first);
+ }
+ }
+};
+} // namespace
+
void CGOpenMPRuntime::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
assert(!ParentName.empty() && "Invalid target region parent name!");
HasEmittedTargetRegion = true;
+ SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
+ for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
+ for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
+ const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
+ if (!D.AllocatorTraits)
+ continue;
+ Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
+ }
+ }
+ OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
+ CodeGen.setAction(UsesAllocatorAction);
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
}
+void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
+ const Expr *Allocator,
+ const Expr *AllocatorTraits) {
+ llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
+ ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
+ // Use default memspace handle.
+ llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ llvm::Value *NumTraits = llvm::ConstantInt::get(
+ CGF.IntTy, cast<ConstantArrayType>(
+ AllocatorTraits->getType()->getAsArrayTypeUnsafe())
+ ->getSize()
+ .getLimitedValue());
+ LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
+ Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
+ AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
+ AllocatorTraitsLVal.getBaseInfo(),
+ AllocatorTraitsLVal.getTBAAInfo());
+ llvm::Value *Traits =
+ CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
+
+ llvm::Value *AllocatorVal =
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_init_allocator),
+ {ThreadId, MemSpaceHandle, NumTraits, Traits});
+ // Store to allocator.
+ CGF.EmitVarDecl(*cast<VarDecl>(
+ cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
+ LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
+ AllocatorVal =
+ CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
+ Allocator->getType(), Allocator->getExprLoc());
+ CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
+}
+
+void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
+ const Expr *Allocator) {
+ llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
+ ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
+ LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
+ llvm::Value *AllocatorVal =
+ CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
+ AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
+ CGF.getContext().VoidPtrTy,
+ Allocator->getExprLoc());
+ (void)CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___kmpc_destroy_allocator),
+ {ThreadId, AllocatorVal});
+}
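+// E.g., for
+//   #pragma omp target uses_allocators(my_alloc(my_traits))
+// (placeholder names) the Enter/Exit actions above bracket the region with,
+// roughly:
+//   my_alloc = (omp_allocator_handle_t)__kmpc_init_allocator(
+//       gtid, /*memspace=*/NULL, num_traits, traits);
+//   ... target region ...
+//   __kmpc_destroy_allocator(gtid, my_alloc);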
+
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -6483,7 +6455,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
+ OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
// If this target outline function is not an offload entry, we don't need to
// register it.
@@ -6669,6 +6641,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -6684,6 +6658,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -6697,6 +6673,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_requires:
case OMPD_unknown:
break;
+ default:
+ break;
}
llvm_unreachable("Unexpected directive kind.");
}
@@ -6980,6 +6958,8 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -6995,6 +6975,8 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -7008,6 +6990,8 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_requires:
case OMPD_unknown:
break;
+ default:
+ break;
}
llvm_unreachable("Unsupported directive kind.");
}
@@ -7044,7 +7028,7 @@ public:
OMP_MAP_TARGET_PARAM = 0x20,
/// Signal that the runtime library has to return the device pointer
/// in the current position for the data being mapped. Used when we have the
- /// use_device_ptr clause.
+ /// use_device_ptr or use_device_addr clause.
OMP_MAP_RETURN_PARAM = 0x40,
/// This flag signals that the reference being passed is a pointer to
/// private data.
@@ -7112,26 +7096,30 @@ private:
ArrayRef<OpenMPMapModifierKind> MapModifiers;
bool ReturnDevicePointer = false;
bool IsImplicit = false;
+ bool ForDeviceAddr = false;
MapInfo() = default;
MapInfo(
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
OpenMPMapClauseKind MapType,
- ArrayRef<OpenMPMapModifierKind> MapModifiers,
- bool ReturnDevicePointer, bool IsImplicit)
+ ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
+ bool IsImplicit, bool ForDeviceAddr = false)
: Components(Components), MapType(MapType), MapModifiers(MapModifiers),
- ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
+ ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
+ ForDeviceAddr(ForDeviceAddr) {}
};
- /// If use_device_ptr is used on a pointer which is a struct member and there
- /// is no map information about it, then emission of that entry is deferred
- /// until the whole struct has been processed.
+ /// If use_device_ptr or use_device_addr is used on a decl which is a struct
+ /// member and there is no map information about it, then emission of that
+ /// entry is deferred until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
const Expr *IE = nullptr;
const ValueDecl *VD = nullptr;
+ bool ForDeviceAddr = false;
- DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
- : IE(IE), VD(VD) {}
+ DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
+ bool ForDeviceAddr)
+ : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
};
/// The target directive from where the mappable clauses were extracted. It
@@ -7158,6 +7146,20 @@ private:
llvm::Value *getExprTypeSize(const Expr *E) const {
QualType ExprTy = E->getType().getCanonicalType();
+ // Calculate the size for array shaping expression.
+ if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
+ llvm::Value *Size =
+ CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
+ for (const Expr *SE : OAE->getDimensions()) {
+ llvm::Value *Sz = CGF.EmitScalarExpr(SE);
+ Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
+ CGF.getContext().getSizeType(),
+ SE->getExprLoc());
+ Size = CGF.Builder.CreateNUWMul(Size, Sz);
+ }
+ return Size;
+ }
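+    // E.g., mapping `([n][m])p` with `int *p` yields n * m * sizeof(int),
+    // with each dimension first widened to size_t (sketch).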
+
// Reference types are ignored for mapping purposes.
if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
ExprTy = RefTy->getPointeeType().getCanonicalType();
@@ -7173,7 +7175,7 @@ private:
// If there is no length associated with the expression and lower bound is
    // not specified either, that means we are using the whole length of the
// base.
- if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
!OAE->getLowerBound())
return CGF.getTypeSize(BaseTy);
@@ -7188,7 +7190,7 @@ private:
// If we don't have a length at this point, that is because we have an
// array section with a single element.
- if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
+ if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
return ElemSize;
if (const Expr *LenExpr = OAE->getLength()) {
@@ -7198,7 +7200,7 @@ private:
LenExpr->getExprLoc());
return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
}
- assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
OAE->getLowerBound() && "expected array_section[lb:].");
// Size = sizetype - lb * elemtype;
llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
@@ -7271,7 +7273,7 @@ private:
return false;
    // An array section with no colon always refers to a single element.
- if (OASE->getColonLoc().isInvalid())
+ if (OASE->getColonLocFirst().isInvalid())
return false;
const Expr *Length = OASE->getLength();
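The three cases handled here follow the usual OpenMP array section forms; assuming int a[10], a brief sketch of the sizes this routine computes:

    void demo() {
      int a[10];
      // a[2:5] -> 5 * sizeof(int)             (explicit length)
      // a[2:]  -> sizeof(a) - 2 * sizeof(int) (colon but no length)
      // a[2]   -> sizeof(int)                 (no colon: single element)
    #pragma omp target map(tofrom : a[2:5])
      { a[2] = 0; }
    }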
@@ -7305,13 +7307,12 @@ private:
/// \a IsFirstComponent should be set to true if the provided set of
/// components is the first associated with a capture.
void generateInfoForComponentList(
- OpenMPMapClauseKind MapType,
- ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
- bool IsImplicit,
+ bool IsImplicit, bool ForDeviceAddr = false,
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
OverlappedElements = llvm::None) const {
// The following summarizes what has to be generated for each map and the
@@ -7489,6 +7490,7 @@ private:
const Expr *AssocExpr = I->getAssociatedExpression();
const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
+ const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
if (isa<MemberExpr>(AssocExpr)) {
// The base is the 'this' pointer. The content of the pointer is going
@@ -7498,6 +7500,11 @@ private:
(OASE &&
isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
+ } else if (OAShE &&
+ isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
+ BP = Address(
+ CGF.EmitScalarExpr(OAShE->getBase()),
+ CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
} else {
// The base is the reference to the variable.
// BP = &Var.
@@ -7580,29 +7587,44 @@ private:
// types.
const auto *OASE =
dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
+ const auto *OAShE =
+ dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
+ const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
+ const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
bool IsPointer =
+ OAShE ||
(OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
.getCanonicalType()
->isAnyPointerType()) ||
I->getAssociatedExpression()->getType()->isAnyPointerType();
+ bool IsNonDerefPointer = IsPointer && !UO && !BO;
- if (Next == CE || IsPointer || IsFinalArraySection) {
+ if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
// If this is not the last component, we expect the pointer to be
// associated with an array expression or member expression.
assert((Next == CE ||
isa<MemberExpr>(Next->getAssociatedExpression()) ||
isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
- isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
+ isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
+ isa<UnaryOperator>(Next->getAssociatedExpression()) ||
+ isa<BinaryOperator>(Next->getAssociatedExpression())) &&
"Unexpected expression");
- Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
- .getAddress(CGF);
+ Address LB = Address::invalid();
+ if (OAShE) {
+ LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
+ CGF.getContext().getTypeAlignInChars(
+ OAShE->getBase()->getType()));
+ } else {
+ LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
+ .getAddress(CGF);
+ }
// If this component is a pointer inside the base struct then we don't
// need to create any entry for it - it will be combined with the object
// it is pointing to into a single PTR_AND_OBJ entry.
- bool IsMemberPointer =
- IsPointer && EncounteredME &&
+ bool IsMemberPointerOrAddr =
+ (IsPointer || ForDeviceAddr) && EncounteredME &&
(dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
EncounteredME);
if (!OverlappedElements.empty()) {
@@ -7669,7 +7691,7 @@ private:
break;
}
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
- if (!IsMemberPointer) {
+ if (!IsMemberPointerOrAddr) {
BasePointers.push_back(BP.getPointer());
Pointers.push_back(LB.getPointer());
Sizes.push_back(
@@ -7708,13 +7730,20 @@ private:
// mapped member. If the parent is "*this", then the value declaration
// is nullptr.
if (EncounteredME) {
- const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
+ const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
unsigned FieldIndex = FD->getFieldIndex();
// Update info about the lowest and highest elements for this struct
if (!PartialStruct.Base.isValid()) {
PartialStruct.LowestElem = {FieldIndex, LB};
- PartialStruct.HighestElem = {FieldIndex, LB};
+ if (IsFinalArraySection) {
+ Address HB =
+ CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
+ .getAddress(CGF);
+ PartialStruct.HighestElem = {FieldIndex, HB};
+ } else {
+ PartialStruct.HighestElem = {FieldIndex, LB};
+ }
PartialStruct.Base = BP;
} else if (FieldIndex < PartialStruct.LowestElem.first) {
PartialStruct.LowestElem = {FieldIndex, LB};
@@ -7851,6 +7880,19 @@ public:
for (const auto *D : C->varlists())
FirstPrivateDecls.try_emplace(
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
+ // Extract implicit firstprivates from uses_allocators clauses.
+ for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
+ for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
+ OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
+ if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
+ FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
+ /*Implicit=*/true);
+ else if (const auto *VD = dyn_cast<VarDecl>(
+ cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
+ ->getDecl()))
+ FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
+ }
+ }
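A hypothetical source-level example of the clause handled by the loop above: both the traits array named in uses_allocators and the allocator handle itself end up as implicit firstprivates on the target region.

    #include <omp.h>

    omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};

    void demo() {
      omp_allocator_handle_t my_alloc = omp_null_allocator;
      // 'traits' (AllocatorTraits) and 'my_alloc' (Allocator) are captured
      // firstprivate implicitly by the code above.
    #pragma omp target uses_allocators(my_alloc(traits))
      { /* my_alloc is a usable allocator handle here */ }
    }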
// Extract device pointer clause information.
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
for (auto L : C->component_lists())
@@ -7910,17 +7952,18 @@ public:
// Helper function to fill the information map for the different supported
// clauses.
- auto &&InfoGen = [&Info](
- const ValueDecl *D,
- OMPClauseMappableExprCommon::MappableExprComponentListRef L,
- OpenMPMapClauseKind MapType,
- ArrayRef<OpenMPMapModifierKind> MapModifiers,
- bool ReturnDevicePointer, bool IsImplicit) {
- const ValueDecl *VD =
- D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
- IsImplicit);
- };
+ auto &&InfoGen =
+ [&Info](const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ bool ReturnDevicePointer, bool IsImplicit,
+ bool ForDeviceAddr = false) {
+ const ValueDecl *VD =
+ D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
+ IsImplicit, ForDeviceAddr);
+ };
assert(CurDir.is<const OMPExecutableDirective *>() &&
"Expect a executable directive");
@@ -7990,7 +8033,7 @@ public:
// partial struct.
InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
- DeferredInfo[nullptr].emplace_back(IE, VD);
+ DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
} else {
llvm::Value *Ptr =
CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
@@ -8002,6 +8045,70 @@ public:
}
}
+ // Look at the use_device_addr clause information and mark the existing map
+ // entries as such. If there is no map information for an entry in the
+  // use_device_addr list, we create one with map type 'alloc' and a zero-size
+  // section. It is the user's fault if that was not mapped before. If there is
+ // no map information and the pointer is a struct member, then we defer the
+ // emission of that entry until the whole struct has been processed.
+ llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
+ for (const auto *C :
+ CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
+ for (const auto L : C->component_lists()) {
+ assert(!L.second.empty() && "Not expecting empty list of components!");
+ const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+ if (!Processed.insert(VD).second)
+ continue;
+ VD = cast<ValueDecl>(VD->getCanonicalDecl());
+ const Expr *IE = L.second.back().getAssociatedExpression();
+ // If the first component is a member expression, we have to look into
+ // 'this', which maps to null in the map of map information. Otherwise
+ // look directly for the information.
+ auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
+
+ // We potentially have map information for this declaration already.
+ // Look for the first set of components that refer to it.
+ if (It != Info.end()) {
+ auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
+ return MI.Components.back().getAssociatedDeclaration() == VD;
+ });
+ // If we found a map entry, signal that the pointer has to be returned
+ // and move on to the next declaration.
+ if (CI != It->second.end()) {
+ CI->ReturnDevicePointer = true;
+ continue;
+ }
+ }
+
+      // We didn't find any match in our map information, so generate a
+      // zero-size array section. If the pointer is a struct member, we defer
+      // this action until the whole struct has been processed.
+ if (isa<MemberExpr>(IE)) {
+ // Insert the pointer into Info to be processed by
+ // generateInfoForComponentList. Because it is a member pointer
+ // without a pointee, no entry will be generated for it, therefore
+ // we need to generate one after the whole struct has been processed.
+ // Nonetheless, generateInfoForComponentList must be called to take
+ // the pointer into account for the calculation of the range of the
+ // partial struct.
+ InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
+ /*ReturnDevicePointer=*/false, C->isImplicit(),
+ /*ForDeviceAddr=*/true);
+ DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
+ } else {
+ llvm::Value *Ptr;
+ if (IE->isGLValue())
+ Ptr = CGF.EmitLValue(IE).getPointer(CGF);
+ else
+ Ptr = CGF.EmitScalarExpr(IE);
+ BasePointers.emplace_back(Ptr, VD);
+ Pointers.push_back(Ptr);
+ Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
+ Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+ }
+ }
+ }
+
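A hedged usage sketch for the non-member branch just above: with use_device_addr, the listed variable is referenced through its device address inside the region, which the runtime returns via the zero-size RETURN_PARAM entry.

    void demo() {
      int x = 0;
    #pragma omp target data map(tofrom : x) use_device_addr(x)
      {
        int *dev_x = &x; // &x evaluates to the corresponding device address
      }
    }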
for (const auto &M : Info) {
// We need to know when we generate information for the first component
// associated with a capture, because the mapping flags depend on it.
@@ -8020,10 +8127,10 @@ public:
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = CurBasePointers.size();
- generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
- CurBasePointers, CurPointers, CurSizes,
- CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit);
+ generateInfoForComponentList(
+ L.MapType, L.MapModifiers, L.Components, CurBasePointers,
+ CurPointers, CurSizes, CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
@@ -8043,21 +8150,35 @@ public:
}
// Append any pending zero-length pointers which are struct members and
- // used with use_device_ptr.
+ // used with use_device_ptr or use_device_addr.
auto CI = DeferredInfo.find(M.first);
if (CI != DeferredInfo.end()) {
for (const DeferredDevicePtrEntryTy &L : CI->second) {
- llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
- llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
- this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
+ llvm::Value *BasePtr;
+ llvm::Value *Ptr;
+ if (L.ForDeviceAddr) {
+ if (L.IE->isGLValue())
+ Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
+ else
+ Ptr = this->CGF.EmitScalarExpr(L.IE);
+ BasePtr = Ptr;
+ // Entry is RETURN_PARAM. Also, set the placeholder value
+ // MEMBER_OF=FFFF so that the entry is later updated with the
+ // correct value of MEMBER_OF.
+ CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
+ } else {
+ BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
+ Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
+ L.IE->getExprLoc());
+ // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
+ // value MEMBER_OF=FFFF so that the entry is later updated with the
+ // correct value of MEMBER_OF.
+ CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
+ OMP_MAP_MEMBER_OF);
+ }
CurBasePointers.emplace_back(BasePtr, L.VD);
CurPointers.push_back(Ptr);
CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
- // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
- // value MEMBER_OF=FFFF so that the entry is later updated with the
- // correct value of MEMBER_OF.
- CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
- OMP_MAP_MEMBER_OF);
}
}
@@ -8126,10 +8247,10 @@ public:
for (const MapInfo &L : M.second) {
assert(!L.Components.empty() &&
"Not expecting declaration with no component lists.");
- generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
- CurBasePointers, CurPointers, CurSizes,
- CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit);
+ generateInfoForComponentList(
+ L.MapType, L.MapModifiers, L.Components, CurBasePointers,
+ CurPointers, CurSizes, CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
IsFirstComponentList = false;
}
@@ -8395,10 +8516,10 @@ public:
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
OverlappedComponents = Pair.getSecond();
bool IsFirstComponentList = true;
- generateInfoForComponentList(MapType, MapModifiers, Components,
- BasePointers, Pointers, Sizes, Types,
- PartialStruct, IsFirstComponentList,
- IsImplicit, OverlappedComponents);
+ generateInfoForComponentList(
+ MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
+ Types, PartialStruct, IsFirstComponentList, IsImplicit,
+ /*ForDeviceAddr=*/false, OverlappedComponents);
}
// Go through other elements without overlapped elements.
bool IsFirstComponentList = OverlappedData.empty();
@@ -8759,6 +8880,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -8774,6 +8897,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -8786,6 +8911,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unexpected directive.");
}
}
@@ -8935,7 +9061,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
// pre-existing components.
llvm::Value *OffloadingArgs[] = {Handle};
llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___tgt_mapper_num_components),
+ OffloadingArgs);
llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
PreviousSize,
MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
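The shift folds the previous component count into the MEMBER_OF field of the 64-bit map-type flags. Assuming, for this sketch, that the field occupies the high 16 bits so getFlagMemberOffset() is 48, the encoding amounts to:

    #include <cstdint>

    // MEMBER_OF mask: top 16 bits of the flags (assumption for this sketch).
    const std::uint64_t MemberOfMask = 0xffff000000000000ULL;

    std::uint64_t encodeMemberOf(std::uint64_t Flags, std::uint64_t Idx) {
      return Flags | (Idx << 48); // Idx lands in the MEMBER_OF field
    }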
@@ -9041,7 +9169,8 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
CurSizeArg, CurMapType};
MapperCGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_push_mapper_component),
OffloadingArgs);
}
@@ -9085,8 +9214,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
// Evaluate if this is an array section.
llvm::BasicBlock *IsDeleteBB =
- MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
- llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
+ MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
+ llvm::BasicBlock *BodyBB =
+ MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
@@ -9099,10 +9229,10 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
llvm::Value *DeleteCond;
if (IsInit) {
DeleteCond = MapperCGF.Builder.CreateIsNull(
- DeleteBit, "omp.array" + Prefix + ".delete");
+ DeleteBit, getName({"omp.array", Prefix, ".delete"}));
} else {
DeleteCond = MapperCGF.Builder.CreateIsNotNull(
- DeleteBit, "omp.array" + Prefix + ".delete");
+ DeleteBit, getName({"omp.array", Prefix, ".delete"}));
}
MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
@@ -9121,7 +9251,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
// data structure.
llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
MapperCGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___tgt_push_mapper_component),
+ OffloadingArgs);
}
void CGOpenMPRuntime::emitTargetNumIterationsCall(
@@ -9143,7 +9275,9 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
llvm::Value *Args[] = {DeviceID, NumIterations};
CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
+ Args);
}
};
emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
@@ -9152,7 +9286,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
void CGOpenMPRuntime::emitTargetCall(
CodeGenFunction &CGF, const OMPExecutableDirective &D,
llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
- const Expr *Device,
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
const OMPLoopDirective &D)>
SizeEmitter) {
@@ -9176,6 +9310,16 @@ void CGOpenMPRuntime::emitTargetCall(
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
&MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
+ if (Device.getInt() == OMPC_DEVICE_ancestor) {
+ // Reverse offloading is not supported, so just execute on the host.
+ if (RequiresOuterTask) {
+ CapturedVars.clear();
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ }
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+ return;
+ }
+
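A hypothetical source form that takes this early-exit path; because reverse offloading is unsupported, the outlined body is simply invoked on the host.

    // Requires '#pragma omp requires reverse_offload'; the region is
    // nevertheless executed on the host by the branch above.
    void demo() {
    #pragma omp target device(ancestor : 1)
      { /* runs on the host via emitOutlinedFunctionCall */ }
    }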
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
// pointer is used by the runtime library to identify the current target
@@ -9190,9 +9334,13 @@ void CGOpenMPRuntime::emitTargetCall(
// Emit device ID if any.
llvm::Value *DeviceID;
- if (Device) {
- DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int64Ty, /*isSigned=*/true);
+ if (Device.getPointer()) {
+ assert((Device.getInt() == OMPC_DEVICE_unknown ||
+ Device.getInt() == OMPC_DEVICE_device_num) &&
+ "Expected device_num modifier.");
+ llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
+ DeviceID =
+ CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
} else {
DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
}
@@ -9256,8 +9404,9 @@ void CGOpenMPRuntime::emitTargetCall(
NumTeams,
NumThreads};
Return = CGF.EmitRuntimeCall(
- createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
- : OMPRTL__tgt_target_teams),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
+ : OMPRTL___tgt_target_teams),
OffloadingArgs);
} else {
llvm::Value *OffloadingArgs[] = {DeviceID,
@@ -9268,8 +9417,9 @@ void CGOpenMPRuntime::emitTargetCall(
InputInfo.SizesArray.getPointer(),
MapTypesArray};
Return = CGF.EmitRuntimeCall(
- createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
- : OMPRTL__tgt_target),
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(),
+ HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
OffloadingArgs);
}
@@ -9521,6 +9671,8 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -9536,6 +9688,8 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -9548,6 +9702,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
return;
@@ -9774,22 +9929,40 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
" Expected target-based directive.");
}
-void CGOpenMPRuntime::checkArchForUnifiedAddressing(
- const OMPRequiresDecl *D) {
+void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
for (const OMPClause *Clause : D->clauselists()) {
if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
HasRequiresUnifiedSharedMemory = true;
- break;
+ } else if (const auto *AC =
+ dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
+ switch (AC->getAtomicDefaultMemOrderKind()) {
+ case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
+ RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
+ break;
+ case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
+ RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
+ RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
+ break;
+ case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
+ break;
+ }
}
}
}
+llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
+ return RequiresAtomicOrdering;
+}
+
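For example, a requires directive picks the ordering that getDefaultMemoryOrdering later reports (acq_rel -> AcquireRelease, seq_cst -> SequentiallyConsistent, relaxed -> Monotonic); a short sketch:

    #pragma omp requires atomic_default_mem_order(acq_rel)

    void demo(int *p) {
    #pragma omp atomic update // now defaults to acquire/release ordering
      *p += 1;
    }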
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
LangAS &AS) {
if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
return false;
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
switch(A->getAllocatorType()) {
+ case OMPAllocateDeclAttr::OMPNullMemAlloc:
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
// Not supported, fallback to the default mem space.
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
@@ -9865,7 +10038,7 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
const auto &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
std::string ReqName = getName({"omp_offloading", "requires_reg"});
- RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
+ RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
// TODO: check for other requires clauses.
@@ -9880,8 +10053,9 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
"Target or declare target region expected.");
if (HasRequiresUnifiedSharedMemory)
Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
- llvm::ConstantInt::get(CGM.Int64Ty, Flags));
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_register_requires),
+ llvm::ConstantInt::get(CGM.Int64Ty, Flags));
CGF.FinishFunction();
}
return RequiresRegFn;
@@ -9907,7 +10081,8 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
- llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
+ llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_fork_teams);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
@@ -9935,7 +10110,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
ThreadLimitVal};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_push_num_teams),
PushNumTeamsArgs);
}
@@ -9989,7 +10165,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_target_data_begin),
OffloadingArgs);
// If device pointer privatization is required, emit the body of the region
@@ -10025,7 +10202,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_target_data_end),
OffloadingArgs);
};
@@ -10105,19 +10283,19 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
// Select the right runtime function call for each expected standalone
// directive.
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
- OpenMPRTLFunction RTLFn;
+ RuntimeFunction RTLFn;
switch (D.getDirectiveKind()) {
case OMPD_target_enter_data:
- RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
- : OMPRTL__tgt_target_data_begin;
+ RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
+ : OMPRTL___tgt_target_data_begin;
break;
case OMPD_target_exit_data:
- RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
- : OMPRTL__tgt_target_data_end;
+ RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
+ : OMPRTL___tgt_target_data_end;
break;
case OMPD_target_update:
- RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
- : OMPRTL__tgt_target_data_update;
+ RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
+ : OMPRTL___tgt_target_data_update;
break;
case OMPD_parallel:
case OMPD_for:
@@ -10144,6 +10322,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_distribute:
@@ -10156,6 +10336,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -10178,10 +10360,13 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_target_parallel_for_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unexpected standalone target data directive.");
break;
}
- CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+ OffloadingArgs);
};
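Restating the switch above in source terms (nowait selects the _nowait variant of each entry point):

    int buf[64];

    void demo() {
    #pragma omp target enter data nowait map(to : buf) // __tgt_target_data_begin_nowait
    #pragma omp target update from(buf)                // __tgt_target_data_update
    #pragma omp target exit data map(from : buf)       // __tgt_target_data_end
    }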
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
@@ -10343,7 +10528,7 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
break;
case Linear:
Out << 'l';
- if (!!ParamAttr.StrideOrArg)
+ if (ParamAttr.StrideOrArg != 1)
Out << ParamAttr.StrideOrArg;
break;
case Uniform:
@@ -10420,7 +10605,7 @@ static bool getAArch64PBV(QualType QT, ASTContext &C) {
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
- if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
+ if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
QualType PTy = QT.getCanonicalType()->getPointeeType();
if (getAArch64PBV(PTy, C))
return C.getTypeSize(PTy);
@@ -10483,7 +10668,7 @@ static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
Out << 'l';
// Don't print the step value if it is not present or if it is
// equal to 1.
- if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
+ if (ParamAttr.StrideOrArg != 1)
Out << ParamAttr.StrideOrArg;
break;
case Uniform:
@@ -10498,7 +10683,7 @@ static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
Out << 'a' << ParamAttr.Alignment;
}
- return Out.str();
+ return std::string(Out.str());
}
// Function used to add the attribute. The parameter `VLEN` is
@@ -10721,15 +10906,24 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
for (const Expr *E : Attr->linears()) {
E = E->IgnoreParenImpCasts();
unsigned Pos;
+ // Rescaling factor needed to compute the linear parameter
+ // value in the mangled name.
+ unsigned PtrRescalingFactor = 1;
if (isa<CXXThisExpr>(E)) {
Pos = ParamPositions[FD];
} else {
const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
->getCanonicalDecl();
Pos = ParamPositions[PVD];
+ if (auto *P = dyn_cast<PointerType>(PVD->getType()))
+ PtrRescalingFactor = CGM.getContext()
+ .getTypeSizeInChars(P->getPointeeType())
+ .getQuantity();
}
ParamAttrTy &ParamAttr = ParamAttrs[Pos];
ParamAttr.Kind = Linear;
+    // Assume a stride of 1 for `linear` without modifiers.
+ ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
if (*SI) {
Expr::EvalResult Result;
if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
@@ -10745,6 +10939,11 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamAttr.StrideOrArg = Result.Val.getInt();
}
}
+ // If we are using a linear clause on a pointer, we need to
+ // rescale the value of linear_step with the byte size of the
+ // pointee type.
+ if (Linear == ParamAttr.Kind)
+ ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
++SI;
++MI;
}
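Making the rescaling concrete with a hypothetical declaration: a step of 2 on a double* parameter is mangled with stride 2 * sizeof(double) = 16.

    // The vector variants encode this parameter as 'l16' rather than 'l2'.
    #pragma omp declare simd linear(p : 2)
    void accumulate(double *p);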
@@ -10837,10 +11036,9 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
// dims.upper = num_iterations;
LValue UpperLVal = CGF.EmitLValueForField(
DimsLVal, *std::next(RD->field_begin(), UpperFD));
- llvm::Value *NumIterVal =
- CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
- D.getNumIterations()->getType(), Int64Ty,
- D.getNumIterations()->getExprLoc());
+ llvm::Value *NumIterVal = CGF.EmitScalarConversion(
+ CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
+ Int64Ty, NumIterations[I]->getExprLoc());
CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
// dims.stride = 1;
LValue StrideLVal = CGF.EmitLValueForField(
@@ -10859,13 +11057,13 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
CGM.VoidPtrTy)};
- llvm::FunctionCallee RTLFn =
- createRuntimeFunction(OMPRTL__kmpc_doacross_init);
+ llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_doacross_init);
CGF.EmitRuntimeCall(RTLFn, Args);
llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
- llvm::FunctionCallee FiniRTLFn =
- createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
+ llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_doacross_fini);
CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
}
@@ -10893,10 +11091,12 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
llvm::FunctionCallee RTLFn;
if (C->getDependencyKind() == OMPC_DEPEND_source) {
- RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
+ RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___kmpc_doacross_post);
} else {
assert(C->getDependencyKind() == OMPC_DEPEND_sink);
- RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
+ RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+ OMPRTL___kmpc_doacross_wait);
}
CGF.EmitRuntimeCall(RTLFn, Args);
}
@@ -10969,7 +11169,8 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address::invalid();
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
- if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
+ if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
+ AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator())
return Address::invalid();
llvm::Value *Size;
@@ -10999,296 +11200,23 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
llvm::Value *Args[] = {ThreadID, Size, Allocator};
llvm::Value *Addr =
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
- CVD->getName() + ".void.addr");
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_alloc),
+ Args, getName({CVD->getName(), ".void.addr"}));
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
- llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+ llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_free);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
- CVD->getName() + ".addr");
+ getName({CVD->getName(), ".addr"}));
return Address(Addr, Align);
}
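A sketch of the pattern this emits, with the runtime signatures approximated (the exact prototypes live in the OpenMP runtime, not here):

    extern "C" void *__kmpc_alloc(int gtid, unsigned long size, void *allocator);
    extern "C" void __kmpc_free(int gtid, void *ptr, void *allocator);

    void demo(int tid, void *allocator) {
      void *vp = __kmpc_alloc(tid, sizeof(double), allocator); // x.void.addr
      double *xp = static_cast<double *>(vp);                  // x.addr
      *xp = 0.0; /* region body uses the allocated storage */
      __kmpc_free(tid, vp, allocator); // registered as a cleanup above
    }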
-namespace {
-using OMPContextSelectorData =
- OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
-using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
-} // anonymous namespace
-
-/// Checks current context and returns true if it matches the context selector.
-template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
- typename... Arguments>
-static bool checkContext(const OMPContextSelectorData &Data,
- Arguments... Params) {
- assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
- "Unknown context selector or context selector set.");
- return false;
-}
-
-/// Checks for implementation={vendor(<vendor>)} context selector.
-/// \returns true iff <vendor>="llvm", false otherwise.
-template <>
-bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
- const OMPContextSelectorData &Data) {
- return llvm::all_of(Data.Names,
- [](StringRef S) { return !S.compare_lower("llvm"); });
-}
-
-/// Checks for device={kind(<kind>)} context selector.
-/// \returns true if <kind>="host" and compilation is for host.
-/// true if <kind>="nohost" and compilation is for device.
-/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
-/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
-/// false otherwise.
-template <>
-bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
- const OMPContextSelectorData &Data, CodeGenModule &CGM) {
- for (StringRef Name : Data.Names) {
- if (!Name.compare_lower("host")) {
- if (CGM.getLangOpts().OpenMPIsDevice)
- return false;
- continue;
- }
- if (!Name.compare_lower("nohost")) {
- if (!CGM.getLangOpts().OpenMPIsDevice)
- return false;
- continue;
- }
- switch (CGM.getTriple().getArch()) {
- case llvm::Triple::arm:
- case llvm::Triple::armeb:
- case llvm::Triple::aarch64:
- case llvm::Triple::aarch64_be:
- case llvm::Triple::aarch64_32:
- case llvm::Triple::ppc:
- case llvm::Triple::ppc64:
- case llvm::Triple::ppc64le:
- case llvm::Triple::x86:
- case llvm::Triple::x86_64:
- if (Name.compare_lower("cpu"))
- return false;
- break;
- case llvm::Triple::amdgcn:
- case llvm::Triple::nvptx:
- case llvm::Triple::nvptx64:
- if (Name.compare_lower("gpu"))
- return false;
- break;
- case llvm::Triple::UnknownArch:
- case llvm::Triple::arc:
- case llvm::Triple::avr:
- case llvm::Triple::bpfel:
- case llvm::Triple::bpfeb:
- case llvm::Triple::hexagon:
- case llvm::Triple::mips:
- case llvm::Triple::mipsel:
- case llvm::Triple::mips64:
- case llvm::Triple::mips64el:
- case llvm::Triple::msp430:
- case llvm::Triple::r600:
- case llvm::Triple::riscv32:
- case llvm::Triple::riscv64:
- case llvm::Triple::sparc:
- case llvm::Triple::sparcv9:
- case llvm::Triple::sparcel:
- case llvm::Triple::systemz:
- case llvm::Triple::tce:
- case llvm::Triple::tcele:
- case llvm::Triple::thumb:
- case llvm::Triple::thumbeb:
- case llvm::Triple::xcore:
- case llvm::Triple::le32:
- case llvm::Triple::le64:
- case llvm::Triple::amdil:
- case llvm::Triple::amdil64:
- case llvm::Triple::hsail:
- case llvm::Triple::hsail64:
- case llvm::Triple::spir:
- case llvm::Triple::spir64:
- case llvm::Triple::kalimba:
- case llvm::Triple::shave:
- case llvm::Triple::lanai:
- case llvm::Triple::wasm32:
- case llvm::Triple::wasm64:
- case llvm::Triple::renderscript32:
- case llvm::Triple::renderscript64:
- case llvm::Triple::ve:
- return false;
- }
- }
- return true;
-}
-
-static bool matchesContext(CodeGenModule &CGM,
- const CompleteOMPContextSelectorData &ContextData) {
- for (const OMPContextSelectorData &Data : ContextData) {
- switch (Data.Ctx) {
- case OMP_CTX_vendor:
- assert(Data.CtxSet == OMP_CTX_SET_implementation &&
- "Expected implementation context selector set.");
- if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
- return false;
- break;
- case OMP_CTX_kind:
- assert(Data.CtxSet == OMP_CTX_SET_device &&
- "Expected device context selector set.");
- if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
- CGM))
- return false;
- break;
- case OMP_CTX_unknown:
- llvm_unreachable("Unknown context selector kind.");
- }
- }
- return true;
-}
-
-static CompleteOMPContextSelectorData
-translateAttrToContextSelectorData(ASTContext &C,
- const OMPDeclareVariantAttr *A) {
- CompleteOMPContextSelectorData Data;
- for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
- Data.emplace_back();
- auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
- *std::next(A->ctxSelectorSets_begin(), I));
- auto Ctx = static_cast<OpenMPContextSelectorKind>(
- *std::next(A->ctxSelectors_begin(), I));
- Data.back().CtxSet = CtxSet;
- Data.back().Ctx = Ctx;
- const Expr *Score = *std::next(A->scores_begin(), I);
- Data.back().Score = Score->EvaluateKnownConstInt(C);
- switch (Ctx) {
- case OMP_CTX_vendor:
- assert(CtxSet == OMP_CTX_SET_implementation &&
- "Expected implementation context selector set.");
- Data.back().Names =
- llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
- break;
- case OMP_CTX_kind:
- assert(CtxSet == OMP_CTX_SET_device &&
- "Expected device context selector set.");
- Data.back().Names =
- llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
- break;
- case OMP_CTX_unknown:
- llvm_unreachable("Unknown context selector kind.");
- }
- }
- return Data;
-}
-
-static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
- const CompleteOMPContextSelectorData &RHS) {
- llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
- for (const OMPContextSelectorData &D : RHS) {
- auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
- Pair.getSecond().insert(D.Names.begin(), D.Names.end());
- }
- bool AllSetsAreEqual = true;
- for (const OMPContextSelectorData &D : LHS) {
- auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
- if (It == RHSData.end())
- return false;
- if (D.Names.size() > It->getSecond().size())
- return false;
- if (llvm::set_union(It->getSecond(), D.Names))
- return false;
- AllSetsAreEqual =
- AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
- }
-
- return LHS.size() != RHS.size() || !AllSetsAreEqual;
-}
-
-static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
- const CompleteOMPContextSelectorData &RHS) {
- // Score is calculated as sum of all scores + 1.
- llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
- bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
- if (RHSIsSubsetOfLHS) {
- LHSScore = llvm::APSInt::get(0);
- } else {
- for (const OMPContextSelectorData &Data : LHS) {
- if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
- LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
- } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
- LHSScore += Data.Score.extend(LHSScore.getBitWidth());
- } else {
- LHSScore += Data.Score;
- }
- }
- }
- llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
- if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
- RHSScore = llvm::APSInt::get(0);
- } else {
- for (const OMPContextSelectorData &Data : RHS) {
- if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
- RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
- } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
- RHSScore += Data.Score.extend(RHSScore.getBitWidth());
- } else {
- RHSScore += Data.Score;
- }
- }
- }
- return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
-}
-
-/// Finds the variant function that matches current context with its context
-/// selector.
-static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
- const FunctionDecl *FD) {
- if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
- return FD;
- // Iterate through all DeclareVariant attributes and check context selectors.
- const OMPDeclareVariantAttr *TopMostAttr = nullptr;
- CompleteOMPContextSelectorData TopMostData;
- for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
- CompleteOMPContextSelectorData Data =
- translateAttrToContextSelectorData(CGM.getContext(), A);
- if (!matchesContext(CGM, Data))
- continue;
- // If the attribute matches the context, find the attribute with the highest
- // score.
- if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
- TopMostAttr = A;
- TopMostData.swap(Data);
- }
- }
- if (!TopMostAttr)
- return FD;
- return cast<FunctionDecl>(
- cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
- ->getDecl());
-}
-
-bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
- const auto *D = cast<FunctionDecl>(GD.getDecl());
- // If the original function is defined already, use its definition.
- StringRef MangledName = CGM.getMangledName(GD);
- llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
- if (Orig && !Orig->isDeclaration())
- return false;
- const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
- // Emit original function if it does not have declare variant attribute or the
- // context does not match.
- if (NewFD == D)
- return false;
- GlobalDecl NewGD = GD.getWithDecl(NewFD);
- if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
- DeferredVariantFunction.erase(D);
- return true;
- }
- DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
- return true;
-}
-
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
CodeGenModule &CGM, const OMPLoopDirective &S)
: CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
@@ -11329,17 +11257,101 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
[VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}
+void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
+ const OMPExecutableDirective &S,
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
+ const {
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
+ // Vars in target/task regions must be excluded completely.
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
+ isOpenMPTaskingDirective(S.getDirectiveKind())) {
+ SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+ getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
+ const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
+ for (const CapturedStmt::Capture &Cap : CS->captures()) {
+ if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
+ NeedToCheckForLPCs.insert(Cap.getCapturedVar());
+ }
+ }
+ // Exclude vars in private clauses.
+ for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ NeedToCheckForLPCs.insert(DRE->getDecl());
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ NeedToCheckForLPCs.insert(DRE->getDecl());
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ NeedToCheckForLPCs.insert(DRE->getDecl());
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ NeedToCheckForLPCs.insert(DRE->getDecl());
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ NeedToCheckForLPCs.insert(DRE->getDecl());
+ }
+ }
+ for (const Decl *VD : NeedToCheckForLPCs) {
+ for (const LastprivateConditionalData &Data :
+ llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
+ if (Data.DeclToUniqueName.count(VD) > 0) {
+ if (!Data.Disabled)
+ NeedToAddForLPCsAsDisabled.insert(VD);
+ break;
+ }
+ }
+ }
+}
+
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
: CGM(CGF.CGM),
- NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
- [](const OMPLastprivateClause *C) {
- return C->getKind() ==
- OMPC_LASTPRIVATE_conditional;
- })) {
+ Action((CGM.getLangOpts().OpenMP >= 50 &&
+ llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() ==
+ OMPC_LASTPRIVATE_conditional;
+ }))
+ ? ActionToDo::PushAsLastprivateConditional
+ : ActionToDo::DoNotPush) {
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
- if (!NeedToPush)
+ if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
return;
+ assert(Action == ActionToDo::PushAsLastprivateConditional &&
+ "Expected a push action.");
LastprivateConditionalData &Data =
CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
@@ -11347,107 +11359,136 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
continue;
for (const Expr *Ref : C->varlists()) {
- Data.DeclToUniqeName.try_emplace(
+ Data.DeclToUniqueName.insert(std::make_pair(
cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
- generateUniqueName(CGM, "pl_cond", Ref));
+ SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
}
}
Data.IVLVal = IVLVal;
- // In simd only mode or for simd directives no need to generate threadprivate
- // references for the loop iteration counter, we can use the original one
- // since outlining cannot happen in simd regions.
- if (CGF.getLangOpts().OpenMPSimd ||
- isOpenMPSimdDirective(S.getDirectiveKind())) {
- Data.UseOriginalIV = true;
+ Data.Fn = CGF.CurFn;
+}
+
+CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (CGM.getLangOpts().OpenMP < 50)
return;
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
+ tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
+ if (!NeedToAddForLPCsAsDisabled.empty()) {
+ Action = ActionToDo::DisableLastprivateConditional;
+ LastprivateConditionalData &Data =
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
+ for (const Decl *VD : NeedToAddForLPCsAsDisabled)
+ Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
+ Data.Fn = CGF.CurFn;
+ Data.Disabled = true;
}
- llvm::SmallString<16> Buffer;
- llvm::raw_svector_ostream OS(Buffer);
- PresumedLoc PLoc =
- CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
- assert(PLoc.isValid() && "Source location is expected to be always valid.");
+}
- llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
- CGM.getDiags().Report(diag::err_cannot_open_file)
- << PLoc.getFilename() << EC.message();
- OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
- << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
- Data.IVName = OS.str();
+CGOpenMPRuntime::LastprivateConditionalRAII
+CGOpenMPRuntime::LastprivateConditionalRAII::disable(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ return LastprivateConditionalRAII(CGF, S);
}
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
- if (!NeedToPush)
+ if (CGM.getLangOpts().OpenMP < 50)
return;
- CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+ if (Action == ActionToDo::DisableLastprivateConditional) {
+ assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
+ "Expected list of disabled private vars.");
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+ }
+ if (Action == ActionToDo::PushAsLastprivateConditional) {
+ assert(
+ !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
+ "Expected list of lastprivate conditional vars.");
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+ }
}
-void CGOpenMPRuntime::initLastprivateConditionalCounter(
- CodeGenFunction &CGF, const OMPExecutableDirective &S) {
- if (CGM.getLangOpts().OpenMPSimd ||
- !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
- [](const OMPLastprivateClause *C) {
- return C->getKind() == OMPC_LASTPRIVATE_conditional;
- }))
- return;
- const CGOpenMPRuntime::LastprivateConditionalData &Data =
- LastprivateConditionalStack.back();
- if (Data.UseOriginalIV)
- return;
- // Global loop counter. Required to handle inner parallel-for regions.
- // global_iv = iv;
- Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
- CGF, Data.IVLVal.getType(), Data.IVName);
- LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
- llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
- CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
+Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
+ const VarDecl *VD) {
+ ASTContext &C = CGM.getContext();
+ auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
+ if (I == LastprivateConditionalToTypes.end())
+ I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
+ QualType NewType;
+ const FieldDecl *VDField;
+ const FieldDecl *FiredField;
+ LValue BaseLVal;
+ auto VI = I->getSecond().find(VD);
+ if (VI == I->getSecond().end()) {
+    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
+ RD->startDefinition();
+ VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
+ FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
+ RD->completeDefinition();
+ NewType = C.getRecordType(RD);
+ Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
+ BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
+ I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
+ } else {
+ NewType = std::get<0>(VI->getSecond());
+ VDField = std::get<1>(VI->getSecond());
+ FiredField = std::get<2>(VI->getSecond());
+ BaseLVal = std::get<3>(VI->getSecond());
+ }
+ LValue FiredLVal =
+ CGF.EmitLValueForField(BaseLVal, FiredField);
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
+ FiredLVal);
+ return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
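The implicit record built here pairs the private copy with a 'fired' flag; for an int variable its shape is roughly (names hypothetical):

    struct LastprivateConditional {
      int Val;    // the private copy, VD's non-reference type
      char Fired; // zeroed above; set once Val is assigned in the region
    };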
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
: public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
- CodeGenFunction &CGF;
ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
const Expr *FoundE = nullptr;
const Decl *FoundD = nullptr;
StringRef UniqueDeclName;
LValue IVLVal;
- StringRef IVName;
+ llvm::Function *FoundFn = nullptr;
SourceLocation Loc;
- bool UseOriginalIV = false;
public:
bool VisitDeclRefExpr(const DeclRefExpr *E) {
for (const CGOpenMPRuntime::LastprivateConditionalData &D :
llvm::reverse(LPM)) {
- auto It = D.DeclToUniqeName.find(E->getDecl());
- if (It == D.DeclToUniqeName.end())
+ auto It = D.DeclToUniqueName.find(E->getDecl());
+ if (It == D.DeclToUniqueName.end())
continue;
+ if (D.Disabled)
+ return false;
FoundE = E;
FoundD = E->getDecl()->getCanonicalDecl();
- UniqueDeclName = It->getSecond();
+ UniqueDeclName = It->second;
IVLVal = D.IVLVal;
- IVName = D.IVName;
- UseOriginalIV = D.UseOriginalIV;
+ FoundFn = D.Fn;
break;
}
return FoundE == E;
}
bool VisitMemberExpr(const MemberExpr *E) {
- if (!CGF.IsWrappedCXXThis(E->getBase()))
+ if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
return false;
for (const CGOpenMPRuntime::LastprivateConditionalData &D :
llvm::reverse(LPM)) {
- auto It = D.DeclToUniqeName.find(E->getMemberDecl());
- if (It == D.DeclToUniqeName.end())
+ auto It = D.DeclToUniqueName.find(E->getMemberDecl());
+ if (It == D.DeclToUniqueName.end())
continue;
+ if (D.Disabled)
+ return false;
FoundE = E;
FoundD = E->getMemberDecl()->getCanonicalDecl();
- UniqueDeclName = It->getSecond();
+ UniqueDeclName = It->second;
IVLVal = D.IVLVal;
- IVName = D.IVName;
- UseOriginalIV = D.UseOriginalIV;
+ FoundFn = D.Fn;
break;
}
return FoundE == E;
@@ -11465,62 +11506,41 @@ public:
return false;
}
explicit LastprivateConditionalRefChecker(
- CodeGenFunction &CGF,
ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
- : CGF(CGF), LPM(LPM) {}
- std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+ : LPM(LPM) {}
+ std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const {
- return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
- UseOriginalIV);
+ return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
}
};
} // namespace
-void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
- const Expr *LHS) {
- if (CGF.getLangOpts().OpenMP < 50)
- return;
- LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
- if (!Checker.Visit(LHS))
- return;
- const Expr *FoundE;
- const Decl *FoundD;
- StringRef UniqueDeclName;
- LValue IVLVal;
- StringRef IVName;
- bool UseOriginalIV;
- std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
- Checker.getFoundData();
-
+void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
+ LValue IVLVal,
+ StringRef UniqueDeclName,
+ LValue LVal,
+ SourceLocation Loc) {
// Last updated loop counter for the lastprivate conditional var.
// int<xx> last_iv = 0;
llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
llvm::Constant *LastIV =
- getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
+ getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
cast<llvm::GlobalVariable>(LastIV)->setAlignment(
IVLVal.getAlignment().getAsAlign());
LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
- // Private address of the lastprivate conditional in the current context.
- // priv_a
- LValue LVal = CGF.EmitLValue(FoundE);
// Last value of the lastprivate conditional.
// decltype(priv_a) last_a;
llvm::Constant *Last = getOrCreateInternalVariable(
- LVal.getAddress(CGF).getElementType(), UniqueDeclName);
+ CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
cast<llvm::GlobalVariable>(Last)->setAlignment(
LVal.getAlignment().getAsAlign());
LValue LastLVal =
CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
// Global loop counter. Required to handle inner parallel-for regions.
- // global_iv
- if (!UseOriginalIV) {
- Address IVAddr =
- getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
- IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
- }
- llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
+ // iv
+ llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
// #pragma omp critical(a)
// if (last_iv <= iv) {
@@ -11528,11 +11548,10 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
// last_a = priv_a;
// }
auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
- FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- llvm::Value *LastIVVal =
- CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
- // (last_iv <= global_iv) ? Check if the variable is updated and store new
+ llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
+ // (last_iv <= iv) ? Check if the variable is updated and store new
// value in global var.
llvm::Value *CmpRes;
if (IVLVal.getType()->isSignedIntegerType()) {
@@ -11548,19 +11567,18 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
// {
CGF.EmitBlock(ThenBB);
- // last_iv = global_iv;
+ // last_iv = iv;
CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
// last_a = priv_a;
switch (CGF.getEvaluationKind(LVal.getType())) {
case TEK_Scalar: {
- llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
+ llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
CGF.EmitStoreOfScalar(PrivVal, LastLVal);
break;
}
case TEK_Complex: {
- CodeGenFunction::ComplexPairTy PrivVal =
- CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
+ CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
break;
}
@@ -11580,7 +11598,100 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
RegionCodeGenTy ThenRCG(CodeGen);
ThenRCG(CGF);
} else {
- emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
+ emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
+ }
+}
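Read as source, the update this function emits reduces to a guarded compare-and-store keyed on the loop iteration. A sketch under illustrative names (last_iv and last_a stand in for the internal globals created above; the mutex stands in for the named critical region):

    #include <mutex>

    static long last_iv = 0;  // the "<name>...iv" internal global
    static int last_a = 0;    // the "<name>" global holding the last value
    static std::mutex m;      // models the __kmpc critical section

    void update(long iv, int priv_a) {
      std::lock_guard<std::mutex> guard(m);
      if (last_iv <= iv) {  // keep the value from the highest iteration
        last_iv = iv;
        last_a = priv_a;
      }
    }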
+
+void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+ const Expr *LHS) {
+ if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
+ return;
+ LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
+ if (!Checker.Visit(LHS))
+ return;
+ const Expr *FoundE;
+ const Decl *FoundD;
+ StringRef UniqueDeclName;
+ LValue IVLVal;
+ llvm::Function *FoundFn;
+ std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
+ Checker.getFoundData();
+ if (FoundFn != CGF.CurFn) {
+ // Special codegen for inner parallel regions.
+ // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
+ auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
+ assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
+ "Lastprivate conditional is not found in outer region.");
+ QualType StructTy = std::get<0>(It->getSecond());
+ const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
+ LValue PrivLVal = CGF.EmitLValue(FoundE);
+ Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ PrivLVal.getAddress(CGF),
+ CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
+ LValue BaseLVal =
+ CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
+ LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
+ CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
+ CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
+ FiredLVal, llvm::AtomicOrdering::Unordered,
+ /*IsVolatile=*/true, /*isInit=*/false);
+ return;
+ }
+
+ // Private address of the lastprivate conditional in the current context.
+ // priv_a
+ LValue LVal = CGF.EmitLValue(FoundE);
+ emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
+ FoundE->getExprLoc());
+}
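When the store happens in an inner parallel region (FoundFn != CurFn), only the Fired byte is raised; the merge into the global is deferred to checkAndEmitSharedLastprivateConditional below. A self-contained analogue of that flag raise, assuming the wrapper layout sketched earlier:

    #include <atomic>

    struct Wrapped {
      int Value;
      std::atomic<char> Fired;  // models the volatile unordered store above
    };

    void inner_write(Wrapped &priv_a, int v) {
      priv_a.Value = v;
      priv_a.Fired.store(1, std::memory_order_relaxed);  // "Fired = 1"
    }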
+
+void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
+ if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
+ return;
+ auto Range = llvm::reverse(LastprivateConditionalStack);
+ auto It = llvm::find_if(
+ Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
+ if (It == Range.end() || It->Fn != CGF.CurFn)
+ return;
+ auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
+ assert(LPCI != LastprivateConditionalToTypes.end() &&
+ "Lastprivates must be registered already.");
+ SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+ getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
+ const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
+ for (const auto &Pair : It->DeclToUniqueName) {
+ const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
+ if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
+ continue;
+ auto I = LPCI->getSecond().find(Pair.first);
+ assert(I != LPCI->getSecond().end() &&
+ "Lastprivate must be rehistered already.");
+ // bool Cmp = priv_a.Fired != 0;
+ LValue BaseLVal = std::get<3>(I->getSecond());
+ LValue FiredLVal =
+ CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
+ llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
+ llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
+ llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
+ // if (Cmp) {
+ CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
+ CGF.EmitBlock(ThenBB);
+ Address Addr = CGF.GetAddrOfLocalVar(VD);
+ LValue LVal;
+ if (VD->getType()->isReferenceType())
+ LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
+ AlignmentSource::Decl);
+ else
+ LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
+ AlignmentSource::Decl);
+ emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
+ D.getBeginLoc());
+ auto AL = ApplyDebugLocation::CreateArtificial(CGF);
+ CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
+ // }
}
}
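A source-level shape that exercises this path (hypothetical predicate cond): the conditional lastprivate is assigned inside a nested parallel region, so only Fired is set there, and the check emitted above runs when the outer directive completes:

    bool cond(int i);

    void f() {
      int a = 0;
    #pragma omp parallel for lastprivate(conditional : a)
      for (int i = 0; i < 100; ++i) {
    #pragma omp parallel shared(a)
        if (cond(i))
          a = i;  // raises Fired; merged after the inner region ends
      }
    }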
@@ -11589,10 +11700,10 @@ void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
SourceLocation Loc) {
if (CGF.getLangOpts().OpenMP < 50)
return;
- auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
- assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
+ auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
+ assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
"Unknown lastprivate conditional variable.");
- StringRef UniqueName = It->getSecond();
+ StringRef UniqueName = It->second;
llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
// The variable was not updated in the region - exit.
if (!GV)
@@ -11750,7 +11861,8 @@ Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
ArrayRef<const Expr *> Vars,
- SourceLocation Loc) {
+ SourceLocation Loc,
+ llvm::AtomicOrdering AO) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -11785,6 +11897,12 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
llvm_unreachable("Not supported in SIMD-only mode");
}
+void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ bool IsWorksharingReduction) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,
@@ -11826,7 +11944,7 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
void CGOpenMPSIMDRuntime::emitTargetCall(
CodeGenFunction &CGF, const OMPExecutableDirective &D,
llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
- const Expr *Device,
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
const OMPLoopDirective &D)>
SizeEmitter) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 8159f5e8b790..eb22f155f5ef 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -20,12 +20,15 @@
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
class ArrayType;
@@ -35,6 +38,7 @@ class GlobalVariable;
class StructType;
class Type;
class Value;
+class OpenMPIRBuilder;
} // namespace llvm
namespace clang {
@@ -80,11 +84,10 @@ public:
template <typename Callable>
RegionCodeGenTy(
Callable &&CodeGen,
- typename std::enable_if<
- !std::is_same<typename std::remove_reference<Callable>::type,
- RegionCodeGenTy>::value>::type * = nullptr)
+ std::enable_if_t<!std::is_same<std::remove_reference_t<Callable>,
+ RegionCodeGenTy>::value> * = nullptr)
: CodeGen(reinterpret_cast<intptr_t>(&CodeGen)),
- Callback(CallbackFn<typename std::remove_reference<Callable>::type>),
+ Callback(CallbackFn<std::remove_reference_t<Callable>>),
PrePostAction(nullptr) {}
void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; }
void operator()(CodeGenFunction &CGF) const;
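The constructor's constraint is unchanged in meaning; the rewrite just swaps the C++11 trait spellings for the C++14 _t alias templates. The equivalence, as a standalone check:

    #include <type_traits>

    template <class T>
    using OldStyle = typename std::enable_if<!std::is_same<
        typename std::remove_reference<T>::type, int>::value>::type;

    template <class T>
    using NewStyle =
        std::enable_if_t<!std::is_same<std::remove_reference_t<T>, int>::value>;

    static_assert(std::is_same<OldStyle<long &>, NewStyle<long &>>::value,
                  "identical SFINAE results");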
@@ -99,9 +102,18 @@ struct OMPTaskDataTy final {
SmallVector<const Expr *, 4> LastprivateVars;
SmallVector<const Expr *, 4> LastprivateCopies;
SmallVector<const Expr *, 4> ReductionVars;
+ SmallVector<const Expr *, 4> ReductionOrigs;
SmallVector<const Expr *, 4> ReductionCopies;
SmallVector<const Expr *, 4> ReductionOps;
- SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences;
+ struct DependData {
+ OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
+ const Expr *IteratorExpr = nullptr;
+ SmallVector<const Expr *, 4> DepExprs;
+ explicit DependData() = default;
+ DependData(OpenMPDependClauseKind DepKind, const Expr *IteratorExpr)
+ : DepKind(DepKind), IteratorExpr(IteratorExpr) {}
+ };
+ SmallVector<DependData, 4> Dependences;
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule;
llvm::PointerIntPair<llvm::Value *, 1, bool> Priority;
@@ -109,6 +121,8 @@ struct OMPTaskDataTy final {
unsigned NumberOfParts = 0;
bool Tied = true;
bool Nogroup = false;
+ bool IsReductionWithTaskMod = false;
+ bool IsWorksharingReduction = false;
};
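Each DependData entry now carries one whole depend clause: its kind, the optional OpenMP 5.0 iterator modifier, and all of the clause's list items (the previous representation was one kind/expression pair per item). A source-level illustration of what two entries would correspond to:

    // Conceptually: first clause -> {OMPC_DEPEND_in, iterator-expr, {a[i]}},
    // second clause -> {OMPC_DEPEND_out, nullptr, {b}}.
    void g(int *a, int n, int &b) {
    #pragma omp task depend(iterator(int i = 0 : n), in : a[i]) depend(out : b)
      b = a[0];
    }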
/// Class intended to support codegen of all kind of the reduction clauses.
@@ -116,20 +130,26 @@ class ReductionCodeGen {
private:
/// Data required for codegen of reduction clauses.
struct ReductionData {
- /// Reference to the original shared item.
+ /// Reference to the item shared between tasks to reduce into.
+ const Expr *Shared = nullptr;
+ /// Reference to the original item.
const Expr *Ref = nullptr;
/// Helper expression for generation of private copy.
const Expr *Private = nullptr;
/// Helper expression for generation reduction operation.
const Expr *ReductionOp = nullptr;
- ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp)
- : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {}
+ ReductionData(const Expr *Shared, const Expr *Ref, const Expr *Private,
+ const Expr *ReductionOp)
+ : Shared(Shared), Ref(Ref), Private(Private), ReductionOp(ReductionOp) {
+ }
};
/// List of reduction-based clauses.
SmallVector<ReductionData, 4> ClausesData;
- /// List of addresses of original shared variables/expressions.
+ /// List of addresses of shared variables/expressions.
SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses;
+ /// List of addresses of original variables/expressions.
+ SmallVector<std::pair<LValue, LValue>, 4> OrigAddresses;
/// Sizes of the reduction items in chars.
SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes;
/// Base declarations for the reduction items.
@@ -149,12 +169,12 @@ private:
const OMPDeclareReductionDecl *DRD);
public:
- ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+ ReductionCodeGen(ArrayRef<const Expr *> Shareds, ArrayRef<const Expr *> Origs,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> ReductionOps);
- /// Emits lvalue for a reduction item.
+ /// Emits lvalue for the shared and original reduction item.
/// \param N Number of the reduction item.
- void emitSharedLValue(CodeGenFunction &CGF, unsigned N);
+ void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N);
/// Emits the code for the variable-modified type, if required.
/// \param N Number of the reduction item.
void emitAggregateType(CodeGenFunction &CGF, unsigned N);
@@ -186,6 +206,8 @@ public:
Address PrivateAddr);
/// Returns LValue for the reduction item.
LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; }
+ /// Returns LValue for the original reduction item.
+ LValue getOrigLValue(unsigned N) const { return OrigAddresses[N].first; }
/// Returns the size of the reduction item (in chars and total number of
/// elements in the item), or nullptr, if the size is a constant.
std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const {
@@ -230,26 +252,42 @@ public:
/// Also, stores the expression for the private loop counter and its
/// threadprivate name.
struct LastprivateConditionalData {
- llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>>
- DeclToUniqeName;
+ llvm::MapVector<CanonicalDeclPtr<const Decl>, SmallString<16>>
+ DeclToUniqueName;
LValue IVLVal;
- SmallString<16> IVName;
- /// True if original lvalue for loop counter can be used in codegen (simd
- /// region or simd only mode) and no need to create threadprivate
- /// references.
- bool UseOriginalIV = false;
+ llvm::Function *Fn = nullptr;
+ bool Disabled = false;
};
/// Manages list of lastprivate conditional decls for the specified directive.
class LastprivateConditionalRAII {
+ enum class ActionToDo {
+ DoNotPush,
+ PushAsLastprivateConditional,
+ DisableLastprivateConditional,
+ };
CodeGenModule &CGM;
- const bool NeedToPush;
+ ActionToDo Action = ActionToDo::DoNotPush;
+
+ /// Check and try to disable analysis of inner regions for changes in
+ /// lastprivate conditional.
+ void tryToDisableInnerAnalysis(const OMPExecutableDirective &S,
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>>
+ &NeedToAddForLPCsAsDisabled) const;
- public:
LastprivateConditionalRAII(CodeGenFunction &CGF,
- const OMPExecutableDirective &S, LValue IVLVal);
+ const OMPExecutableDirective &S);
+
+ public:
+ explicit LastprivateConditionalRAII(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ LValue IVLVal);
+ static LastprivateConditionalRAII disable(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S);
~LastprivateConditionalRAII();
};
+ llvm::OpenMPIRBuilder &getOMPBuilder() { return OMPBuilder; }
+
protected:
CodeGenModule &CGM;
StringRef FirstSeparator, Separator;
@@ -319,17 +357,6 @@ protected:
/// default location.
virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
- /// Tries to emit declare variant function for \p OldGD from \p NewGD.
- /// \param OrigAddr LLVM IR value for \p OldGD.
- /// \param IsForDefinition true, if requested emission for the definition of
- /// \p OldGD.
- /// \returns true, was able to emit a definition function for \p OldGD, which
- /// points to \p NewGD.
- virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD,
- const GlobalDecl &OldGD,
- llvm::GlobalValue *OrigAddr,
- bool IsForDefinition);
-
/// Returns default flags for the barriers depending on the directive, for
/// which this barrier is going to be emitted.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
@@ -345,6 +372,8 @@ protected:
llvm::Value *getCriticalRegionLock(StringRef CriticalName);
private:
+ /// An OpenMP-IR-Builder instance.
+ llvm::OpenMPIRBuilder OMPBuilder;
/// Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource = nullptr;
@@ -392,6 +421,13 @@ private:
llvm::DenseMap<llvm::Function *,
SmallVector<const OMPDeclareMapperDecl *, 4>>;
FunctionUDMMapTy FunctionUDMMap;
+ /// Maps local variables marked as lastprivate conditional to their internal
+ /// types.
+ llvm::DenseMap<llvm::Function *,
+ llvm::DenseMap<CanonicalDeclPtr<const Decl>,
+ std::tuple<QualType, const FieldDecl *,
+ const FieldDecl *, LValue>>>
+ LastprivateConditionalToTypes;
/// Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@@ -428,6 +464,16 @@ private:
/// } flags;
/// } kmp_depend_info_t;
QualType KmpDependInfoTy;
+ /// Type typedef struct kmp_task_affinity_info {
+ /// kmp_intptr_t base_addr;
+ /// size_t len;
+ /// struct {
+ /// bool flag1 : 1;
+ /// bool flag2 : 1;
+ /// kmp_int32 reserved : 30;
+ /// } flags;
+ /// } kmp_task_affinity_info_t;
+ QualType KmpTaskAffinityInfoTy;
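This mirrors the libomp record behind the OpenMP 5.0 task affinity clause: each list item is lowered to one base-address/length pair plus flags. A source-level sketch of what produces such records:

    // Each affinity list item becomes one kmp_task_affinity_info_t record
    // hinting where the task should execute.
    void t(float *a, int n) {
    #pragma omp task affinity(a[0 : n])
      a[0] = 1.0f;
    }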
/// struct kmp_dim { // loop bounds info casted to kmp_int64
/// kmp_int64 lo; // lower
/// kmp_int64 up; // upper
@@ -664,12 +710,6 @@ private:
/// must be emitted.
llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables;
- /// Mapping of the original functions to their variants and original global
- /// decl.
- llvm::MapVector<CanonicalDeclPtr<const FunctionDecl>,
- std::pair<GlobalDecl, GlobalDecl>>
- DeferredVariantFunction;
-
using NontemporalDeclsSet = llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>>;
/// Stack for list of declarations in current context marked as nontemporal.
/// The set is the union of all current stack elements.
@@ -684,6 +724,9 @@ private:
/// directive is present.
bool HasRequiresUnifiedSharedMemory = false;
+ /// Atomic ordering from the omp requires directive.
+ llvm::AtomicOrdering RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
+
/// Flag for keeping track of whether a target region has been emitted.
bool HasEmittedTargetRegion = false;
@@ -710,11 +753,6 @@ private:
/// Returns pointer to kmpc_micro type.
llvm::Type *getKmpc_MicroPointerTy();
- /// Returns specified OpenMP runtime function.
- /// \param Function OpenMP runtime function.
- /// \return Specified function.
- llvm::FunctionCallee createRuntimeFunction(unsigned Function);
-
/// Returns __kmpc_for_static_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize,
@@ -826,6 +864,19 @@ private:
const OMPLoopDirective &D)>
SizeEmitter);
+ /// Emit update for lastprivate conditional data.
+ void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal,
+ StringRef UniqueDeclName, LValue LVal,
+ SourceLocation Loc);
+
+ /// Returns the number of elements and the address of the depobj
+ /// dependency array.
+ /// \return Number of elements in depobj array and the pointer to the array of
+ /// dependencies.
+ std::pair<llvm::Value *, LValue> getDepobjElements(CodeGenFunction &CGF,
+ LValue DepobjLVal,
+ SourceLocation Loc);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, ".", ".") {}
@@ -1220,7 +1271,7 @@ public:
/// Emit flush of the variables specified in 'omp flush' directive.
/// \param Vars List of variables to flush.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
- SourceLocation Loc);
+ SourceLocation Loc, llvm::AtomicOrdering AO);
/// Emit task region for the task directive. The task region is
/// emitted in several steps:
@@ -1381,18 +1432,34 @@ public:
/// should be emitted for reduction:
/// \code
///
- /// _task_red_item_t red_data[n];
+ /// _taskred_item_t red_data[n];
/// ...
- /// red_data[i].shar = &origs[i];
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
- /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
+ /// For a reduction clause with the task modifier, it emits the following call:
+ /// \code
///
+ /// _taskred_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+ /// red_data);
+ /// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
@@ -1403,11 +1470,16 @@ public:
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data);
+ /// Emits the following code for reduction clause with task modifier:
+ /// \code
+ /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+ /// \endcode
+ virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+ bool IsWorksharingReduction);
+
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
- /// initializer/combiner/finalizer functions + emits threadprivate variable to
- /// store the pointer to the original reduction item for the custom
- /// initializer defined by declare reduction construct.
+ /// initializer/combiner/finalizer functions.
/// \param RCG Allows to reuse an existing data for the reductions.
/// \param N Reduction item for which fixups must be emitted.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc,
@@ -1467,16 +1539,16 @@ public:
/// \param IfCond Expression evaluated in if clause associated with the target
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
- /// target directive, or null if no device clause is used.
+ /// target directive (or null if absent) and the device clause modifier.
/// \param SizeEmitter Callback to emit number of iterations for loop-based
/// directives.
- virtual void
- emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device,
- llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
- const OMPLoopDirective &D)>
- SizeEmitter);
+ virtual void emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter);
/// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
@@ -1675,7 +1747,10 @@ public:
/// Perform check on requires decl to ensure that target architecture
/// supports unified addressing
- virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D);
+ virtual void processRequiresDirective(const OMPRequiresDecl *D);
+
+ /// Gets default memory ordering as specified in requires directive.
+ llvm::AtomicOrdering getDefaultMemoryOrdering() const;
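RequiresAtomicOrdering is set from the requires directive's atomic_default_mem_order clause, and getDefaultMemoryOrdering hands it back to atomic and flush codegen. A source-level sketch (the per-construct mapping follows the OpenMP 5.0 rules, stated here as my reading of them):

    #pragma omp requires atomic_default_mem_order(acq_rel)

    void h(int &x, int v) {
    #pragma omp atomic write  // behaves as release under the acq_rel default
      x = v;
    #pragma omp flush  // emitted with the acq_rel ordering
    }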
/// Checks if the variable has associated OMPAllocateDeclAttr attribute with
/// the predefined allocator and translates it into the corresponding address
@@ -1685,17 +1760,13 @@ public:
/// Return whether the unified_shared_memory has been specified.
bool hasRequiresUnifiedSharedMemory() const;
- /// Emits the definition of the declare variant function.
- virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition);
-
/// Checks if the \p VD variable is marked as nontemporal declaration in
/// current context.
bool isNontemporalDecl(const ValueDecl *VD) const;
- /// Initializes global counter for lastprivate conditional.
- virtual void
- initLastprivateConditionalCounter(CodeGenFunction &CGF,
- const OMPExecutableDirective &S);
+ /// Create specialized alloca to handle lastprivate conditionals.
+ Address emitLastprivateConditionalInit(CodeGenFunction &CGF,
+ const VarDecl *VD);
/// Checks if the provided \p LVal is lastprivate conditional and emits the
/// code to update the value of the original variable.
@@ -1713,6 +1784,30 @@ public:
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
const Expr *LHS);
+ /// Checks if the lastprivate conditional was updated in an inner region and
+ /// writes the value.
+ /// \code
+ /// lastprivate(conditional: a)
+ /// ...
+ /// <type> a; bool Fired = false;
+ /// #pragma omp ... shared(a)
+ /// {
+ /// lp_a = ...;
+ /// Fired = true;
+ /// }
+ /// if (Fired) {
+ /// #pragma omp critical(a)
+ /// if (last_iv_a <= iv) {
+ /// last_iv_a = iv;
+ /// global_a = lp_a;
+ /// }
+ /// Fired = false;
+ /// }
+ /// \endcode
+ virtual void checkAndEmitSharedLastprivateConditional(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls);
+
/// Gets the address of the global copy used for lastprivate conditional
/// update, if any.
/// \param PrivLVal LValue for the private copy.
@@ -1721,6 +1816,41 @@ public:
LValue PrivLVal,
const VarDecl *VD,
SourceLocation Loc);
+
+ /// Emits the list of dependencies based on the provided data (array of
+ /// dependence/expression pairs).
+ /// \returns Pointer to the first element of the array cast to VoidPtr type.
+ std::pair<llvm::Value *, Address>
+ emitDependClause(CodeGenFunction &CGF,
+ ArrayRef<OMPTaskDataTy::DependData> Dependencies,
+ SourceLocation Loc);
+
+ /// Emits the list of dependencies based on the provided data (array of
+ /// dependence/expression pairs) for the depobj construct. In this case, the
+ /// variable is allocated dynamically.
+ /// \returns Pointer to the first element of the array cast to VoidPtr type.
+ Address emitDepobjDependClause(CodeGenFunction &CGF,
+ const OMPTaskDataTy::DependData &Dependencies,
+ SourceLocation Loc);
+
+ /// Emits the code to destroy the dependency object provided in depobj
+ /// directive.
+ void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
+ SourceLocation Loc);
+
+ /// Updates the dependency kind in the specified depobj object.
+ /// \param DepobjLVal LValue for the main depobj object.
+ /// \param NewDepKind New dependency kind.
+ void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
+ OpenMPDependClauseKind NewDepKind, SourceLocation Loc);
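These entry points implement the lifecycle of the OpenMP 5.0 depobj construct. A source-level example of the three operations (init, update, destroy) plus consumption through depend(depobj:):

    #include <omp.h>

    void use(int &x) {
      omp_depend_t d;
    #pragma omp depobj(d) depend(in : x)  // emitDepobjDependClause
    #pragma omp depobj(d) update(out)     // emitUpdateClause
    #pragma omp task depend(depobj : d)   // consumed as a task dependency
      x += 1;
    #pragma omp taskwait
    #pragma omp depobj(d) destroy         // emitDestroyClause
    }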
+
+ /// Initializes user defined allocators specified in the uses_allocators
+ /// clauses.
+ void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator,
+ const Expr *AllocatorTraits);
+
+ /// Destroys user defined allocators specified in the uses_allocators clause.
+ void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator);
};
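The uses_allocators pair brackets a target region that names allocators; init runs on region entry, fini on exit. A minimal sketch with a predefined allocator (user-defined allocators with trait arrays follow the same shape):

    #include <omp.h>

    void k(int n) {
    #pragma omp target uses_allocators(omp_default_mem_alloc)
      {
        int *p = (int *)omp_alloc(n * sizeof(int), omp_default_mem_alloc);
        omp_free(p, omp_default_mem_alloc);
      }
    }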
/// Class supports emission of SIMD-only code.
@@ -1985,7 +2115,7 @@ public:
/// Emit flush of the variables specified in 'omp flush' directive.
/// \param Vars List of variables to flush.
void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
- SourceLocation Loc) override;
+ SourceLocation Loc, llvm::AtomicOrdering AO) override;
/// Emit task region for the task directive. The task region is
/// emitted in several steps:
@@ -2107,18 +2237,34 @@ public:
/// should be emitted for reduction:
/// \code
///
- /// _task_red_item_t red_data[n];
+ /// _taskred_item_t red_data[n];
/// ...
- /// red_data[i].shar = &origs[i];
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
- /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
+ /// For a reduction clause with the task modifier, it emits the following call:
+ /// \code
///
+ /// _taskred_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+ /// red_data);
+ /// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
@@ -2128,6 +2274,13 @@ public:
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data) override;
+ /// Emits the following code for reduction clause with task modifier:
+ /// \code
+ /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+ /// \endcode
+ void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+ bool IsWorksharingReduction) override;
+
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions + emits threadprivate variable to
@@ -2191,14 +2344,14 @@ public:
/// \param IfCond Expression evaluated in if clause associated with the target
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
- /// target directive, or null if no device clause is used.
- void
- emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device,
- llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
- const OMPLoopDirective &D)>
- SizeEmitter) override;
+ /// target directive (or null if absent) and the device clause modifier.
+ void emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) override;
/// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index d00d84b79cfe..cbd443134e7a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -38,11 +38,9 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
- /// *outlined_function, int16_t
- /// IsOMPRuntimeInitialized);
+ /// *outlined_function);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
- /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
- /// int16_t IsOMPRuntimeInitialized);
+ /// Call to bool __kmpc_kernel_parallel(void **outlined_function);
OMPRTL_NVPTX__kmpc_kernel_parallel,
/// Call to void __kmpc_kernel_end_parallel();
OMPRTL_NVPTX__kmpc_kernel_end_parallel,
@@ -85,6 +83,9 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
/// int16_t UseSharedMemory);
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
+ /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
+ /// UseSharedMemory);
+ OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
/// Call to void __kmpc_data_sharing_pop_stack(void *a);
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
/// Call to void __kmpc_begin_sharing_variables(void ***args,
@@ -341,8 +342,7 @@ class CheckVarsEscapingDeclContext final
if (!Attr)
return;
if (((Attr->getCaptureKind() != OMPC_map) &&
- !isOpenMPPrivate(
- static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) ||
+ !isOpenMPPrivate(Attr->getCaptureKind())) ||
((Attr->getCaptureKind() == OMPC_map) &&
!FD->getType()->isAnyPointerType()))
return;
@@ -786,6 +786,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -801,6 +803,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -813,6 +817,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unexpected directive.");
}
}
@@ -862,6 +867,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -877,6 +884,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -889,6 +898,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
break;
}
llvm_unreachable(
@@ -1031,6 +1041,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -1046,6 +1058,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -1058,6 +1072,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unexpected directive.");
}
}
@@ -1113,6 +1128,8 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -1128,6 +1145,8 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -1140,6 +1159,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
break;
}
llvm_unreachable(
@@ -1444,8 +1464,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
// TODO: Optimize runtime initialization and pass in correct value.
- llvm::Value *Args[] = {WorkFn.getPointer(),
- /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+ llvm::Value *Args[] = {WorkFn.getPointer()};
llvm::Value *Ret = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
@@ -1573,17 +1592,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
/// Build void __kmpc_kernel_prepare_parallel(
- /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
+ /// void *outlined_function);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_parallel: {
- /// Build bool __kmpc_kernel_parallel(void **outlined_function,
- /// int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
+ /// Build bool __kmpc_kernel_parallel(void **outlined_function);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
auto *FnTy =
llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
@@ -1738,6 +1756,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
break;
}
+ case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
+ // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t
+ // UseSharedMemory);
+ llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+ break;
+ }
case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
// Build void __kmpc_data_sharing_pop_stack(void *a);
llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
@@ -1915,19 +1943,6 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const {
llvm_unreachable("Unknown flags are requested.");
}
-bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD,
- const GlobalDecl &OldGD,
- llvm::GlobalValue *OrigAddr,
- bool IsForDefinition) {
- // Emit the function in OldGD with the body from NewGD, if NewGD is defined.
- auto *NewFD = cast<FunctionDecl>(NewGD.getDecl());
- if (NewFD->isDefined()) {
- CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr);
- return true;
- }
- return false;
-}
-
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
@@ -2208,7 +2223,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
GlobalRecCastAddr = Phi;
I->getSecond().GlobalRecordAddr = Phi;
I->getSecond().IsInSPMDModeFlag = IsSPMD;
- } else if (IsInTTDRegion) {
+ } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
assert(GlobalizedRecords.back().Records.size() < 2 &&
"Expected less than 2 globalized records: one for target and one "
"for teams.");
@@ -2281,12 +2296,16 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
} else {
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
+ bool UseSharedMemory =
+ IsInTTDRegion && GlobalRecordSize <= SharedMemorySize;
llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ IsInTTDRegion
+ ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
+ : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, GlobalRecPtrTy);
@@ -2433,7 +2452,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
- } else if (IsInTTDRegion) {
+ } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
assert(GlobalizedRecords.back().RegionCounter > 0 &&
"region counter must be > 0.");
--GlobalizedRecords.back().RegionCounter;
@@ -2546,7 +2565,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
// Prepare for parallel region. Indicate the outlined function.
- llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+ llvm::Value *Args[] = {ID};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
Args);
@@ -4754,6 +4773,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
switch (A->getAllocatorType()) {
// Use the default allocator here as by default local vars are
// threadlocal.
+ case OMPAllocateDeclAttr::OMPNullMemAlloc:
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
@@ -4920,6 +4940,7 @@ bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
return false;
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
switch(A->getAllocatorType()) {
+ case OMPAllocateDeclAttr::OMPNullMemAlloc:
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
// Not supported, fallback to the default mem space.
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
@@ -4962,7 +4983,7 @@ static CudaArch getCudaArch(CodeGenModule &CGM) {
/// Check to see if target architecture supports unified addressing which is
/// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+void CGOpenMPRuntimeNVPTX::processRequiresDirective(
const OMPRequiresDecl *D) {
for (const OMPClause *Clause : D->clauselists()) {
if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
@@ -4990,6 +5011,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
+ case CudaArch::SM_80:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
@@ -5010,6 +5032,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
+ case CudaArch::GFX1030:
case CudaArch::UNKNOWN:
break;
case CudaArch::LAST:
@@ -5017,7 +5040,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
}
}
}
- CGOpenMPRuntime::checkArchForUnifiedAddressing(D);
+ CGOpenMPRuntime::processRequiresDirective(D);
}
/// Get number of SMs and number of blocks per SM.
@@ -5047,6 +5070,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
+ case CudaArch::SM_80:
return {84, 32};
case CudaArch::GFX600:
case CudaArch::GFX601:
@@ -5068,6 +5092,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
+ case CudaArch::GFX1030:
case CudaArch::UNKNOWN:
break;
case CudaArch::LAST:
@@ -5077,7 +5102,8 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
}
void CGOpenMPRuntimeNVPTX::clear() {
- if (!GlobalizedRecords.empty()) {
+ if (!GlobalizedRecords.empty() &&
+ !CGM.getLangOpts().OpenMPCUDATargetParallel) {
ASTContext &C = CGM.getContext();
llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs;
llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs;
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 4159af0a622f..c52ae43817c7 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -193,18 +193,6 @@ protected:
/// Full/Lightweight runtime mode. Used for better optimization.
unsigned getDefaultLocationReserved2Flags() const override;
- /// Tries to emit declare variant function for \p OldGD from \p NewGD.
- /// \param OrigAddr LLVM IR value for \p OldGD.
- /// \param IsForDefinition true, if requested emission for the definition of
- /// \p OldGD.
- /// \returns true, was able to emit a definition function for \p OldGD, which
- /// points to \p NewGD.
- /// NVPTX backend does not support global aliases, so just use the function,
- /// emitted for \p NewGD instead of \p OldGD.
- bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD,
- llvm::GlobalValue *OrigAddr,
- bool IsForDefinition) override;
-
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
void clear() override;
@@ -395,7 +383,7 @@ public:
/// Perform check on requires decl to ensure that target architecture
/// supports unified addressing
- void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override;
+ void processRequiresDirective(const OMPRequiresDecl *D) override;
/// Returns default address space for the constant firstprivates, __constant__
/// address space by default.
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 4de64a32f2ac..4e5d1d3f16f6 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -385,7 +385,8 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
Run = FieldEnd;
continue;
}
- llvm::Type *Type = Types.ConvertTypeForMem(Field->getType());
+ llvm::Type *Type =
+ Types.ConvertTypeForMem(Field->getType(), /*ForBitFields=*/true);
// If we don't have a run yet, or don't live within the previous run's
// allocated storage then we allocate some storage and start a new run.
if (Run == FieldEnd || BitOffset >= Tail) {
@@ -405,15 +406,17 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
return;
}
- // Check if OffsetInRecord is better as a single field run. When OffsetInRecord
- // has legal integer width, and its bitfield offset is naturally aligned, it
- // is better to make the bitfield a separate storage component so as it can be
- // accessed directly with lower cost.
+ // Check if OffsetInRecord (the size in bits of the current run) is better
+ // as a single field run. When OffsetInRecord has legal integer width, and
+ // its bitfield offset is naturally aligned, it is better to make the
+ // bitfield a separate storage component so that it can be accessed
+ // directly at lower cost.
auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
uint64_t StartBitOffset) {
if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
return false;
- if (!DataLayout.isLegalInteger(OffsetInRecord))
+ if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) ||
+ !DataLayout.fitsInLegalInteger(OffsetInRecord))
return false;
// Make sure StartBitOffset is naturally aligned if it is treated as an
// IType integer.
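Under -ffine-grained-bitfield-accesses, the new test splits a run out only when its width is at least one byte, a power of two, and fits in a legal integer, so the separate unit can be loaded with a single aligned access. An illustrative layout (exact storage is target-dependent):

    struct S {
      unsigned a : 3;
      unsigned   : 13;  // pad so the next run starts byte-aligned
      unsigned b : 16;  // 16-bit, power-of-two, aligned run: eligible to
                        // become its own i16 storage unit under the new test
      unsigned c : 5;
    };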
@@ -729,8 +732,8 @@ CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types,
return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageOffset);
}
-CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
- llvm::StructType *Ty) {
+std::unique_ptr<CGRecordLayout>
+CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) {
CGRecordLowering Builder(*this, D, /*Packed=*/false);
Builder.lower(/*NonVirtualBaseType=*/false);
@@ -757,9 +760,9 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
// but we may need to recursively layout D while laying D out as a base type.
Ty->setBody(Builder.FieldTypes, Builder.Packed);
- CGRecordLayout *RL =
- new CGRecordLayout(Ty, BaseTy, Builder.IsZeroInitializable,
- Builder.IsZeroInitializableAsBase);
+ auto RL = std::make_unique<CGRecordLayout>(
+ Ty, BaseTy, (bool)Builder.IsZeroInitializable,
+ (bool)Builder.IsZeroInitializableAsBase);
RL->NonVirtualBases.swap(Builder.NonVirtualBases);
RL->CompleteObjectVirtualBases.swap(Builder.VirtualBases);
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 138459c68dbf..672909849bb7 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CGDebugInfo.h"
+#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
@@ -18,12 +19,14 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/PrettyStackTrace.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
using namespace CodeGen;
@@ -246,6 +249,12 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPFlushDirectiveClass:
EmitOMPFlushDirective(cast<OMPFlushDirective>(*S));
break;
+ case Stmt::OMPDepobjDirectiveClass:
+ EmitOMPDepobjDirective(cast<OMPDepobjDirective>(*S));
+ break;
+ case Stmt::OMPScanDirectiveClass:
+ EmitOMPScanDirective(cast<OMPScanDirective>(*S));
+ break;
case Stmt::OMPOrderedDirectiveClass:
EmitOMPOrderedDirective(cast<OMPOrderedDirective>(*S));
break;
@@ -601,6 +610,13 @@ void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
}
void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
+ bool nomerge = false;
+ for (const auto *A : S.getAttrs())
+ if (A->getKind() == attr::NoMerge) {
+ nomerge = true;
+ break;
+ }
+ SaveAndRestore<bool> save_nomerge(InNoMergeAttributedStmt, nomerge);
EmitStmt(S.getSubStmt(), S.getAttrs());
}
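InNoMergeAttributedStmt is driven by the nomerge statement attribute (spelled [[clang::nomerge]] on a call statement, to the best of my knowledge); marked calls are kept distinct rather than tail-merged, which preserves per-call-site debug locations. A hypothetical use:

    [[noreturn]] void fail();

    void check(int a, int b) {
      if (a < 0)
        [[clang::nomerge]] fail();  // each call keeps its own location
      if (b < 0)
        [[clang::nomerge]] fail();
    }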
@@ -721,8 +737,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
EmitBlock(LoopHeader.getBlock());
const SourceRange &R = S.getSourceRange();
- LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs,
- SourceLocToDebugLoc(R.getBegin()),
+ LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(),
+ WhileAttrs, SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
// Create an exit block for when the condition fails, which will
@@ -823,7 +839,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
EmitBlock(LoopCond.getBlock());
const SourceRange &R = S.getSourceRange();
- LoopStack.push(LoopBody, CGM.getContext(), DoAttrs,
+ LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs,
SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
@@ -881,7 +897,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
EmitBlock(CondBlock);
const SourceRange &R = S.getSourceRange();
- LoopStack.push(CondBlock, CGM.getContext(), ForAttrs,
+ LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
@@ -982,7 +998,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
EmitBlock(CondBlock);
const SourceRange &R = S.getSourceRange();
- LoopStack.push(CondBlock, CGM.getContext(), ForAttrs,
+ LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
@@ -1054,6 +1070,19 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
EmitBranchThroughCleanup(ReturnBlock);
}
+namespace {
+// RAII struct used to save and restore a return statement's result expression.
+struct SaveRetExprRAII {
+ SaveRetExprRAII(const Expr *RetExpr, CodeGenFunction &CGF)
+ : OldRetExpr(CGF.RetExpr), CGF(CGF) {
+ CGF.RetExpr = RetExpr;
+ }
+ ~SaveRetExprRAII() { CGF.RetExpr = OldRetExpr; }
+ const Expr *OldRetExpr;
+ CodeGenFunction &CGF;
+};
+} // namespace
+
/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
/// if the function returns void, or may be missing one if the function returns
/// non-void. Fun stuff :).
@@ -1079,20 +1108,28 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
// Emit the result value, even if unused, to evaluate the side effects.
const Expr *RV = S.getRetValue();
- // Treat block literals in a return expression as if they appeared
- // in their own scope. This permits a small, easily-implemented
- // exception to our over-conservative rules about not jumping to
- // statements following block literals with non-trivial cleanups.
- RunCleanupsScope cleanupScope(*this);
- if (const FullExpr *fe = dyn_cast_or_null<FullExpr>(RV)) {
- enterFullExpression(fe);
- RV = fe->getSubExpr();
- }
+ // Record the result expression of the return statement. The recorded
+ // expression is used to determine whether a block capture's lifetime should
+ // end at the end of the full expression as opposed to the end of the scope
+ // enclosing the block expression.
+ //
+ // This permits a small, easily-implemented exception to our over-conservative
+ // rules about not jumping to statements following block literals with
+ // non-trivial cleanups.
+ SaveRetExprRAII SaveRetExpr(RV, *this);
+ RunCleanupsScope cleanupScope(*this);
+ if (const auto *EWC = dyn_cast_or_null<ExprWithCleanups>(RV))
+ RV = EWC->getSubExpr();
// FIXME: Clean this up by using an LValue for ReturnTemp,
// EmitStoreThroughLValue, and EmitAnyExpr.
- if (getLangOpts().ElideConstructors &&
- S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) {
+ // Check if the NRVO candidate was not globalized in OpenMP mode.
+ if (getLangOpts().ElideConstructors && S.getNRVOCandidate() &&
+ S.getNRVOCandidate()->isNRVOVariable() &&
+ (!getLangOpts().OpenMP ||
+ !CGM.getOpenMPRuntime()
+ .getAddressOfLocalVariable(*this, S.getNRVOCandidate())
+ .isValid())) {
// Apply the named return value optimization for this return statement,
// which means doing nothing: the appropriate result has already been
// constructed into the NRVO variable.
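The classic NRVO shape, for reference: the named local is constructed directly in the caller's return slot, so the return statement emits nothing. The added check bails out when OpenMP has globalized the candidate, because the variable then no longer lives in that slot:

    #include <string>

    std::string widen(const std::string &s) {
      std::string w = s;  // single named return value
      w += s;
      return w;  // no copy/move when NRVO applies
    }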
@@ -2091,8 +2128,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
} else {
ArgTypes.push_back(Dest.getAddress(*this).getType());
Args.push_back(Dest.getPointer(*this));
@@ -2116,8 +2154,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
if (Info.allowsRegister())
InOutConstraints += llvm::utostr(i);
else
@@ -2203,21 +2242,15 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
ArgTypes.push_back(Arg->getType());
Args.push_back(Arg);
Constraints += InputConstraint;
}
- // Append the "input" part of inout constraints last.
- for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) {
- ArgTypes.push_back(InOutArgTypes[i]);
- Args.push_back(InOutArgs[i]);
- }
- Constraints += InOutConstraints;
-
// Labels
SmallVector<llvm::BasicBlock *, 16> Transfer;
llvm::BasicBlock *Fallthrough = nullptr;
@@ -2225,7 +2258,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) {
IsGCCAsmGoto = GS->isAsmGoto();
if (IsGCCAsmGoto) {
- for (auto *E : GS->labels()) {
+ for (const auto *E : GS->labels()) {
JumpDest Dest = getJumpDestForLabel(E->getLabel());
Transfer.push_back(Dest.getBlock());
llvm::BlockAddress *BA =
@@ -2236,19 +2269,31 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
Constraints += ',';
Constraints += 'X';
}
- StringRef Name = "asm.fallthrough";
- Fallthrough = createBasicBlock(Name);
+ Fallthrough = createBasicBlock("asm.fallthrough");
}
}
+ // Append the "input" part of inout constraints last.
+ for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) {
+ ArgTypes.push_back(InOutArgTypes[i]);
+ Args.push_back(InOutArgs[i]);
+ }
+ Constraints += InOutConstraints;
+
// Clobbers
for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
StringRef Clobber = S.getClobber(i);
if (Clobber == "memory")
ReadOnly = ReadNone = false;
- else if (Clobber != "cc")
+ else if (Clobber != "cc") {
Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
+ if (CGM.getCodeGenOpts().StackClashProtector &&
+ getTarget().isSPRegName(Clobber)) {
+ CGM.getDiags().Report(S.getAsmLoc(),
+ diag::warn_stack_clash_protection_inline_asm);
+ }
+ }
if (!Constraints.empty())
Constraints += ',';
@@ -2287,9 +2332,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (IsGCCAsmGoto) {
llvm::CallBrInst *Result =
Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
+ EmitBlock(Fallthrough);
UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
ReadNone, S, ResultRegTypes, *this, RegResults);
- EmitBlock(Fallthrough);
} else {
llvm::CallInst *Result =
Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index dc3899f0e4ea..cfd5eda8cc80 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -18,14 +18,22 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
+static const VarDecl *getBaseDecl(const Expr *Ref);
+
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
@@ -53,7 +61,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
- (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
+ (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
+ cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}
public:
@@ -214,6 +223,12 @@ public:
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
CGF.EmitVarDecl(*OED);
}
+ } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
+ for (const Expr *E : UDP->varlists()) {
+ const Decl *D = getBaseDecl(E);
+ if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+ CGF.EmitVarDecl(*OED);
+ }
}
}
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
@@ -365,26 +380,28 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) {
}
namespace {
- /// Contains required data for proper outlined function codegen.
- struct FunctionOptions {
- /// Captured statement for which the function is generated.
- const CapturedStmt *S = nullptr;
- /// true if cast to/from UIntPtr is required for variables captured by
- /// value.
- const bool UIntPtrCastRequired = true;
- /// true if only casted arguments must be registered as local args or VLA
- /// sizes.
- const bool RegisterCastedArgsOnly = false;
- /// Name of the generated function.
- const StringRef FunctionName;
- explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
- bool RegisterCastedArgsOnly,
- StringRef FunctionName)
- : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
- RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
- FunctionName(FunctionName) {}
- };
-}
+/// Contains required data for proper outlined function codegen.
+struct FunctionOptions {
+ /// Captured statement for which the function is generated.
+ const CapturedStmt *S = nullptr;
+ /// true if cast to/from UIntPtr is required for variables captured by
+ /// value.
+ const bool UIntPtrCastRequired = true;
+ /// true if only casted arguments must be registered as local args or VLA
+ /// sizes.
+ const bool RegisterCastedArgsOnly = false;
+ /// Name of the generated function.
+ const StringRef FunctionName;
+ /// Location of the non-debug version of the outlined function.
+ SourceLocation Loc;
+ explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
+ bool RegisterCastedArgsOnly, StringRef FunctionName,
+ SourceLocation Loc)
+ : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
+ RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
+ FunctionName(FunctionName), Loc(Loc) {}
+};
+} // namespace
static llvm::Function *emitOutlinedFunctionPrologue(
CodeGenFunction &CGF, FunctionArgList &Args,
@@ -485,7 +502,9 @@ static llvm::Function *emitOutlinedFunctionPrologue(
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
- FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
+ FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
+ FO.UIntPtrCastRequired ? FO.Loc
+ : CD->getBody()->getBeginLoc());
unsigned Cnt = CD->getContextParamPosition();
I = FO.S->captures().begin();
for (const FieldDecl *FD : RD->fields()) {
@@ -560,7 +579,8 @@ static llvm::Function *emitOutlinedFunctionPrologue(
}
llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ SourceLocation Loc) {
assert(
CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
@@ -577,7 +597,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
if (NeedWrapperFunction)
Out << "_debug__";
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
- Out.str());
+ Out.str(), Loc);
llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
VLASizes, CXXThisValue, FO);
CodeGenFunction::OMPPrivateScope LocalScope(*this);
@@ -600,7 +620,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
/*RegisterCastedArgsOnly=*/true,
- CapturedStmtInfo->getHelperName());
+ CapturedStmtInfo->getHelperName(), Loc);
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
Args.clear();
@@ -632,8 +652,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
}
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
}
- CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
- F, CallArgs);
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
WrapperCGF.FinishFunction();
return WrapperF;
}
@@ -747,11 +766,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
getLangOpts().OpenMPIsDevice &&
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
bool FirstprivateIsLastprivate = false;
- llvm::DenseSet<const VarDecl *> Lastprivates;
+ llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
for (const auto *D : C->varlists())
- Lastprivates.insert(
- cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ Lastprivates.try_emplace(
+ cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
+ C->getKind());
}
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
@@ -761,8 +781,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
bool MustEmitFirstprivateCopy =
CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
- auto IRef = C->varlist_begin();
- auto InitsRef = C->inits().begin();
+ const auto *IRef = C->varlist_begin();
+ const auto *InitsRef = C->inits().begin();
for (const Expr *IInit : C->private_copies()) {
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
bool ThisFirstprivateIsLastprivate =
@@ -853,14 +873,34 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
});
} else {
Address OriginalAddr = OriginalLVal.getAddress(*this);
- IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, VDInit, OriginalAddr, VD]() {
+ IsRegistered =
+ PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
+ ThisFirstprivateIsLastprivate,
+ OrigVD, &Lastprivates, IRef]() {
// Emit private VarDecl with copy init.
// Remap temp VDInit variable to the address of the original
// variable (for proper handling of captured global variables).
setAddrOfLocalVar(VDInit, OriginalAddr);
EmitDecl(*VD);
LocalDeclMap.erase(VDInit);
+ if (ThisFirstprivateIsLastprivate &&
+ Lastprivates[OrigVD->getCanonicalDecl()] ==
+ OMPC_LASTPRIVATE_conditional) {
+ // Create/init special variable for lastprivate conditionals.
+ Address VDAddr =
+ CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
+ *this, OrigVD);
+ llvm::Value *V = EmitLoadOfScalar(
+ MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
+ AlignmentSource::Decl),
+ (*IRef)->getExprLoc());
+ EmitStoreOfScalar(V,
+ MakeAddrLValue(VDAddr, (*IRef)->getType(),
+ AlignmentSource::Decl));
+ LocalDeclMap.erase(VD);
+ setAddrOfLocalVar(VD, VDAddr);
+ return VDAddr;
+ }
return GetAddrOfLocalVar(VD);
});
}
@@ -990,8 +1030,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
!getLangOpts().OpenMPSimd)
break;
- auto IRef = C->varlist_begin();
- auto IDestRef = C->destination_exprs().begin();
+ const auto *IRef = C->varlist_begin();
+ const auto *IDestRef = C->destination_exprs().begin();
for (const Expr *IInit : C->private_copies()) {
// Keep the address of the original variable for future update at the end
// of the loop.
@@ -1013,7 +1053,15 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
// for 'firstprivate' clause.
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
+ OrigVD]() {
+ if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
+ Address VDAddr =
+ CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
+ OrigVD);
+ setAddrOfLocalVar(VD, VDAddr);
+ return VDAddr;
+ }
// Emit private VarDecl with copy init.
EmitDecl(*VD);
return GetAddrOfLocalVar(VD);
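For context, a sketch of the OpenMP 5.0 construct this initialization supports (illustrative user code; 'x', 'a', and 'n' are assumed names):

    // With the 'conditional' modifier, 'x' is copied back only from the
    // iteration that last assigned it, which is why codegen creates the
    // special tracking variable via emitLastprivateConditionalInit.
    #pragma omp parallel for lastprivate(conditional: x)
    for (int i = 0; i < n; ++i)
      if (a[i] > 0)
        x = a[i];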
@@ -1099,7 +1147,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
PrivateAddr =
Address(Builder.CreateLoad(PrivateAddr),
- getNaturalTypeAlignment(RefTy->getPointeeType()));
+ CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
// Store the last value to the private copy in the last iteration.
if (C->getKind() == OMPC_LASTPRIVATE_conditional)
CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
@@ -1122,7 +1170,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
- CodeGenFunction::OMPPrivateScope &PrivateScope) {
+ CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
if (!HaveInsertPoint())
return;
SmallVector<const Expr *, 4> Shareds;
@@ -1130,32 +1178,36 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
+ OMPTaskDataTy Data;
+ SmallVector<const Expr *, 4> TaskLHSs;
+ SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Shareds.emplace_back(Ref);
- Privates.emplace_back(*IPriv);
- ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
+ if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
+ continue;
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ if (C->getModifier() == OMPC_REDUCTION_task) {
+ Data.ReductionVars.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
- ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
+ ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
unsigned Count = 0;
- auto ILHS = LHSs.begin();
- auto IRHS = RHSs.begin();
- auto IPriv = Privates.begin();
+ auto *ILHS = LHSs.begin();
+ auto *IRHS = RHSs.begin();
+ auto *IPriv = Privates.begin();
for (const Expr *IRef : Shareds) {
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
// Emit private VarDecl with reduction init.
- RedCG.emitSharedLValue(*this, Count);
+ RedCG.emitSharedOrigLValue(*this, Count);
RedCG.emitAggregateType(*this, Count);
AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
@@ -1222,6 +1274,118 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
++IPriv;
++Count;
}
+ if (!Data.ReductionVars.empty()) {
+ Data.IsReductionWithTaskMod = true;
+ Data.IsWorksharingReduction =
+ isOpenMPWorksharingDirective(D.getDirectiveKind());
+ llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
+ const Expr *TaskRedRef = nullptr;
+ switch (D.getDirectiveKind()) {
+ case OMPD_parallel:
+ TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_for:
+ TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_sections:
+ TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_for:
+ TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_master:
+ TaskRedRef =
+ cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_sections:
+ TaskRedRef =
+ cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel:
+ TaskRedRef =
+ cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel_for:
+ TaskRedRef =
+ cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_distribute_parallel_for:
+ TaskRedRef =
+ cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_target_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_simd:
+ case OMPD_for_simd:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_parallel_for_simd:
+ case OMPD_task:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
+ case OMPD_ordered:
+ case OMPD_atomic:
+ case OMPD_teams:
+ case OMPD_target:
+ case OMPD_cancellation_point:
+ case OMPD_cancel:
+ case OMPD_target_data:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
+ case OMPD_distribute:
+ case OMPD_target_update:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_declare_simd:
+ case OMPD_requires:
+ case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
+ case OMPD_unknown:
+ default:
+ llvm_unreachable("Enexpected directive with task reductions.");
+ }
+
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
+ EmitVarDecl(*VD);
+ EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
+ /*Volatile=*/false, TaskRedRef->getType());
+ }
}
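For context, a sketch of the OpenMP 5.0 'task' reduction modifier whose descriptor the switch above looks up per directive kind (illustrative user code; work() is an assumed function):

    // reduction(task, +: sum) sets up a task-reduction descriptor; the
    // TaskReductionRefExpr read above is the hidden variable that receives
    // the value produced by the runtime initializer (emitTaskReductionInit).
    #pragma omp parallel reduction(task, +: sum)
    {
      #pragma omp task in_reduction(+: sum)
      sum += work();
    }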
void CodeGenFunction::EmitOMPReductionClauseFinal(
@@ -1233,14 +1397,25 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
+ bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+ // Do not emit for inscan reductions.
+ if (C->getModifier() == OMPC_REDUCTION_inscan)
+ continue;
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ IsReductionWithTaskMod =
+ IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
+ if (IsReductionWithTaskMod) {
+ CGM.getOpenMPRuntime().emitTaskReductionFini(
+ *this, D.getBeginLoc(),
+ isOpenMPWorksharingDirective(D.getDirectiveKind()));
+ }
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
@@ -1288,6 +1463,63 @@ typedef llvm::function_ref<void(CodeGenFunction &,
CodeGenBoundParametersTy;
} // anonymous namespace
+static void
+checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ if (CGF.getLangOpts().OpenMP < 50)
+ return;
+ llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ }
+  // Privates need not be analyzed since they are not captured at all.
+  // Task reductions may be skipped - tasks are ignored.
+  // Firstprivates do not return a value but may be passed by reference - no
+  // need to check for an updated lastprivate conditional.
+ for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ }
+ }
+ CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
+ CGF, S, PrivateDecls);
+}
+
static void emitCommonOMPParallelDirective(
CodeGenFunction &CGF, const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
@@ -1334,9 +1566,97 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
const OMPExecutableDirective &,
llvm::SmallVectorImpl<llvm::Value *> &) {}
-void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
+ CodeGenFunction &CGF, const VarDecl *VD) {
+ CodeGenModule &CGM = CGF.CGM;
+ auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ if (!VD)
+ return Address::invalid();
+ const VarDecl *CVD = VD->getCanonicalDecl();
+ if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+ return Address::invalid();
+ const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+ // Use the default allocation.
+ if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
+ !AA->getAllocator())
+ return Address::invalid();
+ llvm::Value *Size;
+ CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+ if (CVD->getType()->isVariablyModifiedType()) {
+ Size = CGF.getTypeSize(CVD->getType());
+ // Align the size: ((size + align - 1) / align) * align
+ Size = CGF.Builder.CreateNUWAdd(
+ Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
+ Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
+ Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
+ } else {
+ CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+ Size = CGM.getSize(Sz.alignTo(Align));
+ }
+
+ assert(AA->getAllocator() &&
+ "Expected allocator expression for non-default allocator.");
+ llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+  // According to the standard, the original allocator type is an enum
+  // (integer). Convert to a pointer type, if required.
+ if (Allocator->getType()->isIntegerTy())
+ Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
+ else if (Allocator->getType()->isPointerTy())
+ Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
+ CGM.VoidPtrTy);
+
+ llvm::Value *Addr = OMPBuilder.CreateOMPAlloc(
+ CGF.Builder, Size, Allocator,
+ getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
+ llvm::CallInst *FreeCI =
+ OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator);
+
+ CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr,
+ CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+ getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
+ return Address(Addr, Align);
+}
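The size computation above is the standard align-up idiom; a self-contained restatement of the arithmetic (a sketch, not part of the patch):

    // ((size + align - 1) / align) * align rounds size up to a multiple of
    // align, e.g. size = 13, align = 8 -> ((13 + 7) / 8) * 8 = 16.
    static unsigned long long alignUp(unsigned long long Size,
                                      unsigned long long Align) {
      return ((Size + Align - 1) / Align) * Align;
    }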
+
+Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
+ CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
+ SourceLocation Loc) {
+ CodeGenModule &CGM = CGF.CGM;
+ if (CGM.getLangOpts().OpenMPUseTLS &&
+ CGM.getContext().getTargetInfo().isTLSSupported())
+ return VDAddr;
+
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ llvm::Type *VarTy = VDAddr.getElementType();
+ llvm::Value *Data =
+ CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
+ llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
+ std::string Suffix = getNameWithSeparators({"cache", ""});
+ llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
+
+ llvm::CallInst *ThreadPrivateCacheCall =
+ OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
+
+ return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
+}
- if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
+ ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
+ SmallString<128> Buffer;
+ llvm::raw_svector_ostream OS(Buffer);
+ StringRef Sep = FirstSeparator;
+ for (StringRef Part : Parts) {
+ OS << Sep << Part;
+ Sep = Separator;
+ }
+ return OS.str().str();
+}
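Usage sketch for the helper above (the separator defaults used by callers such as getAddrOfThreadPrivate are assumed to be declared in the header):

    // A quick check against the definition:
    //   getNameWithSeparators({"x", "addr"}, ".", ".") == ".x.addr"
    // matching the "<name>.void.addr" temporaries created in CreateOMPAlloc.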
+void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// Check if we have any if clause associated with the directive.
llvm::Value *IfCond = nullptr;
if (const auto *C = S.getSingleClause<OMPIfClause>())
@@ -1357,15 +1677,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
    // The cleanup callback that finalizes all variables at the given
    // location, and thus calls destructors etc.
auto FiniCB = [this](InsertPointTy IP) {
- CGBuilderTy::InsertPointGuard IPG(Builder);
- assert(IP.getBlock()->end() != IP.getPoint() &&
- "OpenMP IR Builder should cause terminated block!");
- llvm::BasicBlock *IPBB = IP.getBlock();
- llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
- IPBB->getTerminator()->eraseFromParent();
- Builder.SetInsertPoint(IPBB);
- CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
- EmitBranchThroughCleanup(Dest);
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
};
// Privatization callback that performs appropriate action for
@@ -1387,32 +1699,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
auto BodyGenCB = [ParallelRegionBodyStmt,
this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::BasicBlock &ContinuationBB) {
- auto OldAllocaIP = AllocaInsertPt;
- AllocaInsertPt = &*AllocaIP.getPoint();
-
- auto OldReturnBlock = ReturnBlock;
- ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);
-
- llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
- CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
- llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
- CodeGenIPBBTI->removeFromParent();
-
- Builder.SetInsertPoint(CodeGenIPBB);
-
- EmitStmt(ParallelRegionBodyStmt);
-
- Builder.Insert(CodeGenIPBBTI);
-
- AllocaInsertPt = OldAllocaIP;
- ReturnBlock = OldReturnBlock;
+ OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
+ ContinuationBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
+ CodeGenIP, ContinuationBB);
};
CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
- Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
- FiniCB, IfCond, NumThreads,
- ProcBind, S.hasCancel()));
+ Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
+ FiniCB, IfCond, NumThreads,
+ ProcBind, S.hasCancel()));
return;
}
@@ -1436,10 +1733,16 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
- emitEmptyBoundParameters);
- emitPostUpdateForReductionClause(*this, S,
- [](CodeGenFunction &) { return nullptr; });
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
@@ -1506,6 +1809,27 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
getProfileCount(D.getBody()));
EmitBlock(NextBB);
}
+
+ OMPPrivateScope InscanScope(*this);
+ EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
+ bool IsInscanRegion = InscanScope.Privatize();
+ if (IsInscanRegion) {
+    // Need to remember the blocks before and after the scan directive
+    // to dispatch them correctly depending on the clause used in
+    // this directive, inclusive or exclusive. For an inclusive scan the
+    // natural order of the blocks is used; for the exclusive clause the
+    // blocks must be executed in reverse order.
+ OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
+ OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
+    // No need to allocate an inscan exit block; in simd mode it is selected
+    // in the codegen for the scan directive.
+ if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
+ OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
+ OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
+ EmitBranch(OMPScanDispatch);
+ EmitBlock(OMPBeforeScanBlock);
+ }
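The blocks created here split the loop body at the OpenMP 5.0 'scan' directive; a minimal inclusive scan for orientation (illustrative user code; 'a', 'b', 'sum', and 'n' are assumed names):

    #pragma omp simd reduction(inscan, +: sum)
    for (int i = 0; i < n; ++i) {
      sum += a[i];                  // input phase -> OMPBeforeScanBlock
      #pragma omp scan inclusive(sum)
      b[i] = sum;                   // scan phase  -> OMPAfterScanBlock
    }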
+
// Emit loop variables for C++ range loops.
const Stmt *Body =
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
@@ -1515,13 +1839,17 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
Body, /*TryImperfectlyNestedLoops=*/true),
D.getCollapsedNumber());
+ // Jump to the dispatcher at the end of the loop body.
+ if (IsInscanRegion)
+ EmitBranch(OMPScanExitBlock);
+
// The end (updates/cleanups).
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
}
void CodeGenFunction::EmitOMPInnerLoop(
- const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
+ const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
const Expr *IncExpr,
const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
@@ -1531,8 +1859,19 @@ void CodeGenFunction::EmitOMPInnerLoop(
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
const SourceRange R = S.getSourceRange();
- LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
- SourceLocToDebugLoc(R.getEnd()));
+
+ // If attributes are attached, push to the basic block with them.
+ const auto &OMPED = cast<OMPExecutableDirective>(S);
+ const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
+ const Stmt *SS = ICS->getCapturedStmt();
+ const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
+ if (AS)
+ LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
+ AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+ else
+ LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
@@ -1671,7 +2010,7 @@ static void emitAlignedClause(CodeGenFunction &CGF,
"alignment is not power of 2");
if (Alignment != 0) {
llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
- CGF.EmitAlignmentAssumption(
+ CGF.emitAlignmentAssumption(
PtrValue, E, /*No second loc needed*/ SourceLocation(),
llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
}
@@ -1835,6 +2174,18 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
LoopStack.setParallel(!IsMonotonic);
LoopStack.setVectorizeEnable();
emitSimdlenSafelenClause(*this, D, IsMonotonic);
+ if (const auto *C = D.getSingleClause<OMPOrderClause>())
+ if (C->getKind() == OMPC_ORDER_concurrent)
+ LoopStack.setParallel(/*Enable=*/true);
+ if ((D.getDirectiveKind() == OMPD_simd ||
+ (getLangOpts().OpenMPSimd &&
+ isOpenMPSimdDirective(D.getDirectiveKind()))) &&
+ llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ }))
+ // Disable parallel access in case of prefix sum.
+ LoopStack.setParallel(/*Enable=*/false);
}
void CodeGenFunction::EmitOMPSimdFinal(
@@ -1886,7 +2237,6 @@ void CodeGenFunction::EmitOMPSimdFinal(
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
}
@@ -1917,12 +2267,14 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
BodyCodeGen(CGF);
};
const Expr *IfCond = nullptr;
- for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
- if (CGF.getLangOpts().OpenMP >= 50 &&
- (C->getNameModifier() == OMPD_unknown ||
- C->getNameModifier() == OMPD_simd)) {
- IfCond = C->getCondition();
- break;
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (CGF.getLangOpts().OpenMP >= 50 &&
+ (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_simd)) {
+ IfCond = C->getCondition();
+ break;
+ }
}
}
if (IfCond) {
@@ -2007,10 +2359,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(
- CGF, S);
- CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
- CGF.EmitStopPoint(&S);
+ emitOMPLoopBodyWithStopPoint(CGF, S,
+ CodeGenFunction::JumpDest());
},
[](CodeGenFunction &) {});
});
@@ -2031,11 +2381,19 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
}
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+ ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
+ OMPFirstScanLoop = true;
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitOMPSimdRegion(CGF, S, Action);
};
- OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPOuterLoop(
@@ -2103,10 +2461,14 @@ void CodeGenFunction::EmitOMPOuterLoop(
[&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
// Generate !llvm.loop.parallel metadata for loads and stores for loops
// with dynamic/guided scheduling and without ordered clause.
- if (!isOpenMPSimdDirective(S.getDirectiveKind()))
+ if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
CGF.LoopStack.setParallel(!IsMonotonic);
- else
+ if (const auto *C = S.getSingleClause<OMPOrderClause>())
+ if (C->getKind() == OMPC_ORDER_concurrent)
+ CGF.LoopStack.setParallel(/*Enable=*/true);
+ } else {
CGF.EmitOMPSimdInit(S, IsMonotonic);
+ }
},
[&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
&LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -2612,6 +2974,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
/* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+ bool IsMonotonic =
+ Ordered ||
+ ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
+ ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
+       !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
+         ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
+ ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
+ ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
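Schedule cases the new predicate distinguishes, per OpenMP 4.5/5.0 semantics (a sketch; body() is an assumed function):

    // schedule(static)               -> monotonic (no nonmonotonic modifier)
    // schedule(nonmonotonic: static) -> not monotonic (explicit modifier)
    // schedule(monotonic: dynamic)   -> monotonic (explicit modifier)
    // ordered clause present         -> always monotonic
    #pragma omp for schedule(nonmonotonic: static)
    for (int i = 0; i < n; ++i)
      body(i);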
if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
/* Chunked */ Chunk != nullptr) ||
StaticChunkedOne) &&
@@ -2620,9 +2990,13 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
emitCommonSimdLoop(
*this, S,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- if (isOpenMPSimdDirective(S.getDirectiveKind()))
- CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+ [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ CGF.EmitOMPSimdInit(S, IsMonotonic);
+ } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
+ if (C->getKind() == OMPC_ORDER_concurrent)
+ CGF.LoopStack.setParallel(/*Enable=*/true);
+ }
},
[IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
&S, ScheduleKind, LoopExit,
@@ -2663,10 +3037,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
: S.getCond(),
StaticChunkedOne ? S.getDistInc() : S.getInc(),
[&S, LoopExit](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime()
- .initLastprivateConditionalCounter(CGF, S);
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
+ emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
},
[](CodeGenFunction &) {});
});
@@ -2678,11 +3049,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
};
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
} else {
- const bool IsMonotonic =
- Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
- ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
- ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
- ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
const OMPLoopArguments LoopArguments(
@@ -2755,16 +3121,233 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
return {LBVal, UBVal};
}
+/// Emits the code for the directive with inscan reductions.
+/// The code is the following:
+/// \code
+/// size num_iters = <num_iters>;
+/// <type> buffer[num_iters];
+/// #pragma omp ...
+/// for (i: 0..<num_iters>) {
+/// <input phase>;
+/// buffer[i] = red;
+/// }
+/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
+/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
+/// buffer[cnt] op= buffer[cnt-pow(2,k)];
+/// #pragma omp ...
+/// for (i: 0..<num_iters>) {
+/// red = InclusiveScan ? buffer[i] : buffer[i-1];
+/// <scan phase>;
+/// }
+/// \endcode
+static void emitScanBasedDirective(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
+ llvm::function_ref<void(CodeGenFunction &)> FirstGen,
+ llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+ llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+ NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+ SmallVector<const Expr *, 4> Shareds;
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ SmallVector<const Expr *, 4> CopyOps;
+ SmallVector<const Expr *, 4> CopyArrayTemps;
+ SmallVector<const Expr *, 4> CopyArrayElems;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+ "Only inscan reductions are expected.");
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+ CopyArrayTemps.append(C->copy_array_temps().begin(),
+ C->copy_array_temps().end());
+ CopyArrayElems.append(C->copy_array_elems().begin(),
+ C->copy_array_elems().end());
+ }
+ {
+    // Emit buffers for each reduction variable.
+    // ReductionCodeGen is required to correctly emit the code for array
+    // reductions.
+ ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
+ unsigned Count = 0;
+ auto *ITA = CopyArrayTemps.begin();
+ for (const Expr *IRef : Privates) {
+ const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+ // Emit variably modified arrays, used for arrays/array sections
+ // reductions.
+ if (PrivateVD->getType()->isVariablyModifiedType()) {
+ RedCG.emitSharedOrigLValue(CGF, Count);
+ RedCG.emitAggregateType(CGF, Count);
+ }
+ CodeGenFunction::OpaqueValueMapping DimMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
+ ->getSizeExpr()),
+ RValue::get(OMPScanNumIterations));
+ // Emit temp buffer.
+ CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
+ ++ITA;
+ ++Count;
+ }
+ }
+ CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
+ {
+ // Emit loop with input phase:
+ // #pragma omp ...
+ // for (i: 0..<num_iters>) {
+ // <input phase>;
+ // buffer[i] = red;
+ // }
+ CGF.OMPFirstScanLoop = true;
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ FirstGen(CGF);
+ }
+ // Emit prefix reduction:
+ // for (int k = 0; k <= ceil(log2(n)); ++k)
+ llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
+ llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
+ llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
+ F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
+ LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
+ LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
+ llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
+ OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
+ CGF.EmitBlock(LoopBB);
+ auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
+ // size pow2k = 1;
+ auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
+ Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
+ // for (size i = n - 1; i >= 2 ^ k; --i)
+ // tmp[i] op= tmp[i-pow2k];
+ llvm::BasicBlock *InnerLoopBB =
+ CGF.createBasicBlock("omp.inner.log.scan.body");
+ llvm::BasicBlock *InnerExitBB =
+ CGF.createBasicBlock("omp.inner.log.scan.exit");
+ llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerLoopBB);
+ auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ IVal->addIncoming(NMin1, LoopBB);
+ {
+ CodeGenFunction::OMPPrivateScope PrivScope(CGF);
+ auto *ILHS = LHSs.begin();
+ auto *IRHS = RHSs.begin();
+ for (const Expr *CopyArrayElem : CopyArrayElems) {
+ const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ Address LHSAddr = Address::invalid();
+ {
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IVal));
+ LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
+ Address RHSAddr = Address::invalid();
+ {
+ llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(OffsetIVal));
+ RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
+ ++ILHS;
+ ++IRHS;
+ }
+ PrivScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitReduction(
+ CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+ {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
+ }
+ llvm::Value *NextIVal =
+ CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
+ CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerExitBB);
+ llvm::Value *Next =
+ CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
+ Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
+ // pow2k <<= 1;
+ llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+ Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
+ llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
+ CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+ auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
+ CGF.EmitBlock(ExitBB);
+
+ CGF.OMPFirstScanLoop = false;
+ SecondGen(CGF);
+}
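A worked restatement of the log-step combine emitted above, for n = 4 and op = '+' (a sketch matching the PHI-based IR loop nest, not part of the patch):

    // k=0, pow2k=1: buf[3]+=buf[2]; buf[2]+=buf[1]; buf[1]+=buf[0];
    // k=1, pow2k=2: buf[3]+=buf[1]; buf[2]+=buf[0];
    // After ceil(log2(4)) = 2 rounds, buf[i] holds the inclusive prefix
    // sum of the original elements 0..i.
    void prefixSumInPlace(long long *buf, unsigned long long n) {
      for (unsigned long long pow2k = 1; pow2k < n; pow2k <<= 1)
        for (unsigned long long i = n - 1; i >= pow2k; --i)
          buf[i] += buf[i - pow2k];
    }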
+
+static bool emitWorksharingDirective(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ bool HasCancel) {
+ bool HasLastprivates;
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(
+ CGF, S.getDirectiveKind(), HasCancel);
+ (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ // Emit an implicit barrier at the end.
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
+ OMPD_for);
+ };
+ const auto &&SecondGen = [&S, HasCancel,
+ &HasLastprivates](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(
+ CGF, S.getDirectiveKind(), HasCancel);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ };
+ emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+ } else {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
+ HasCancel);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ }
+ return HasLastprivates;
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
+ HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
S.hasCancel());
@@ -2773,17 +3356,19 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
+ HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
@@ -2791,6 +3376,8 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
@@ -2808,7 +3395,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, CapturedStmt, CS,
&HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
- ASTContext &C = CGF.getContext();
+ const ASTContext &C = CGF.getContext();
QualType KmpInt32Ty =
C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Emit helper vars inits.
@@ -2830,11 +3417,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
- BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
- OK_Ordinary, S.getBeginLoc(), FPOptions());
+ BinaryOperator *Cond = BinaryOperator::Create(
+ C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
+ S.getBeginLoc(), FPOptionsOverride());
// Increment for loop counter.
- UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
- S.getBeginLoc(), true);
+ UnaryOperator *Inc = UnaryOperator::Create(
+ C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
+ S.getBeginLoc(), true, FPOptionsOverride());
auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
// Iterate through all sections and emit a switch construct:
// switch (IV) {
@@ -2847,7 +3436,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// break;
// }
// .omp.sections.exit:
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
llvm::SwitchInst *SwitchStmt =
CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
@@ -2905,7 +3493,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// IV = LB;
CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
// while (idx <= UB) { BODY; ++idx; }
- CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
+ CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
[](CodeGenFunction &) {});
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
@@ -2949,6 +3537,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
EmitSections(S);
}
@@ -2957,6 +3547,8 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
OMPD_sections);
}
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
@@ -2995,6 +3587,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
CopyprivateVars, DestExprs,
@@ -3007,6 +3601,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
*this, S.getBeginLoc(),
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
}
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
@@ -3018,11 +3614,75 @@ static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
+ const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt();
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+ Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB));
+
+ return;
+ }
OMPLexicalScope Scope(*this, S, OMPD_unknown);
emitMaster(*this, S);
}
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
+ const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt();
+ const Expr *Hint = nullptr;
+ if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
+ Hint = HintClause->getHint();
+
+ // TODO: This is slightly different from what's currently being done in
+ // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
+ // about typing is final.
+ llvm::Value *HintInst = nullptr;
+ if (Hint)
+ HintInst =
+ Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+ Builder.restoreIP(OMPBuilder.CreateCritical(
+ Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
+ HintInst));
+
+ return;
+ }
+
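User-level form lowered by this new path (hint constants come from <omp.h>; omp_sync_hint_speculative is assumed available per OpenMP 5.0):

    // The directive name feeds CreateCritical; the hint expression is cast
    // to i32 before reaching the runtime, as noted in the TODO above.
    #pragma omp critical(update) hint(omp_sync_hint_speculative)
    counter += delta;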
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
@@ -3042,12 +3702,16 @@ void CodeGenFunction::EmitOMPParallelForDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ (void)emitWorksharingDirective(CGF, S, S.hasCancel());
};
- emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
- emitEmptyBoundParameters);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+ emitEmptyBoundParameters);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
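As a rough illustration of what the LastprivateConditionalRAII bracketing tracks (n and a are assumed inputs):

    void last_writer(const int *a, int n, int *out) {
      int k = -1;
    #pragma omp parallel for lastprivate(conditional: k)
      for (int i = 0; i < n; ++i)
        if (a[i] != 0)
          k = i; // After the loop, k holds the value from the last iteration
                 // that actually performed an assignment, not simply the
                 // value from the final iteration.
      *out = k;
    }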
void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -3056,11 +3720,16 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
- emitEmptyBoundParameters);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
+ emitEmptyBoundParameters);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelMasterDirective(
@@ -3086,10 +3755,16 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(
emitMaster(CGF, S);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
- emitEmptyBoundParameters);
- emitPostUpdateForReductionClause(*this, S,
- [](CodeGenFunction &) { return nullptr; });
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -3100,8 +3775,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
Action.Enter(CGF);
CGF.EmitSections(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
- emitEmptyBoundParameters);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
+ emitEmptyBoundParameters);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPTaskBasedDirective(
@@ -3188,33 +3869,28 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, S.getBeginLoc(), LHSs, RHSs, Data);
// Build list of dependences.
- for (const auto *C : S.getClausesOfKind<OMPDependClause>())
- for (const Expr *IRef : C->varlists())
- Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
+ OMPTaskDataTy::DependData &DD =
+ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
+ DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
+ }
auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
CapturedRegion](CodeGenFunction &CGF,
PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
+ llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
!Data.LastprivateVars.empty()) {
llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
@@ -3241,6 +3917,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
".firstpriv.ptr.addr");
PrivatePtrs.emplace_back(VD, PrivatePtr);
+ FirstprivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
for (const Expr *E : Data.LastprivateVars) {
@@ -3271,13 +3948,21 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
}
}
if (Data.Reductions) {
+ OMPPrivateScope FirstprivateScope(CGF);
+ for (const auto &Pair : FirstprivatePtrs) {
+ Address Replacement(CGF.Builder.CreateLoad(Pair.second),
+ CGF.getContext().getDeclAlign(Pair.first));
+ FirstprivateScope.addPrivate(Pair.first,
+ [Replacement]() { return Replacement; });
+ }
+ (void)FirstprivateScope.Privatize();
OMPLexicalScope LexScope(CGF, S, CapturedRegion);
- ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
- Data.ReductionOps);
+ ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
+ Data.ReductionCopies, Data.ReductionOps);
llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
- RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitSharedOrigLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
// Emit required threadprivate variables for
@@ -3322,9 +4007,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
// privatized earlier.
OMPPrivateScope InRedScope(CGF);
if (!InRedVars.empty()) {
- ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
+ ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
- RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitSharedOrigLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
// The taskgroup descriptor variable is always implicit firstprivate and
// privatized already during processing of the firstprivates.
@@ -3333,9 +4018,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
// initializer/combiner/finalizer.
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
RedCG, Cnt);
- llvm::Value *ReductionsPtr =
- CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
- TaskgroupDescriptors[Cnt]->getExprLoc());
+ llvm::Value *ReductionsPtr;
+ if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
+ ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
+ TRExpr->getExprLoc());
+ } else {
+ ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ }
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement = Address(
@@ -3448,9 +4137,11 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
(void)TargetScope.Privatize();
// Build list of dependences.
- for (const auto *C : S.getClausesOfKind<OMPDependClause>())
- for (const Expr *IRef : C->varlists())
- Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
+ OMPTaskDataTy::DependData &DD =
+ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
+ DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
+ }
auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
&InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
@@ -3537,6 +4228,8 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
SharedsTy, CapturedStruct, IfCond,
Data);
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
@@ -3562,21 +4255,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
llvm::Value *ReductionDesc =
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
@@ -3593,6 +4278,9 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
}
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
+ llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
+ ? llvm::AtomicOrdering::NotAtomic
+ : llvm::AtomicOrdering::AcquireRelease;
CGM.getOpenMPRuntime().emitFlush(
*this,
[&S]() -> ArrayRef<const Expr *> {
@@ -3601,7 +4289,233 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
FlushClause->varlist_end());
return llvm::None;
}(),
- S.getBeginLoc());
+ S.getBeginLoc(), AO);
+}
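That is, a flush with a variable list keeps the previous non-atomic lowering, while a stand-alone flush now carries acquire-release semantics; a quick sketch:

    int x;
    void f() {
    #pragma omp flush(x) // list present: AtomicOrdering::NotAtomic
    #pragma omp flush    // no list:      AtomicOrdering::AcquireRelease
    }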
+
+void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
+ const auto *DO = S.getSingleClause<OMPDepobjClause>();
+ LValue DOLVal = EmitLValue(DO->getDepobj());
+ if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
+ OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
+ DC->getModifier());
+ Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
+ Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
+ *this, Dependencies, DC->getBeginLoc());
+ EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
+ return;
+ }
+ if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
+ CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
+ return;
+ }
+ if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
+ CGM.getOpenMPRuntime().emitUpdateClause(
+ *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
+ return;
+ }
+}
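A hedged OpenMP 5.0 usage sketch exercising all three clause paths handled above (omp_depend_t comes from omp.h):

    #include <omp.h>
    void producer(int *p) {
      omp_depend_t obj;
    #pragma omp depobj(obj) depend(inout: p[0]) // emitDepobjDependClause
    #pragma omp task depend(depobj: obj)
      p[0] += 1;
    #pragma omp taskwait
    #pragma omp depobj(obj) update(in)          // emitUpdateClause
    #pragma omp depobj(obj) destroy             // emitDestroyClause
    }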
+
+void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
+ if (!OMPParentLoopDirectiveForScan)
+ return;
+ const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
+ bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
+ SmallVector<const Expr *, 4> Shareds;
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> CopyOps;
+ SmallVector<const Expr *, 4> CopyArrayTemps;
+ SmallVector<const Expr *, 4> CopyArrayElems;
+ for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
+ if (C->getModifier() != OMPC_REDUCTION_inscan)
+ continue;
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+ CopyArrayTemps.append(C->copy_array_temps().begin(),
+ C->copy_array_temps().end());
+ CopyArrayElems.append(C->copy_array_elems().begin(),
+ C->copy_array_elems().end());
+ }
+ if (ParentDir.getDirectiveKind() == OMPD_simd ||
+ (getLangOpts().OpenMPSimd &&
+ isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
+    // For the simd directive, and for simd-based directives in simd-only
+    // mode, use the following codegen:
+ // int x = 0;
+ // #pragma omp simd reduction(inscan, +: x)
+ // for (..) {
+ // <first part>
+ // #pragma omp scan inclusive(x)
+ // <second part>
+ // }
+ // is transformed to:
+ // int x = 0;
+ // for (..) {
+ // int x_priv = 0;
+ // <first part>
+ // x = x_priv + x;
+ // x_priv = x;
+ // <second part>
+ // }
+ // and
+ // int x = 0;
+ // #pragma omp simd reduction(inscan, +: x)
+ // for (..) {
+ // <first part>
+ // #pragma omp scan exclusive(x)
+ // <second part>
+ // }
+ // to
+ // int x = 0;
+ // for (..) {
+ // int x_priv = 0;
+ // <second part>
+ // int temp = x;
+ // x = x_priv + x;
+ // x_priv = temp;
+ // <first part>
+ // }
+ llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
+ EmitBranch(IsInclusive
+ ? OMPScanReduce
+ : BreakContinueStack.back().ContinueBlock.getBlock());
+ EmitBlock(OMPScanDispatch);
+ {
+ // New scope for correct construction/destruction of temp variables for
+ // exclusive scan.
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
+ EmitBlock(OMPScanReduce);
+ if (!IsInclusive) {
+ // Create temp var and copy LHS value to this temp value.
+ // TMP = LHS;
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ const Expr *TempExpr = CopyArrayTemps[I];
+ EmitAutoVarDecl(
+ *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
+ LValue DestLVal = EmitLValue(TempExpr);
+ LValue SrcLVal = EmitLValue(LHSs[I]);
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ }
+ CGM.getOpenMPRuntime().emitReduction(
+ *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+ {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ LValue DestLVal;
+ LValue SrcLVal;
+ if (IsInclusive) {
+ DestLVal = EmitLValue(RHSs[I]);
+ SrcLVal = EmitLValue(LHSs[I]);
+ } else {
+ const Expr *TempExpr = CopyArrayTemps[I];
+ DestLVal = EmitLValue(RHSs[I]);
+ SrcLVal = EmitLValue(TempExpr);
+ }
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ }
+ EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
+ OMPScanExitBlock = IsInclusive
+ ? BreakContinueStack.back().ContinueBlock.getBlock()
+ : OMPScanReduce;
+ EmitBlock(OMPAfterScanBlock);
+ return;
+ }
+ if (!IsInclusive) {
+ EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+ EmitBlock(OMPScanExitBlock);
+ }
+ if (OMPFirstScanLoop) {
+ // Emit buffer[i] = red; at the end of the input phase.
+ const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
+ .getIterationVariable()
+ ->IgnoreParenImpCasts();
+ LValue IdxLVal = EmitLValue(IVExpr);
+ llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
+ IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ const Expr *OrigExpr = Shareds[I];
+ const Expr *CopyArrayElem = CopyArrayElems[I];
+ OpaqueValueMapping IdxMapping(
+ *this,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IdxVal));
+ LValue DestLVal = EmitLValue(CopyArrayElem);
+ LValue SrcLVal = EmitLValue(OrigExpr);
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ }
+ EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+ if (IsInclusive) {
+ EmitBlock(OMPScanExitBlock);
+ EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+ }
+ EmitBlock(OMPScanDispatch);
+ if (!OMPFirstScanLoop) {
+ // Emit red = buffer[i]; at the entrance to the scan phase.
+ const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
+ .getIterationVariable()
+ ->IgnoreParenImpCasts();
+ LValue IdxLVal = EmitLValue(IVExpr);
+ llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
+ IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
+ llvm::BasicBlock *ExclusiveExitBB = nullptr;
+ if (!IsInclusive) {
+ llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
+ ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
+ llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
+ Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
+ EmitBlock(ContBB);
+ // Use idx - 1 iteration for exclusive scan.
+ IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
+ }
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ const Expr *OrigExpr = Shareds[I];
+ const Expr *CopyArrayElem = CopyArrayElems[I];
+ OpaqueValueMapping IdxMapping(
+ *this,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IdxVal));
+ LValue SrcLVal = EmitLValue(CopyArrayElem);
+ LValue DestLVal = EmitLValue(OrigExpr);
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ if (!IsInclusive) {
+ EmitBlock(ExclusiveExitBB);
+ }
+ }
+ EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
+ : OMPAfterScanBlock);
+ EmitBlock(OMPAfterScanBlock);
}
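A minimal inscan reduction that drives this codegen (the arrays and n are assumed inputs); everything before the scan pragma is the input phase, everything after it the scan phase:

    void prefix_sum(const int *a, int *b, int n) {
      int x = 0;
    #pragma omp simd reduction(inscan, + : x)
      for (int i = 0; i < n; ++i) {
        x += a[i];             // input phase
    #pragma omp scan inclusive(x)
        b[i] = x;              // scan phase: b[i] == a[0] + ... + a[i]
      }
    }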
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
@@ -3790,7 +4704,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
- RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
+ RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
@@ -3843,11 +4757,12 @@ void CodeGenFunction::EmitOMPDistributeDirective(
}
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
- const CapturedStmt *S) {
+ const CapturedStmt *S,
+ SourceLocation Loc) {
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
CGF.CapturedStmtInfo = &CapStmtInfo;
- llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
+ llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
Fn->setDoesNotRecurse();
return Fn;
}
@@ -3867,7 +4782,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
if (C) {
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
+ llvm::Function *OutlinedFn =
+ emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
OutlinedFn, CapturedVars);
} else {
@@ -3918,16 +4834,22 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
return ComplexVal;
}
-static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
LValue LVal, RValue RVal) {
- if (LVal.isGlobalReg()) {
+ if (LVal.isGlobalReg())
CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
- } else {
- CGF.EmitAtomicStore(RVal, LVal,
- IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic,
- LVal.isVolatile(), /*isInit=*/false);
- }
+ else
+ CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
+}
+
+static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO, LValue LVal,
+ SourceLocation Loc) {
+ if (LVal.isGlobalReg())
+ return CGF.EmitLoadOfLValue(LVal, Loc);
+ return CGF.EmitAtomicLoad(
+ LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
+ LVal.isVolatile());
}
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
@@ -3948,7 +4870,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
}
}
-static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
const Expr *X, const Expr *V,
SourceLocation Loc) {
// v = x;
@@ -3956,34 +4878,54 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
LValue VLValue = CGF.EmitLValue(V);
- RValue Res = XLValue.isGlobalReg()
- ? CGF.EmitLoadOfLValue(XLValue, Loc)
- : CGF.EmitAtomicLoad(
- XLValue, Loc,
- IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic,
- XLValue.isVolatile());
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the read or capture clause is specified and the acquire, acq_rel, or
+ // seq_cst clause is specified then the strong flush on exit from the atomic
+ // operation is also an acquire flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Acquire:
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrdering::Monotonic:
+ case llvm::AtomicOrdering::Release:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
-static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
- const Expr *X, const Expr *E,
- SourceLocation Loc) {
+static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO, const Expr *X,
+ const Expr *E, SourceLocation Loc) {
// x = expr;
assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
- emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
@@ -4104,10 +5046,10 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
return Res;
}
-static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
- const Expr *X, const Expr *E,
- const Expr *UE, bool IsXLHSInRHSPart,
- SourceLocation Loc) {
+static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO, const Expr *X,
+ const Expr *E, const Expr *UE,
+ bool IsXLHSInRHSPart, SourceLocation Loc) {
assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
"Update expr in 'atomic update' must be a binary operator.");
const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
@@ -4120,9 +5062,6 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- llvm::AtomicOrdering AO = IsSeqCst
- ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic;
const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
@@ -4134,12 +5073,25 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
};
(void)CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
@@ -4159,7 +5111,8 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value,
llvm_unreachable("Must be a scalar or complex.");
}
-static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO,
bool IsPostfixUpdate, const Expr *V,
const Expr *X, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
@@ -4170,9 +5123,6 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
LValue VLValue = CGF.EmitLValue(V);
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- llvm::AtomicOrdering AO = IsSeqCst
- ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic;
QualType NewVValType;
if (UE) {
// 'x' is updated with some additional value.
@@ -4200,6 +5150,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
};
auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
if (Res.first) {
// 'atomicrmw' instruction was generated.
if (IsPostfixUpdate) {
@@ -4226,6 +5177,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
Loc, Gen);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
if (Res.first) {
// 'atomicrmw' instruction was generated.
NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
@@ -4233,32 +5185,54 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
}
// Emit post-update store to 'v' of old/new 'x' value.
CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ // If the read or capture clause is specified and the acquire, acq_rel, or
+ // seq_cst clause is specified then the strong flush on exit from the atomic
+ // operation is also an acquire flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::AcquireRelease);
+ break;
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
- bool IsSeqCst, bool IsPostfixUpdate,
+ llvm::AtomicOrdering AO, bool IsPostfixUpdate,
const Expr *X, const Expr *V, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
SourceLocation Loc) {
switch (Kind) {
case OMPC_read:
- emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+ emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
break;
case OMPC_write:
- emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+ emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
break;
case OMPC_unknown:
case OMPC_update:
- emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
+ emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
break;
case OMPC_capture:
- emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
+ emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
IsXLHSInRHSPart, Loc);
break;
case OMPC_if:
@@ -4277,12 +5251,17 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_collapse:
case OMPC_default:
case OMPC_seq_cst:
+ case OMPC_acq_rel:
+ case OMPC_acquire:
+ case OMPC_release:
+ case OMPC_relaxed:
case OMPC_shared:
case OMPC_linear:
case OMPC_aligned:
case OMPC_copyin:
case OMPC_copyprivate:
case OMPC_flush:
+ case OMPC_depobj:
case OMPC_proc_bind:
case OMPC_schedule:
case OMPC_ordered:
@@ -4308,6 +5287,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_to:
case OMPC_from:
case OMPC_use_device_ptr:
+ case OMPC_use_device_addr:
case OMPC_is_device_ptr:
case OMPC_unified_address:
case OMPC_unified_shared_memory:
@@ -4317,38 +5297,76 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_device_type:
case OMPC_match:
case OMPC_nontemporal:
+ case OMPC_order:
+ case OMPC_destroy:
+ case OMPC_detach:
+ case OMPC_inclusive:
+ case OMPC_exclusive:
+ case OMPC_uses_allocators:
+ case OMPC_affinity:
+ default:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
- bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
+ llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
+ bool MemOrderingSpecified = false;
+ if (S.getSingleClause<OMPSeqCstClause>()) {
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPAcqRelClause>()) {
+ AO = llvm::AtomicOrdering::AcquireRelease;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPAcquireClause>()) {
+ AO = llvm::AtomicOrdering::Acquire;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPReleaseClause>()) {
+ AO = llvm::AtomicOrdering::Release;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPRelaxedClause>()) {
+ AO = llvm::AtomicOrdering::Monotonic;
+ MemOrderingSpecified = true;
+ }
OpenMPClauseKind Kind = OMPC_unknown;
for (const OMPClause *C : S.clauses()) {
- // Find first clause (skip seq_cst clause, if it is first).
- if (C->getClauseKind() != OMPC_seq_cst) {
+    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
+ // if it is first).
+ if (C->getClauseKind() != OMPC_seq_cst &&
+ C->getClauseKind() != OMPC_acq_rel &&
+ C->getClauseKind() != OMPC_acquire &&
+ C->getClauseKind() != OMPC_release &&
+ C->getClauseKind() != OMPC_relaxed) {
Kind = C->getClauseKind();
break;
}
}
-
- const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
- if (const auto *FE = dyn_cast<FullExpr>(CS))
- enterFullExpression(FE);
- // Processing for statements under 'atomic capture'.
- if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
- for (const Stmt *C : Compound->body()) {
- if (const auto *FE = dyn_cast<FullExpr>(C))
- enterFullExpression(FE);
+ if (!MemOrderingSpecified) {
+ llvm::AtomicOrdering DefaultOrder =
+ CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
+ if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
+ DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
+ (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
+ Kind == OMPC_capture)) {
+ AO = DefaultOrder;
+ } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
+ if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
+ AO = llvm::AtomicOrdering::Release;
+ } else if (Kind == OMPC_read) {
+ assert(Kind == OMPC_read && "Unexpected atomic kind.");
+ AO = llvm::AtomicOrdering::Acquire;
+ }
}
}
- auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
+ const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
+
+ auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.EmitStopPoint(CS);
- emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
- S.getV(), S.getExpr(), S.getUpdateExpr(),
- S.isXLHSInRHSPart(), S.getBeginLoc());
+ emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
+ S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
+ S.getBeginLoc());
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
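A sketch of how the OpenMP 5.0 memory-order clauses select llvm::AtomicOrdering values here (relaxed maps to Monotonic; with no clause, the default from a requires atomic_default_mem_order directive applies):

    int x, v;
    void f() {
    #pragma omp atomic write release
      x = 1;                  // Release store, plus a release flush on entry
    #pragma omp atomic read acquire
      v = x;                  // Acquire load, plus an acquire flush on exit
    #pragma omp atomic capture seq_cst
      { v = x; x = x + 1; }   // SequentiallyConsistent RMW, acq_rel flush
    }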
@@ -4370,6 +5388,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
return;
}
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
@@ -4384,9 +5404,10 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
}
// Check if we have any device clause associated with the directive.
- const Expr *Device = nullptr;
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
+ nullptr, OMPC_DEVICE_unknown);
if (auto *C = S.getSingleClause<OMPDeviceClause>())
- Device = C->getDevice();
+ Device.setPointerAndInt(C->getDevice(), C->getModifier());
// Check if we have an if clause whose conditional always evaluates to false
// or if we do not have any targets specified. If so the target region is not
@@ -4856,7 +5877,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
break;
}
}
- if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// TODO: This check is necessary as we only generate `omp parallel` through
// the OpenMPIRBuilder for now.
if (S.getCancelRegion() == OMPD_parallel) {
@@ -4865,7 +5887,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
IfCondition = EmitScalarExpr(IfCond,
/*IgnoreResultAssign=*/true);
return Builder.restoreIP(
- OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
+ OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion()));
}
}
@@ -4876,7 +5898,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
if (Kind == OMPD_parallel || Kind == OMPD_task ||
- Kind == OMPD_target_parallel)
+ Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
+ Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
return ReturnBlock;
assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
@@ -4888,9 +5911,8 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
}
void CodeGenFunction::EmitOMPUseDevicePtrClause(
- const OMPClause &NC, OMPPrivateScope &PrivateScope,
+ const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
- const auto &C = cast<OMPUseDevicePtrClause>(NC);
auto OrigVarIt = C.varlist_begin();
auto InitIt = C.inits().begin();
for (const Expr *PvtVarIt : C.private_copies()) {
@@ -4951,6 +5973,60 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
}
}
+static const VarDecl *getBaseDecl(const Expr *Ref) {
+ const Expr *Base = Ref->IgnoreParenImpCasts();
+ while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = ASE->getBase()->IgnoreParenImpCasts();
+ return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
+}
+
+void CodeGenFunction::EmitOMPUseDeviceAddrClause(
+ const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
+ const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
+ llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
+ for (const Expr *Ref : C.varlists()) {
+ const VarDecl *OrigVD = getBaseDecl(Ref);
+ if (!Processed.insert(OrigVD).second)
+ continue;
+ // In order to identify the right initializer we need to match the
+ // declaration used by the mapping logic. In some cases we may get
+ // OMPCapturedExprDecl that refers to the original declaration.
+ const ValueDecl *MatchingVD = OrigVD;
+ if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+      // OMPCapturedExprDecls are used to privatize fields of the current
+      // structure.
+ const auto *ME = cast<MemberExpr>(OED->getInit());
+ assert(isa<CXXThisExpr>(ME->getBase()) &&
+ "Base should be the current struct!");
+ MatchingVD = ME->getMemberDecl();
+ }
+
+ // If we don't have information about the current list item, move on to
+ // the next one.
+ auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
+ if (InitAddrIt == CaptureDeviceAddrMap.end())
+ continue;
+
+ Address PrivAddr = InitAddrIt->getSecond();
+    // For declrefs and variable length arrays, we need to load the pointer for
+    // correct mapping, since the pointer to the data was passed to the runtime.
+ if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
+ MatchingVD->getType()->isArrayType())
+ PrivAddr =
+ EmitLoadOfPointer(PrivAddr, getContext()
+ .getPointerType(OrigVD->getType())
+ ->castAs<PointerType>());
+ llvm::Type *RealTy =
+ ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
+ ->getPointerTo();
+ PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
+
+ (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
+ }
+}
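A hedged sketch of the new use_device_addr clause in action (launch_on_device is a hypothetical helper expecting a device address):

    extern void launch_on_device(int *device_addr); // hypothetical helper
    int buf[1024];
    void run() {
    #pragma omp target data map(to : buf) use_device_addr(buf)
      // Within the region, buf refers to the device copy's address, loaded
      // through the CaptureDeviceAddrMap entry privatized above.
      launch_on_device(buf);
    }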
+
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
@@ -4995,9 +6071,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
Info.CaptureDeviceAddrMap);
+ for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
+ CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
+ Info.CaptureDeviceAddrMap);
(void)PrivateScope.Privatize();
RCG(CGF);
} else {
+ OMPLexicalScope Scope(CGF, S, OMPD_unknown);
RCG(CGF);
}
};
@@ -5222,7 +6302,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
// Emit outlined function for task construct.
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
- Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ Address CapturedStruct = Address::invalid();
+ {
+ OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
+ CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ }
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
@@ -5322,8 +6406,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
- CGF.EmitStopPoint(&S);
+ emitOMPLoopBodyWithStopPoint(CGF, S,
+ CodeGenFunction::JumpDest());
},
[](CodeGenFunction &) {});
});
@@ -5376,11 +6460,15 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
}
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
EmitOMPTaskLoopBasedDirective(S);
}
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
const OMPTaskLoopSimdDirective &S) {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S);
EmitOMPTaskLoopBasedDirective(S);
}
@@ -5391,6 +6479,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
Action.Enter(CGF);
EmitOMPTaskLoopBasedDirective(S);
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
@@ -5401,6 +6491,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
Action.Enter(CGF);
EmitOMPTaskLoopBasedDirective(S);
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
@@ -5413,10 +6505,12 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
Action.Enter(CGF);
CGF.EmitOMPTaskLoopBasedDirective(S);
};
- OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false);
+ OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
S.getBeginLoc());
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
emitEmptyBoundParameters);
}
@@ -5433,6 +6527,8 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
S.getBeginLoc());
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
emitEmptyBoundParameters);
}
@@ -5461,19 +6557,43 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
const OMPExecutableDirective &D) {
+ if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
+ EmitOMPScanDirective(*SD);
+ return;
+ }
if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
return;
auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ OMPPrivateScope GlobalsScope(CGF);
+ if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
+      // Capture global firstprivates to avoid a crash.
+ for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
+ if (!VD || VD->hasLocalStorage())
+ continue;
+ if (!CGF.LocalDeclMap.count(VD)) {
+ LValue GlobLVal = CGF.EmitLValue(Ref);
+ GlobalsScope.addPrivate(
+ VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
+ }
+ }
+ }
+ }
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+ (void)GlobalsScope.Privatize();
+ ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
} else {
- OMPPrivateScope LoopGlobals(CGF);
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
for (const Expr *E : LD->counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
LValue GlobLVal = CGF.EmitLValue(E);
- LoopGlobals.addPrivate(
+ GlobalsScope.addPrivate(
VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
}
if (isa<OMPCapturedExprDecl>(VD)) {
@@ -5497,14 +6617,20 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
}
}
}
- LoopGlobals.Privatize();
+ (void)GlobalsScope.Privatize();
CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
}
};
- OMPSimdLexicalScope Scope(*this, D);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this,
- isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
- : D.getDirectiveKind(),
- CodeGen);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
+ OMPSimdLexicalScope Scope(*this, D);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this,
+ isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
+ : D.getDirectiveKind(),
+ CodeGen);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, D);
}
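The global-firstprivate capture added above matters for cases like the following under -fopenmp-simd, where only the simd half of the directive is honored but the file-scope variable must still be privatizable (a hedged example):

    int g = 42; // file-scope: no local storage, absent from LocalDeclMap
    void f(int *out) {
    #pragma omp taskloop simd firstprivate(g)
      for (int i = 0; i < 100; ++i)
        out[i] = i + g;
    }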
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 59631e802373..65b3b0c5f53d 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -363,8 +363,10 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
: FPT->getReturnType();
ReturnValueSlot Slot;
if (!ResultType->isVoidType() &&
- CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
- Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified());
+ (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect ||
+ hasAggregateEvaluationKind(ResultType)))
+ Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified(),
+ /*IsUnused=*/false, /*IsExternallyDestructed=*/true);
// Now emit our call.
llvm::CallBase *CallOrInvoke;
@@ -437,7 +439,8 @@ void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD,
// Finish the function to maintain CodeGenFunction invariants.
// FIXME: Don't emit unreachable code.
EmitBlock(createBasicBlock());
- FinishFunction();
+
+ FinishThunk();
}
void CodeGenFunction::generateThunk(llvm::Function *Fn,
@@ -564,7 +567,7 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,
CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
// Thunks for variadic methods are special because in general variadic
- // arguments cannot be perferctly forwarded. In the general case, clang
+ // arguments cannot be perfectly forwarded. In the general case, clang
// implements such thunks by cloning the original function body. However, for
// thunks with no return adjustment on targets that support musttail, we can
// use musttail to perfectly forward the variadic arguments.
@@ -616,29 +619,178 @@ void CodeGenVTables::EmitThunks(GlobalDecl GD) {
maybeEmitThunk(GD, Thunk, /*ForVTable=*/false);
}
-void CodeGenVTables::addVTableComponent(
- ConstantArrayBuilder &builder, const VTableLayout &layout,
- unsigned idx, llvm::Constant *rtti, unsigned &nextVTableThunkIndex) {
- auto &component = layout.vtable_components()[idx];
+void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,
+ llvm::Constant *component,
+ unsigned vtableAddressPoint,
+ bool vtableHasLocalLinkage,
+ bool isCompleteDtor) const {
+ // No need to get the offset of a nullptr.
+ if (component->isNullValue())
+ return builder.add(llvm::ConstantInt::get(CGM.Int32Ty, 0));
+
+ auto *globalVal =
+ cast<llvm::GlobalValue>(component->stripPointerCastsAndAliases());
+ llvm::Module &module = CGM.getModule();
+
+  // We don't want to copy the vtable's linkage exactly, because we still want
+  // the stub/proxy to be emitted so the offset can be calculated properly.
+  // Examples where no symbol would be emitted are the available_externally
+  // and private linkages.
+ auto stubLinkage = vtableHasLocalLinkage ? llvm::GlobalValue::InternalLinkage
+ : llvm::GlobalValue::ExternalLinkage;
+
+ llvm::Constant *target;
+ if (auto *func = dyn_cast<llvm::Function>(globalVal)) {
+ target = getOrCreateRelativeStub(func, stubLinkage, isCompleteDtor);
+ } else {
+ llvm::SmallString<16> rttiProxyName(globalVal->getName());
+ rttiProxyName.append(".rtti_proxy");
+
+ // The RTTI component may not always be emitted in the same linkage unit as
+ // the vtable. As a general case, we can make a dso_local proxy to the RTTI
+ // that points to the actual RTTI struct somewhere. This will result in a
+ // GOTPCREL relocation when taking the relative offset to the proxy.
+ llvm::GlobalVariable *proxy = module.getNamedGlobal(rttiProxyName);
+ if (!proxy) {
+ proxy = new llvm::GlobalVariable(module, globalVal->getType(),
+ /*isConstant=*/true, stubLinkage,
+ globalVal, rttiProxyName);
+ proxy->setDSOLocal(true);
+ proxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ if (!proxy->hasLocalLinkage()) {
+ proxy->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ proxy->setComdat(module.getOrInsertComdat(rttiProxyName));
+ }
+ }
+ target = proxy;
+ }
- auto addOffsetConstant = [&](CharUnits offset) {
- builder.add(llvm::ConstantExpr::getIntToPtr(
- llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()),
- CGM.Int8PtrTy));
- };
+ builder.addRelativeOffsetToPosition(CGM.Int32Ty, target,
+ /*position=*/vtableAddressPoint);
+}
+
+llvm::Function *CodeGenVTables::getOrCreateRelativeStub(
+ llvm::Function *func, llvm::GlobalValue::LinkageTypes stubLinkage,
+ bool isCompleteDtor) const {
+ // A complete object destructor can later be substituted in the vtable for an
+ // appropriate base object destructor when optimizations are enabled. This can
+ // happen for child classes that don't have their own destructor. In the case
+ // where a parent virtual destructor is not guaranteed to be in the same
+ // linkage unit as the child vtable, it's possible for an external reference
+ // for this destructor to be substituted into the child vtable, preventing it
+ // from being in rodata. If this function is a complete virtual destructor, we
+ // can just force a stub to be emitted for it.
+ if (func->isDSOLocal() && !isCompleteDtor)
+ return func;
+
+ llvm::SmallString<16> stubName(func->getName());
+ stubName.append(".stub");
+
+ // Instead of taking the offset between the vtable and virtual function
+ // directly, we emit a dso_local stub that just contains a tail call to the
+ // original virtual function and take the offset between that and the
+ // vtable. We do this because there are some cases where the original
+ // function that would've been inserted into the vtable is not dso_local
+ // which may require some kind of dynamic relocation which prevents the
+ // vtable from being readonly. On x86_64, taking the offset between the
+ // function and the vtable gets lowered to the offset between the PLT entry
+ // for the function and the vtable which gives us a PLT32 reloc. On AArch64,
+ // right now only CALL26 and JUMP26 instructions generate PLT relocations,
+ // so we manifest them with stubs that are just jumps to the original
+ // function.
+ auto &module = CGM.getModule();
+ llvm::Function *stub = module.getFunction(stubName);
+ if (stub) {
+ assert(stub->isDSOLocal() &&
+ "The previous definition of this stub should've been dso_local.");
+ return stub;
+ }
+
+ stub = llvm::Function::Create(func->getFunctionType(), stubLinkage, stubName,
+ module);
+
+  // Propagate function attributes.
+ stub->setAttributes(func->getAttributes());
+
+ stub->setDSOLocal(true);
+ stub->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ if (!stub->hasLocalLinkage()) {
+ stub->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ stub->setComdat(module.getOrInsertComdat(stubName));
+ }
+
+ // Fill the stub with a tail call that will be optimized.
+ llvm::BasicBlock *block =
+ llvm::BasicBlock::Create(module.getContext(), "entry", stub);
+ llvm::IRBuilder<> block_builder(block);
+ llvm::SmallVector<llvm::Value *, 8> args;
+ for (auto &arg : stub->args())
+ args.push_back(&arg);
+ llvm::CallInst *call = block_builder.CreateCall(func, args);
+ call->setAttributes(func->getAttributes());
+ call->setTailCall();
+ if (call->getType()->isVoidTy())
+ block_builder.CreateRetVoid();
+ else
+ block_builder.CreateRet(call);
+
+ return stub;
+}
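Roughly, for a virtual function that is not already dso_local, the emitted stub behaves like the following C++ analogy (names are illustrative; the real artifact is IR with hidden visibility and a comdat):

    // Original, possibly interposable virtual function.
    void widget_draw(void *self);
    // dso_local ".stub" against which the 32-bit vtable offset is taken.
    static void widget_draw_stub(void *self) {
      return widget_draw(self); // emitted as a tail call, same attributes
    }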
+
+bool CodeGenVTables::useRelativeLayout() const {
+ return CGM.getTarget().getCXXABI().isItaniumFamily() &&
+ CGM.getItaniumVTableContext().isRelativeLayout();
+}
+
+llvm::Type *CodeGenVTables::getVTableComponentType() const {
+ if (useRelativeLayout())
+ return CGM.Int32Ty;
+ return CGM.Int8PtrTy;
+}
+
+static void AddPointerLayoutOffset(const CodeGenModule &CGM,
+ ConstantArrayBuilder &builder,
+ CharUnits offset) {
+ builder.add(llvm::ConstantExpr::getIntToPtr(
+ llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()),
+ CGM.Int8PtrTy));
+}
+
+static void AddRelativeLayoutOffset(const CodeGenModule &CGM,
+ ConstantArrayBuilder &builder,
+ CharUnits offset) {
+ builder.add(llvm::ConstantInt::get(CGM.Int32Ty, offset.getQuantity()));
+}
+
+void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
+ const VTableLayout &layout,
+ unsigned componentIndex,
+ llvm::Constant *rtti,
+ unsigned &nextVTableThunkIndex,
+ unsigned vtableAddressPoint,
+ bool vtableHasLocalLinkage) {
+ auto &component = layout.vtable_components()[componentIndex];
+
+ auto addOffsetConstant =
+ useRelativeLayout() ? AddRelativeLayoutOffset : AddPointerLayoutOffset;
switch (component.getKind()) {
case VTableComponent::CK_VCallOffset:
- return addOffsetConstant(component.getVCallOffset());
+ return addOffsetConstant(CGM, builder, component.getVCallOffset());
case VTableComponent::CK_VBaseOffset:
- return addOffsetConstant(component.getVBaseOffset());
+ return addOffsetConstant(CGM, builder, component.getVBaseOffset());
case VTableComponent::CK_OffsetToTop:
- return addOffsetConstant(component.getOffsetToTop());
+ return addOffsetConstant(CGM, builder, component.getOffsetToTop());
case VTableComponent::CK_RTTI:
- return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
+ if (useRelativeLayout())
+ return addRelativeComponent(builder, rtti, vtableAddressPoint,
+ vtableHasLocalLinkage,
+ /*isCompleteDtor=*/false);
+ else
+ return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
case VTableComponent::CK_FunctionPointer:
case VTableComponent::CK_CompleteDtorPointer:
@@ -672,11 +824,21 @@ void CodeGenVTables::addVTableComponent(
? MD->hasAttr<CUDADeviceAttr>()
: (MD->hasAttr<CUDAHostAttr>() || !MD->hasAttr<CUDADeviceAttr>());
if (!CanEmitMethod)
- return builder.addNullPointer(CGM.Int8PtrTy);
+ return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int8PtrTy));
// Method is acceptable, continue processing as usual.
}
auto getSpecialVirtualFn = [&](StringRef name) -> llvm::Constant * {
+ // FIXME(PR43094): When merging comdat groups, lld can select a local
+ // symbol as the signature symbol even though it cannot be accessed
+ // outside that symbol's TU. The relative vtables ABI would make
+ // __cxa_pure_virtual and __cxa_deleted_virtual local symbols, and
+ // depending on link order, the comdat groups could resolve to the one
+ // with the local symbol. As a temporary solution, fill these components
+ // with zero. We shouldn't be calling these in the first place anyway.
+ if (useRelativeLayout())
+ return llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
+
+ // For NVPTX devices in OpenMP, emit special functions as null pointers;
// otherwise linking ends up with unresolved references.
if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsDevice &&
@@ -697,19 +859,20 @@ void CodeGenVTables::addVTableComponent(
if (cast<CXXMethodDecl>(GD.getDecl())->isPure()) {
if (!PureVirtualFn)
PureVirtualFn =
- getSpecialVirtualFn(CGM.getCXXABI().GetPureVirtualCallName());
+ getSpecialVirtualFn(CGM.getCXXABI().GetPureVirtualCallName());
fnPtr = PureVirtualFn;
// Deleted virtual member functions.
} else if (cast<CXXMethodDecl>(GD.getDecl())->isDeleted()) {
if (!DeletedVirtualFn)
DeletedVirtualFn =
- getSpecialVirtualFn(CGM.getCXXABI().GetDeletedVirtualCallName());
+ getSpecialVirtualFn(CGM.getCXXABI().GetDeletedVirtualCallName());
fnPtr = DeletedVirtualFn;
// Thunks.
} else if (nextVTableThunkIndex < layout.vtable_thunks().size() &&
- layout.vtable_thunks()[nextVTableThunkIndex].first == idx) {
+ layout.vtable_thunks()[nextVTableThunkIndex].first ==
+ componentIndex) {
auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second;
nextVTableThunkIndex++;
@@ -721,13 +884,19 @@ void CodeGenVTables::addVTableComponent(
fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true);
}
- fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy);
- builder.add(fnPtr);
- return;
+ if (useRelativeLayout()) {
+ return addRelativeComponent(
+ builder, fnPtr, vtableAddressPoint, vtableHasLocalLinkage,
+ component.getKind() == VTableComponent::CK_CompleteDtorPointer);
+ } else
+ return builder.add(llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy));
}
case VTableComponent::CK_UnusedFunctionPointer:
- return builder.addNullPointer(CGM.Int8PtrTy);
+ if (useRelativeLayout())
+ return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int32Ty));
+ else
+ return builder.addNullPointer(CGM.Int8PtrTy);
}
llvm_unreachable("Unexpected vtable component kind");
@@ -735,34 +904,41 @@ void CodeGenVTables::addVTableComponent(
llvm::Type *CodeGenVTables::getVTableType(const VTableLayout &layout) {
SmallVector<llvm::Type *, 4> tys;
- for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) {
- tys.push_back(llvm::ArrayType::get(CGM.Int8PtrTy, layout.getVTableSize(i)));
- }
+ llvm::Type *componentType = getVTableComponentType();
+ for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i)
+ tys.push_back(llvm::ArrayType::get(componentType, layout.getVTableSize(i)));
return llvm::StructType::get(CGM.getLLVMContext(), tys);
}
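As a worked example of the type this returns (sizes made up for illustration): a vtable group with a primary vtable of 5 components and one secondary vtable of 4 components becomes

    // classic layout:   { [5 x i8*], [4 x i8*] }
    // relative layout:  { [5 x i32], [4 x i32] }

one array per vtable in the group, each using the component type selected by getVTableComponentType().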
void CodeGenVTables::createVTableInitializer(ConstantStructBuilder &builder,
const VTableLayout &layout,
- llvm::Constant *rtti) {
+ llvm::Constant *rtti,
+ bool vtableHasLocalLinkage) {
+ llvm::Type *componentType = getVTableComponentType();
+
+ const auto &addressPoints = layout.getAddressPointIndices();
unsigned nextVTableThunkIndex = 0;
- for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) {
- auto vtableElem = builder.beginArray(CGM.Int8PtrTy);
- size_t thisIndex = layout.getVTableOffset(i);
- size_t nextIndex = thisIndex + layout.getVTableSize(i);
- for (unsigned i = thisIndex; i != nextIndex; ++i) {
- addVTableComponent(vtableElem, layout, i, rtti, nextVTableThunkIndex);
+ for (unsigned vtableIndex = 0, endIndex = layout.getNumVTables();
+ vtableIndex != endIndex; ++vtableIndex) {
+ auto vtableElem = builder.beginArray(componentType);
+
+ size_t vtableStart = layout.getVTableOffset(vtableIndex);
+ size_t vtableEnd = vtableStart + layout.getVTableSize(vtableIndex);
+ for (size_t componentIndex = vtableStart; componentIndex < vtableEnd;
+ ++componentIndex) {
+ addVTableComponent(vtableElem, layout, componentIndex, rtti,
+ nextVTableThunkIndex, addressPoints[vtableIndex],
+ vtableHasLocalLinkage);
}
vtableElem.finishAndAddTo(builder);
}
}
-llvm::GlobalVariable *
-CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
- const BaseSubobject &Base,
- bool BaseIsVirtual,
- llvm::GlobalVariable::LinkageTypes Linkage,
- VTableAddressPointsMapTy& AddressPoints) {
+llvm::GlobalVariable *CodeGenVTables::GenerateConstructionVTable(
+ const CXXRecordDecl *RD, const BaseSubobject &Base, bool BaseIsVirtual,
+ llvm::GlobalVariable::LinkageTypes Linkage,
+ VTableAddressPointsMapTy &AddressPoints) {
if (CGDebugInfo *DI = CGM.getModuleDebugInfo())
DI->completeClassData(Base.getBase());
@@ -779,7 +955,15 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
cast<ItaniumMangleContext>(CGM.getCXXABI().getMangleContext())
.mangleCXXCtorVTable(RD, Base.getBaseOffset().getQuantity(),
Base.getBase(), Out);
- StringRef Name = OutName.str();
+ SmallString<256> Name(OutName);
+
+ bool UsingRelativeLayout = getItaniumVTableContext().isRelativeLayout();
+ bool VTableAliasExists =
+ UsingRelativeLayout && CGM.getModule().getNamedAlias(Name);
+ if (VTableAliasExists) {
+ // We previously made the vtable hidden and changed its name.
+ Name.append(".local");
+ }
llvm::Type *VTType = getVTableType(*VTLayout);
@@ -806,7 +990,8 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
// Create and set the initializer.
ConstantInitBuilder builder(CGM);
auto components = builder.beginStruct();
- createVTableInitializer(components, *VTLayout, RTTI);
+ createVTableInitializer(components, *VTLayout, RTTI,
+ VTable->hasLocalLinkage());
components.finishAndSetAsInitializer(VTable);
// Set properties only after the initializer has been set to ensure that the
@@ -816,9 +1001,68 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get());
+ if (UsingRelativeLayout && !VTable->isDSOLocal())
+ GenerateRelativeVTableAlias(VTable, OutName);
+
return VTable;
}
+// If the VTable is not dso_local, then we cannot indicate that it needs no
+// relocations and cannot move it into rodata. This frequently occurs for
+// classes that are meant to be exported from a DSO (as in libc++). For cases
+// like these, we can make the vtable hidden or private and create a public
+// alias with the same visibility and linkage as the original vtable type.
+void CodeGenVTables::GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable,
+ llvm::StringRef AliasNameRef) {
+ assert(getItaniumVTableContext().isRelativeLayout() &&
+ "Can only use this if the relative vtable ABI is used");
+ assert(!VTable->isDSOLocal() && "This should be called only if the vtable is "
+ "not guaranteed to be dso_local");
+
+ // If the vtable is available_externally, we shouldn't (and don't need to)
+ // generate an alias for it in the first place, since the vtable won't
+ // actually be emitted in this compilation unit.
+ if (VTable->hasAvailableExternallyLinkage())
+ return;
+
+ // Create a new string in the event the alias is already the name of the
+ // vtable. Using the reference directly could lead to use of an uninitialized
+ // value in the module's StringMap.
+ llvm::SmallString<256> AliasName(AliasNameRef);
+ VTable->setName(AliasName + ".local");
+
+ auto Linkage = VTable->getLinkage();
+ assert(llvm::GlobalAlias::isValidLinkage(Linkage) &&
+ "Invalid vtable alias linkage");
+
+ llvm::GlobalAlias *VTableAlias = CGM.getModule().getNamedAlias(AliasName);
+ if (!VTableAlias) {
+ VTableAlias = llvm::GlobalAlias::create(VTable->getValueType(),
+ VTable->getAddressSpace(), Linkage,
+ AliasName, &CGM.getModule());
+ } else {
+ assert(VTableAlias->getValueType() == VTable->getValueType());
+ assert(VTableAlias->getLinkage() == Linkage);
+ }
+ VTableAlias->setVisibility(VTable->getVisibility());
+ VTableAlias->setUnnamedAddr(VTable->getUnnamedAddr());
+
+ // Both of these imply dso_local for the vtable.
+ if (!VTable->hasComdat()) {
+ VTable->setLinkage(llvm::GlobalValue::PrivateLinkage);
+ } else {
+ // If the vtable is in a comdat, we shouldn't make its linkage private, due
+ // to an issue in lld where private symbols can be used as the key symbol
+ // when choosing the prevalent group. That leads to "relocation refers to a
+ // symbol in a discarded section". At least make it hidden, since we don't
+ // want to expose it.
+ VTable->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ }
+
+ VTableAlias->setAliasee(VTable);
+}
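The net effect of this renaming dance is easier to see as symbols. A sketch for an exported class A (names illustrative):

    // Before: a single interposable vtable symbol; relative references to it
    // would need dynamic relocations.
    //   _ZTV1A          default visibility, lives in .data.rel.ro
    // After GenerateRelativeVTableAlias:
    //   _ZTV1A.local    private (or hidden, if in a comdat), relocation-free
    //   _ZTV1A          alias to _ZTV1A.local, original linkage/visibility
    // Other DSOs keep binding to _ZTV1A; image-local relative offsets are
    // computed against the dso_local _ZTV1A.local.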
+
static bool shouldEmitAvailableExternallyVTable(const CodeGenModule &CGM,
const CXXRecordDecl *RD) {
return CGM.getCodeGenOpts().OptimizationLevel > 0 &&
@@ -1011,6 +1255,26 @@ void CodeGenModule::EmitDeferredVTables() {
DeferredVTables.clear();
}
+bool CodeGenModule::HasLTOVisibilityPublicStd(const CXXRecordDecl *RD) {
+ if (!getCodeGenOpts().LTOVisibilityPublicStd)
+ return false;
+
+ const DeclContext *DC = RD;
+ while (1) {
+ auto *D = cast<Decl>(DC);
+ DC = DC->getParent();
+ if (isa<TranslationUnitDecl>(DC->getRedeclContext())) {
+ if (auto *ND = dyn_cast<NamespaceDecl>(D))
+ if (const IdentifierInfo *II = ND->getIdentifier())
+ if (II->isStr("std") || II->isStr("stdext"))
+ return true;
+ break;
+ }
+ }
+
+ return false;
+}
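Concretely, the walk above only looks at the namespace sitting directly under the translation unit, so (sketch, illustrative declarations only):

    namespace std { class error_category; }        // matches: vcall visibility
                                                   // stays public under this flag
    namespace mylib { namespace std { class X; } } // no match: the top-level
                                                   // context is 'mylib'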
+
bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
LinkageInfo LV = RD->getLinkageAndVisibility();
if (!isExternallyVisible(LV.getLinkage()))
@@ -1027,22 +1291,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
return false;
}
- if (getCodeGenOpts().LTOVisibilityPublicStd) {
- const DeclContext *DC = RD;
- while (1) {
- auto *D = cast<Decl>(DC);
- DC = DC->getParent();
- if (isa<TranslationUnitDecl>(DC->getRedeclContext())) {
- if (auto *ND = dyn_cast<NamespaceDecl>(D))
- if (const IdentifierInfo *II = ND->getIdentifier())
- if (II->isStr("std") || II->isStr("stdext"))
- return false;
- break;
- }
- }
- }
-
- return true;
+ return !HasLTOVisibilityPublicStd(RD);
}
llvm::GlobalObject::VCallVisibility
@@ -1131,9 +1380,10 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
}
}
- if (getCodeGenOpts().VirtualFunctionElimination) {
+ if (getCodeGenOpts().VirtualFunctionElimination ||
+ getCodeGenOpts().WholeProgramVTables) {
llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD);
if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic)
- VTable->addVCallVisibilityMetadata(TypeVis);
+ VTable->setVCallVisibilityMetadata(TypeVis);
}
}
diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h
index a47841bfc6c3..bdfc075ee305 100644
--- a/clang/lib/CodeGen/CGVTables.h
+++ b/clang/lib/CodeGen/CGVTables.h
@@ -62,16 +62,39 @@ class CodeGenVTables {
bool ForVTable);
void addVTableComponent(ConstantArrayBuilder &builder,
- const VTableLayout &layout, unsigned idx,
- llvm::Constant *rtti,
- unsigned &nextVTableThunkIndex);
+ const VTableLayout &layout, unsigned componentIndex,
+ llvm::Constant *rtti, unsigned &nextVTableThunkIndex,
+ unsigned vtableAddressPoint,
+ bool vtableHasLocalLinkage);
+
+ /// Add a component that is encoded as a 32-bit offset relative to the
+ /// vtable's address point when using the relative vtables ABI. The array
+ /// builder points to the start of the vtable.
+ void addRelativeComponent(ConstantArrayBuilder &builder,
+ llvm::Constant *component,
+ unsigned vtableAddressPoint,
+ bool vtableHasLocalLinkage,
+ bool isCompleteDtor) const;
+
+ /// Create a dso_local stub that will be used for a relative reference in the
+ /// relative vtable layout. This stub will just be a tail call to the original
+ /// function and propagate any function attributes from the original. If the
+ /// original function is already dso_local, the original is returned instead
+ /// and a stub is not created.
+ llvm::Function *
+ getOrCreateRelativeStub(llvm::Function *func,
+ llvm::GlobalValue::LinkageTypes stubLinkage,
+ bool isCompleteDtor) const;
+
+ bool useRelativeLayout() const;
+
+ llvm::Type *getVTableComponentType() const;
public:
/// Add vtable components for the given vtable layout to the given
/// global initializer.
void createVTableInitializer(ConstantStructBuilder &builder,
- const VTableLayout &layout,
- llvm::Constant *rtti);
+ const VTableLayout &layout, llvm::Constant *rtti,
+ bool vtableHasLocalLinkage);
CodeGenVTables(CodeGenModule &CGM);
@@ -124,6 +147,13 @@ public:
/// arrays of pointers, with one struct element for each vtable in the vtable
/// group.
llvm::Type *getVTableType(const VTableLayout &layout);
+
+ /// Generate a public facing alias for the vtable and make the vtable either
+ /// hidden or private. The alias will have the original linkage and visibility
+ /// of the vtable. This is used for cases under the relative vtables ABI
+ /// when a vtable may not be dso_local.
+ void GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable,
+ llvm::StringRef AliasNameRef);
};
} // end namespace CodeGen
diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h
index 9fd07bdb187d..70e6fed3f4f6 100644
--- a/clang/lib/CodeGen/CGValue.h
+++ b/clang/lib/CodeGen/CGValue.h
@@ -170,7 +170,8 @@ class LValue {
VectorElt, // This is a vector element l-value (V[i]), use getVector*
BitField, // This is a bitfield l-value, use getBitfield*.
ExtVectorElt, // This is an extended vector subset, use getExtVectorComp
- GlobalReg // This is a register l-value, use getGlobalReg()
+ GlobalReg, // This is a register l-value, use getGlobalReg()
+ MatrixElt // This is a matrix element, use getVector*
} LVType;
llvm::Value *V;
@@ -254,6 +255,7 @@ public:
bool isBitField() const { return LVType == BitField; }
bool isExtVectorElt() const { return LVType == ExtVectorElt; }
bool isGlobalReg() const { return LVType == GlobalReg; }
+ bool isMatrixElt() const { return LVType == MatrixElt; }
bool isVolatileQualified() const { return Quals.hasVolatile(); }
bool isRestrictQualified() const { return Quals.hasRestrict(); }
@@ -337,8 +339,26 @@ public:
Address getVectorAddress() const {
return Address(getVectorPointer(), getAlignment());
}
- llvm::Value *getVectorPointer() const { assert(isVectorElt()); return V; }
- llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; }
+ llvm::Value *getVectorPointer() const {
+ assert(isVectorElt());
+ return V;
+ }
+ llvm::Value *getVectorIdx() const {
+ assert(isVectorElt());
+ return VectorIdx;
+ }
+
+ Address getMatrixAddress() const {
+ return Address(getMatrixPointer(), getAlignment());
+ }
+ llvm::Value *getMatrixPointer() const {
+ assert(isMatrixElt());
+ return V;
+ }
+ llvm::Value *getMatrixIdx() const {
+ assert(isMatrixElt());
+ return VectorIdx;
+ }
// extended vector elements.
Address getExtVectorAddress() const {
@@ -430,6 +450,18 @@ public:
return R;
}
+ static LValue MakeMatrixElt(Address matAddress, llvm::Value *Idx,
+ QualType type, LValueBaseInfo BaseInfo,
+ TBAAAccessInfo TBAAInfo) {
+ LValue R;
+ R.LVType = MatrixElt;
+ R.V = matAddress.getPointer();
+ R.VectorIdx = Idx;
+ R.Initialize(type, type.getQualifiers(), matAddress.getAlignment(),
+ BaseInfo, TBAAInfo);
+ return R;
+ }
+
RValue asAggregateRValue(CodeGenFunction &CGF) const {
return RValue::getAggregate(getAddress(CGF), isVolatileQualified());
}
diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp
index 6b6a116cf259..d3a16a1d5acc 100644
--- a/clang/lib/CodeGen/CodeGenABITypes.cpp
+++ b/clang/lib/CodeGen/CodeGenABITypes.cpp
@@ -16,7 +16,9 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/CodeGenABITypes.h"
+#include "CGCXXABI.h"
#include "CGRecordLayout.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Lex/HeaderSearchOptions.h"
@@ -25,6 +27,11 @@
using namespace clang;
using namespace CodeGen;
+void CodeGen::addDefaultFunctionDefinitionAttributes(CodeGenModule &CGM,
+ llvm::AttrBuilder &attrs) {
+ CGM.addDefaultFunctionDefinitionAttributes(attrs);
+}
+
const CGFunctionInfo &
CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM,
const ObjCMethodDecl *MD,
@@ -63,6 +70,30 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM,
info, {}, args);
}
+ImplicitCXXConstructorArgs
+CodeGen::getImplicitCXXConstructorArgs(CodeGenModule &CGM,
+ const CXXConstructorDecl *D) {
+ // We have to create a dummy CodeGenFunction here to pass to
+ // getImplicitConstructorArgs(). In some cases (base and delegating
+ // constructor calls), getImplicitConstructorArgs() can reach into the
+ // CodeGenFunction to find parameters of the calling constructor to pass on to
+ // the called constructor, but that can't happen here because we're asking for
+ // the args for a complete, non-delegating constructor call.
+ CodeGenFunction CGF(CGM, /* suppressNewContext= */ true);
+ CGCXXABI::AddedStructorArgs addedArgs =
+ CGM.getCXXABI().getImplicitConstructorArgs(CGF, D, Ctor_Complete,
+ /* ForVirtualBase= */ false,
+ /* Delegating= */ false);
+ ImplicitCXXConstructorArgs implicitArgs;
+ for (const auto &arg : addedArgs.Prefix) {
+ implicitArgs.Prefix.push_back(arg.Value);
+ }
+ for (const auto &arg : addedArgs.Suffix) {
+ implicitArgs.Suffix.push_back(arg.Value);
+ }
+ return implicitArgs;
+}
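A hedged usage sketch for this entry point (ThisPtr and the explicit-argument list are hypothetical caller-side names): a client emitting its own call to a complete constructor splices the ABI-implicit arguments around the explicit ones in Prefix/Suffix order:

    ImplicitCXXConstructorArgs Implicit =
        CodeGen::getImplicitCXXConstructorArgs(CGM, Ctor);
    llvm::SmallVector<llvm::Value *, 8> Args;
    Args.push_back(ThisPtr);                                  // 'this' first
    Args.append(Implicit.Prefix.begin(), Implicit.Prefix.end());
    // ... explicit constructor arguments go here ...
    Args.append(Implicit.Suffix.begin(), Implicit.Suffix.end());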
+
llvm::FunctionType *
CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) {
assert(FD != nullptr && "Expected a non-null function declaration!");
@@ -84,3 +115,16 @@ unsigned CodeGen::getLLVMFieldNumber(CodeGenModule &CGM,
const FieldDecl *FD) {
return CGM.getTypes().getCGRecordLayout(RD).getLLVMFieldNo(FD);
}
+
+llvm::Value *CodeGen::getCXXDestructorImplicitParam(
+ CodeGenModule &CGM, llvm::BasicBlock *InsertBlock,
+ llvm::BasicBlock::iterator InsertPoint, const CXXDestructorDecl *D,
+ CXXDtorType Type, bool ForVirtualBase, bool Delegating) {
+ CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+ CGF.CurCodeDecl = D;
+ CGF.CurFuncDecl = D;
+ CGF.CurFn = InsertBlock->getParent();
+ CGF.Builder.SetInsertPoint(InsertBlock, InsertPoint);
+ return CGM.getCXXABI().getCXXDestructorImplicitParam(
+ CGF, D, Type, ForVirtualBase, Delegating);
+}
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 7065e78f19a2..55925110708e 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -32,8 +32,8 @@
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Pass.h"
@@ -86,15 +86,15 @@ namespace clang {
const CodeGenOptions CodeGenOpts) {
handleAllErrors(
std::move(E),
- [&](const RemarkSetupFileError &E) {
+ [&](const LLVMRemarkSetupFileError &E) {
Diags.Report(diag::err_cannot_open_file)
<< CodeGenOpts.OptRecordFile << E.message();
},
- [&](const RemarkSetupPatternError &E) {
+ [&](const LLVMRemarkSetupPatternError &E) {
Diags.Report(diag::err_drv_optimization_remark_pattern)
<< E.message() << CodeGenOpts.OptRecordPasses;
},
- [&](const RemarkSetupFormatError &E) {
+ [&](const LLVMRemarkSetupFormatError &E) {
Diags.Report(diag::err_drv_optimization_remark_format)
<< CodeGenOpts.OptRecordFormat;
});
@@ -246,7 +246,7 @@ namespace clang {
for (auto &LM : LinkModules) {
if (LM.PropagateAttrs)
for (Function &F : *LM.Module)
- Gen->CGM().AddDefaultFnAttrs(F);
+ Gen->CGM().addDefaultFunctionDefinitionAttributes(F);
CurLinkModule = LM.Module.get();
@@ -309,7 +309,7 @@ namespace clang {
CodeGenOpts, this));
Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
- setupOptimizationRemarks(
+ setupLLVMOptimizationRemarks(
Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness,
CodeGenOpts.DiagnosticsHotnessThreshold);
@@ -633,8 +633,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
void BackendConsumer::UnsupportedDiagHandler(
const llvm::DiagnosticInfoUnsupported &D) {
- // We only support errors.
- assert(D.getSeverity() == llvm::DS_Error);
+ // We only support warnings or errors.
+ assert(D.getSeverity() == llvm::DS_Error ||
+ D.getSeverity() == llvm::DS_Warning);
StringRef Filename;
unsigned Line, Column;
@@ -652,7 +653,11 @@ void BackendConsumer::UnsupportedDiagHandler(
DiagnosticPrinterRawOStream DP(MsgStream);
D.print(DP);
}
- Diags.Report(Loc, diag::err_fe_backend_unsupported) << MsgStream.str();
+
+ auto DiagType = D.getSeverity() == llvm::DS_Error
+ ? diag::err_fe_backend_unsupported
+ : diag::warn_fe_backend_unsupported;
+ Diags.Report(Loc, DiagType) << MsgStream.str();
if (BadDebugInfo)
// If we were not able to translate the file:line:col information
@@ -994,7 +999,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
+ CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, std::string(InFile),
std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
BEConsumer = Result.get();
@@ -1146,11 +1151,14 @@ void CodeGenAction::ExecuteAction() {
CI.getTargetOpts(), CI.getLangOpts(),
CI.getFrontendOpts().ShowTimers,
std::move(LinkModules), *VMContext, nullptr);
+ // PR44896: Force DiscardValueNames as false. DiscardValueNames cannot be
+ // true here because the value names are needed for reading textual IR.
+ Ctx.setDiscardValueNames(false);
Ctx.setDiagnosticHandler(
std::make_unique<ClangDiagnosticHandler>(CodeGenOpts, &Result));
Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
- setupOptimizationRemarks(
+ setupLLVMOptimizationRemarks(
Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness,
CodeGenOpts.DiagnosticsHotnessThreshold);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 2bf94f697e01..4a7c84562dee 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -32,6 +32,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/FPEnv.h"
@@ -64,67 +65,36 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
: CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()),
Builder(cgm, cgm.getModule().getContext(), llvm::ConstantFolder(),
CGBuilderInserterTy(this)),
- SanOpts(CGM.getLangOpts().Sanitize), DebugInfo(CGM.getModuleDebugInfo()),
- PGO(cgm), ShouldEmitLifetimeMarkers(shouldEmitLifetimeMarkers(
- CGM.getCodeGenOpts(), CGM.getLangOpts())) {
+ SanOpts(CGM.getLangOpts().Sanitize), CurFPFeatures(CGM.getLangOpts()),
+ DebugInfo(CGM.getModuleDebugInfo()), PGO(cgm),
+ ShouldEmitLifetimeMarkers(
+ shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) {
if (!suppressNewContext)
CGM.getCXXABI().getMangleContext().startNewFunction();
- llvm::FastMathFlags FMF;
- if (CGM.getLangOpts().FastMath)
- FMF.setFast();
- if (CGM.getLangOpts().FiniteMathOnly) {
- FMF.setNoNaNs();
- FMF.setNoInfs();
- }
- if (CGM.getCodeGenOpts().NoNaNsFPMath) {
- FMF.setNoNaNs();
- }
- if (CGM.getCodeGenOpts().NoSignedZeros) {
- FMF.setNoSignedZeros();
- }
- if (CGM.getCodeGenOpts().ReciprocalMath) {
- FMF.setAllowReciprocal();
- }
- if (CGM.getCodeGenOpts().Reassociate) {
- FMF.setAllowReassoc();
- }
- Builder.setFastMathFlags(FMF);
+ SetFastMathFlags(CurFPFeatures);
SetFPModel();
}
CodeGenFunction::~CodeGenFunction() {
assert(LifetimeExtendedCleanupStack.empty() && "failed to emit a cleanup");
- // If there are any unclaimed block infos, go ahead and destroy them
- // now. This can happen if IR-gen gets clever and skips evaluating
- // something.
- if (FirstBlockInfo)
- destroyBlockInfos(FirstBlockInfo);
-
if (getLangOpts().OpenMP && CurFn)
CGM.getOpenMPRuntime().functionFinished(*this);
-}
-
-// Map the LangOption for rounding mode into
-// the corresponding enum in the IR.
-static llvm::fp::RoundingMode ToConstrainedRoundingMD(
- LangOptions::FPRoundingModeKind Kind) {
- switch (Kind) {
- case LangOptions::FPR_ToNearest: return llvm::fp::rmToNearest;
- case LangOptions::FPR_Downward: return llvm::fp::rmDownward;
- case LangOptions::FPR_Upward: return llvm::fp::rmUpward;
- case LangOptions::FPR_TowardZero: return llvm::fp::rmTowardZero;
- case LangOptions::FPR_Dynamic: return llvm::fp::rmDynamic;
- }
- llvm_unreachable("Unsupported FP RoundingMode");
+ // If we have an OpenMPIRBuilder we want to finalize functions (incl.
+ // outlining etc) at some point. Doing it once the function codegen is done
+ // seems to be a reasonable spot. We do it here, as opposed to the deletion
+ // time of the CodeGenModule, because we have to ensure the IR has not yet
+ // been "emitted" to the outside, thus, modifications are still sensible.
+ if (CGM.getLangOpts().OpenMPIRBuilder)
+ CGM.getOpenMPRuntime().getOMPBuilder().finalize();
}
// Map the LangOption for exception behavior into
// the corresponding enum in the IR.
-static llvm::fp::ExceptionBehavior ToConstrainedExceptMD(
- LangOptions::FPExceptionModeKind Kind) {
+llvm::fp::ExceptionBehavior
+clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
switch (Kind) {
case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore;
@@ -135,81 +105,79 @@ static llvm::fp::ExceptionBehavior ToConstrainedExceptMD(
}
void CodeGenFunction::SetFPModel() {
- auto fpRoundingMode = ToConstrainedRoundingMD(
- getLangOpts().getFPRoundingMode());
+ llvm::RoundingMode RM = getLangOpts().getFPRoundingMode();
auto fpExceptionBehavior = ToConstrainedExceptMD(
getLangOpts().getFPExceptionMode());
- if (fpExceptionBehavior == llvm::fp::ebIgnore &&
- fpRoundingMode == llvm::fp::rmToNearest)
- // Constrained intrinsics are not used.
- ;
- else {
- Builder.setIsFPConstrained(true);
- Builder.setDefaultConstrainedRounding(fpRoundingMode);
- Builder.setDefaultConstrainedExcept(fpExceptionBehavior);
- }
-}
-
-CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T,
- LValueBaseInfo *BaseInfo,
- TBAAAccessInfo *TBAAInfo) {
- return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo,
- /* forPointeeType= */ true);
-}
-
-CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
- LValueBaseInfo *BaseInfo,
- TBAAAccessInfo *TBAAInfo,
- bool forPointeeType) {
- if (TBAAInfo)
- *TBAAInfo = CGM.getTBAAAccessInfo(T);
-
- // Honor alignment typedef attributes even on incomplete types.
- // We also honor them straight for C++ class types, even as pointees;
- // there's an expressivity gap here.
- if (auto TT = T->getAs<TypedefType>()) {
- if (auto Align = TT->getDecl()->getMaxAlignment()) {
- if (BaseInfo)
- *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType);
- return getContext().toCharUnitsFromBits(Align);
- }
- }
+ Builder.setDefaultConstrainedRounding(RM);
+ Builder.setDefaultConstrainedExcept(fpExceptionBehavior);
+ Builder.setIsFPConstrained(fpExceptionBehavior != llvm::fp::ebIgnore ||
+ RM != llvm::RoundingMode::NearestTiesToEven);
+}
- if (BaseInfo)
- *BaseInfo = LValueBaseInfo(AlignmentSource::Type);
+void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
+ llvm::FastMathFlags FMF;
+ FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());
+ FMF.setNoNaNs(FPFeatures.getNoHonorNaNs());
+ FMF.setNoInfs(FPFeatures.getNoHonorInfs());
+ FMF.setNoSignedZeros(FPFeatures.getNoSignedZero());
+ FMF.setAllowReciprocal(FPFeatures.getAllowReciprocal());
+ FMF.setApproxFunc(FPFeatures.getAllowApproxFunc());
+ FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement());
+ Builder.setFastMathFlags(FMF);
+}
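For orientation, a sketch of how the familiar driver options land in these bits (a summary, not exhaustive; FPOptions holds the authoritative mapping):

    // -ffinite-math-only  -> setNoNaNs(true), setNoInfs(true)
    // -fno-signed-zeros   -> setNoSignedZeros(true)
    // -freciprocal-math   -> setAllowReciprocal(true)
    // -fassociative-math  -> setAllowReassoc(true)
    // -ffp-contract=fast  -> setAllowContract(true)
    // -ffast-math         -> all of the above, plus setApproxFunc(true)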
- CharUnits Alignment;
- if (T->isIncompleteType()) {
- Alignment = CharUnits::One(); // Shouldn't be used, but pessimistic is best.
- } else {
- // For C++ class pointees, we don't know whether we're pointing at a
- // base or a complete object, so we generally need to use the
- // non-virtual alignment.
- const CXXRecordDecl *RD;
- if (forPointeeType && (RD = T->getAsCXXRecordDecl())) {
- Alignment = CGM.getClassPointerAlignment(RD);
- } else {
- Alignment = getContext().getTypeAlignInChars(T);
- if (T.getQualifiers().hasUnaligned())
- Alignment = CharUnits::One();
- }
+CodeGenFunction::CGFPOptionsRAII::CGFPOptionsRAII(CodeGenFunction &CGF,
+ FPOptions FPFeatures)
+ : CGF(CGF), OldFPFeatures(CGF.CurFPFeatures) {
+ CGF.CurFPFeatures = FPFeatures;
- // Cap to the global maximum type alignment unless the alignment
- // was somehow explicit on the type.
- if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) {
- if (Alignment.getQuantity() > MaxAlign &&
- !getContext().isAlignmentRequired(T))
- Alignment = CharUnits::fromQuantity(MaxAlign);
- }
- }
- return Alignment;
+ if (OldFPFeatures == FPFeatures)
+ return;
+
+ FMFGuard.emplace(CGF.Builder);
+
+ llvm::RoundingMode NewRoundingBehavior =
+ static_cast<llvm::RoundingMode>(FPFeatures.getRoundingMode());
+ CGF.Builder.setDefaultConstrainedRounding(NewRoundingBehavior);
+ auto NewExceptionBehavior =
+ ToConstrainedExceptMD(static_cast<LangOptions::FPExceptionModeKind>(
+ FPFeatures.getFPExceptionMode()));
+ CGF.Builder.setDefaultConstrainedExcept(NewExceptionBehavior);
+
+ CGF.SetFastMathFlags(FPFeatures);
+
+ assert((CGF.CurFuncDecl == nullptr || CGF.Builder.getIsFPConstrained() ||
+ isa<CXXConstructorDecl>(CGF.CurFuncDecl) ||
+ isa<CXXDestructorDecl>(CGF.CurFuncDecl) ||
+ (NewExceptionBehavior == llvm::fp::ebIgnore &&
+ NewRoundingBehavior == llvm::RoundingMode::NearestTiesToEven)) &&
+ "FPConstrained should be enabled on entire function");
+
+ auto mergeFnAttrValue = [&](StringRef Name, bool Value) {
+ auto OldValue =
+ CGF.CurFn->getFnAttribute(Name).getValueAsString() == "true";
+ auto NewValue = OldValue & Value;
+ if (OldValue != NewValue)
+ CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue));
+ };
+ mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs());
+ mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs());
+ mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero());
+ mergeFnAttrValue("unsafe-fp-math", FPFeatures.getAllowFPReassociate() &&
+ FPFeatures.getAllowReciprocal() &&
+ FPFeatures.getAllowApproxFunc() &&
+ FPFeatures.getNoSignedZero());
+}
+
+CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() {
+ CGF.CurFPFeatures = OldFPFeatures;
}
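A minimal usage sketch of this RAII pair (CGF, E, and the retrieval of the expression's FPOptions are assumed; the exact accessor varies by AST node):

    {
      // Push the pragma-scoped FP state for one expression's codegen.
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, FPFeatures);
      // Builder now carries the new rounding mode, exception behavior, and
      // fast-math flags; function attributes were merged conservatively.
      llvm::Value *V = CGF.EmitScalarExpr(E);
      (void)V;
    } // destructor restores the previous CurFPFeatures and Builder state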
LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
LValueBaseInfo BaseInfo;
TBAAAccessInfo TBAAInfo;
- CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo);
+ CharUnits Alignment = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo);
return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo,
TBAAInfo);
}
@@ -220,8 +188,8 @@ LValue
CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) {
LValueBaseInfo BaseInfo;
TBAAAccessInfo TBAAInfo;
- CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo,
- /* forPointeeType= */ true);
+ CharUnits Align = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo,
+ /* forPointeeType= */ true);
return MakeAddrLValue(Address(V, Align), T, BaseInfo, TBAAInfo);
}
@@ -259,11 +227,13 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) {
case Type::MemberPointer:
case Type::Vector:
case Type::ExtVector:
+ case Type::ConstantMatrix:
case Type::FunctionProto:
case Type::FunctionNoProto:
case Type::Enum:
case Type::ObjCObjectPointer:
case Type::Pipe:
+ case Type::ExtInt:
return TEK_Scalar;
// Complexes.
@@ -486,13 +456,15 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// Scan function arguments for vector width.
for (llvm::Argument &A : CurFn->args())
if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
// Update vector width based on return type.
if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
- LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getFixedSize());
+ LargestVectorWidth =
+ std::max((uint64_t)LargestVectorWidth,
+ VT->getPrimitiveSizeInBits().getKnownMinSize());
// Add the required-vector-width attribute. This contains the max width from:
// 1. min-vector-width attribute used in the source program.
@@ -799,35 +771,54 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass)
SanOpts.Mask &= ~SanitizerKind::Null;
- if (D) {
- // Apply xray attributes to the function (as a string, for now)
- if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
- if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
- XRayInstrKind::Function)) {
- if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction())
- Fn->addFnAttr("function-instrument", "xray-always");
- if (XRayAttr->neverXRayInstrument())
- Fn->addFnAttr("function-instrument", "xray-never");
- if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>())
- if (ShouldXRayInstrumentFunction())
- Fn->addFnAttr("xray-log-args",
- llvm::utostr(LogArgs->getArgumentCount()));
- }
- } else {
- if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc))
- Fn->addFnAttr(
- "xray-instruction-threshold",
- llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
+ // Apply xray attributes to the function (as a string, for now)
+ if (const auto *XRayAttr = D ? D->getAttr<XRayInstrumentAttr>() : nullptr) {
+ if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::FunctionEntry) ||
+ CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::FunctionExit)) {
+ if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction())
+ Fn->addFnAttr("function-instrument", "xray-always");
+ if (XRayAttr->neverXRayInstrument())
+ Fn->addFnAttr("function-instrument", "xray-never");
+ if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>())
+ if (ShouldXRayInstrumentFunction())
+ Fn->addFnAttr("xray-log-args",
+ llvm::utostr(LogArgs->getArgumentCount()));
}
+ } else {
+ if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc))
+ Fn->addFnAttr(
+ "xray-instruction-threshold",
+ llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
+ }
- if (const auto *Attr = D->getAttr<PatchableFunctionEntryAttr>()) {
- // Attr->getStart is currently ignored.
- Fn->addFnAttr("patchable-function-entry",
- std::to_string(Attr->getCount()));
- } else if (unsigned Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount) {
- Fn->addFnAttr("patchable-function-entry",
- std::to_string(Count));
- }
+ if (ShouldXRayInstrumentFunction()) {
+ if (CGM.getCodeGenOpts().XRayIgnoreLoops)
+ Fn->addFnAttr("xray-ignore-loops");
+
+ if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::FunctionExit))
+ Fn->addFnAttr("xray-skip-exit");
+
+ if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::FunctionEntry))
+ Fn->addFnAttr("xray-skip-entry");
+ }
+
+ unsigned Count, Offset;
+ if (const auto *Attr =
+ D ? D->getAttr<PatchableFunctionEntryAttr>() : nullptr) {
+ Count = Attr->getCount();
+ Offset = Attr->getOffset();
+ } else {
+ Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount;
+ Offset = CGM.getCodeGenOpts().PatchableFunctionEntryOffset;
+ }
+ if (Count && Offset <= Count) {
+ Fn->addFnAttr("patchable-function-entry", std::to_string(Count - Offset));
+ if (Offset)
+ Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset));
}
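// Worked example of the Count/Offset split above (illustrative, not part of
// the patch):
//   __attribute__((patchable_function_entry(5, 2))) void g();
// yields Count = 5 and Offset = 2, and therefore
//   "patchable-function-prefix" = "2"  -> 2 NOPs emitted before the entry
//   "patchable-function-entry"  = "3"  -> Count - Offset NOPs after it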
// Add no-jump-tables value.
@@ -842,6 +833,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (CGM.getCodeGenOpts().ProfileSampleAccurate)
Fn->addFnAttr("profile-sample-accurate");
+ if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
+ Fn->addFnAttr("use-sample-profile");
+
if (D && D->hasAttr<CFICanonicalJumpTableAttr>())
Fn->addFnAttr("cfi-canonical-jump-table");
@@ -889,14 +883,26 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// If we're in C++ mode and the function name is "main", it is guaranteed
// to be norecurse by the standard (3.6.1.3 "The function main shall not be
// used within a program").
- if (getLangOpts().CPlusPlus)
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
- if (FD->isMain())
- Fn->addFnAttr(llvm::Attribute::NoRecurse);
-
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+ //
+ // OpenCL C 2.0 v2.2-11 s6.9.i:
+ // Recursion is not supported.
+ //
+ // SYCL v1.2.1 s3.10:
+ // kernels cannot include RTTI information, exception classes,
+ // recursive code, virtual functions or make use of C++ libraries that
+ // are not compiled for the device.
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL ||
+ getLangOpts().SYCLIsDevice ||
+ (getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>()))
+ Fn->addFnAttr(llvm::Attribute::NoRecurse);
+ }
+
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ Builder.setIsFPConstrained(FD->usesFPIntrin());
if (FD->usesFPIntrin())
Fn->addFnAttr(llvm::Attribute::StrictFP);
+ }
// If a custom alignment is used, force realigning to this alignment on
// any main function which certainly will need it.
@@ -1021,7 +1027,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
llvm::Value *Addr = Builder.CreateStructGEP(nullptr, &*EI, Idx);
ReturnValuePointer = Address(Addr, getPointerAlign());
Addr = Builder.CreateAlignedLoad(Addr, getPointerAlign(), "agg.result");
- ReturnValue = Address(Addr, getNaturalTypeAlignment(RetTy));
+ ReturnValue = Address(Addr, CGM.getNaturalTypeAlignment(RetTy));
} else {
ReturnValue = CreateIRTemp(RetTy, "retval");
@@ -1978,6 +1984,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
case Type::Complex:
case Type::Vector:
case Type::ExtVector:
+ case Type::ConstantMatrix:
case Type::Record:
case Type::Enum:
case Type::Elaborated:
@@ -1986,6 +1993,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
+ case Type::ExtInt:
llvm_unreachable("type class is never variably-modified!");
case Type::Adjusted:
@@ -2141,21 +2149,47 @@ void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) {
protection.Inst->eraseFromParent();
}
-void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
+void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue,
QualType Ty, SourceLocation Loc,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue) {
- llvm::Value *TheCheck;
- llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
- CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
+ if (Alignment->getType() != IntPtrTy)
+ Alignment =
+ Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align");
+ if (OffsetValue && OffsetValue->getType() != IntPtrTy)
+ OffsetValue =
+ Builder.CreateIntCast(OffsetValue, IntPtrTy, true, "casted.offset");
+ llvm::Value *TheCheck = nullptr;
if (SanOpts.has(SanitizerKind::Alignment)) {
- EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
- OffsetValue, TheCheck, Assumption);
+ llvm::Value *PtrIntValue =
+ Builder.CreatePtrToInt(PtrValue, IntPtrTy, "ptrint");
+
+ if (OffsetValue) {
+ bool IsOffsetZero = false;
+ if (const auto *CI = dyn_cast<llvm::ConstantInt>(OffsetValue))
+ IsOffsetZero = CI->isZero();
+
+ if (!IsOffsetZero)
+ PtrIntValue = Builder.CreateSub(PtrIntValue, OffsetValue, "offsetptr");
+ }
+
+ llvm::Value *Zero = llvm::ConstantInt::get(IntPtrTy, 0);
+ llvm::Value *Mask =
+ Builder.CreateSub(Alignment, llvm::ConstantInt::get(IntPtrTy, 1));
+ llvm::Value *MaskedPtr = Builder.CreateAnd(PtrIntValue, Mask, "maskedptr");
+ TheCheck = Builder.CreateICmpEQ(MaskedPtr, Zero, "maskcond");
}
+ llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
+ CGM.getDataLayout(), PtrValue, Alignment, OffsetValue);
+
+ if (!SanOpts.has(SanitizerKind::Alignment))
+ return;
+ emitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
+ OffsetValue, TheCheck, Assumption);
}
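The sanitizer check built above reduces to simple pointer arithmetic. A self-contained sketch of what the emitted IR computes (names mirror the value labels used in the code):

    #include <cstdint>
    bool isAssumedAligned(const void *Ptr, uintptr_t Alignment,
                          uintptr_t Offset) {
      uintptr_t PtrInt = (uintptr_t)Ptr;            // "ptrint"
      if (Offset)
        PtrInt -= Offset;                           // "offsetptr"
      uintptr_t Masked = PtrInt & (Alignment - 1);  // "maskedptr"
      return Masked == 0;                           // "maskcond"
    }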
-void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
+void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue,
const Expr *E,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
@@ -2165,7 +2199,7 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
QualType Ty = E->getType();
SourceLocation Loc = E->getExprLoc();
- EmitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
+ emitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
OffsetValue);
}
@@ -2319,8 +2353,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
SmallVector<StringRef, 1> ReqFeatures;
llvm::StringMap<bool> CalleeFeatureMap;
- CGM.getContext().getFunctionFeatureMap(CalleeFeatureMap,
- GlobalDecl(TargetDecl));
+ CGM.getContext().getFunctionFeatureMap(CalleeFeatureMap, TargetDecl);
for (const auto &F : ParsedAttr.Features) {
if (F[0] == '+' && CalleeFeatureMap.lookup(F.substr(1)))
@@ -2433,13 +2466,13 @@ void CodeGenFunction::EmitMultiVersionResolver(
// Loc), the diagnostic will additionally point a "Note:" to this location.
// It should be the location where the __attribute__((assume_aligned))
// was written e.g.
-void CodeGenFunction::EmitAlignmentAssumptionCheck(
+void CodeGenFunction::emitAlignmentAssumptionCheck(
llvm::Value *Ptr, QualType Ty, SourceLocation Loc,
SourceLocation SecondaryLoc, llvm::Value *Alignment,
llvm::Value *OffsetValue, llvm::Value *TheCheck,
llvm::Instruction *Assumption) {
assert(Assumption && isa<llvm::CallInst>(Assumption) &&
- cast<llvm::CallInst>(Assumption)->getCalledValue() ==
+ cast<llvm::CallInst>(Assumption)->getCalledOperand() ==
llvm::Intrinsic::getDeclaration(
Builder.GetInsertBlock()->getParent()->getParent(),
llvm::Intrinsic::assume) &&
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 3d8bc93eb965..d794f4f0fa81 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -26,6 +26,7 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
+#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/Type.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/CapturedStmt.h"
@@ -36,6 +37,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -75,7 +77,11 @@ class ObjCAtTryStmt;
class ObjCAtThrowStmt;
class ObjCAtSynchronizedStmt;
class ObjCAutoreleasePoolStmt;
+class OMPUseDevicePtrClause;
+class OMPUseDeviceAddrClause;
class ReturnsNonNullAttr;
+class SVETypeFlags;
+class OMPExecutableDirective;
namespace analyze_os_log {
class OSLogBufferLayout;
@@ -118,6 +124,7 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1) \
SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \
SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \
+ SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0) \
SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \
SANITIZER_CHECK(MissingReturn, missing_return, 0) \
SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \
@@ -258,6 +265,9 @@ public:
CodeGenModule &CGM; // Per-module state.
const TargetInfo &Target;
+ // For EH/SEH outlined funclets, this field points to parent's CGF
+ CodeGenFunction *ParentCGF = nullptr;
+
typedef std::pair<llvm::Value *, llvm::Value *> ComplexPairTy;
LoopInfoStack LoopStack;
CGBuilderTy Builder;
@@ -332,6 +342,10 @@ public:
/// This is invalid if sret is not in use.
Address ReturnValuePointer = Address::invalid();
+ /// If a return statement is being visited, this holds the return statment's
+ /// result expression.
+ const Expr *RetExpr = nullptr;
+
/// Return true if a label was seen in the current scope.
bool hasLabelBeenSeenInCurrentScope() const {
if (CurLexicalScope)
@@ -485,6 +499,9 @@ public:
/// region.
bool IsInPreservedAIRegion = false;
+ /// True if the current statement has nomerge attribute.
+ bool InNoMergeAttributedStmt = false;
+
const CodeGen::CGBlockInfo *BlockInfo = nullptr;
llvm::Value *BlockPointer = nullptr;
@@ -533,9 +550,6 @@ public:
unsigned NextCleanupDestIndex = 1;
- /// FirstBlockInfo - The head of a singly-linked-list of block layouts.
- CGBlockInfo *FirstBlockInfo = nullptr;
-
/// EHResumeBlock - Unified block containing a call to llvm.eh.resume.
llvm::BasicBlock *EHResumeBlock = nullptr;
@@ -560,11 +574,49 @@ public:
llvm::BasicBlock *getInvokeDestImpl();
+ /// Parent loop-based directive for scan directive.
+ const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr;
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+ llvm::BasicBlock *OMPScanExitBlock = nullptr;
+ llvm::BasicBlock *OMPScanDispatch = nullptr;
+ bool OMPFirstScanLoop = false;
+
+ /// Manages parent directive for scan directives.
+ class ParentLoopDirectiveForScanRegion {
+ CodeGenFunction &CGF;
+ const OMPExecutableDirective *ParentLoopDirectiveForScan;
+
+ public:
+ ParentLoopDirectiveForScanRegion(
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &ParentLoopDirectiveForScan)
+ : CGF(CGF),
+ ParentLoopDirectiveForScan(CGF.OMPParentLoopDirectiveForScan) {
+ CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
+ }
+ ~ParentLoopDirectiveForScanRegion() {
+ CGF.OMPParentLoopDirectiveForScan = ParentLoopDirectiveForScan;
+ }
+ };
+
template <class T>
typename DominatingValue<T>::saved_type saveValueInCond(T value) {
return DominatingValue<T>::save(*this, value);
}
+ class CGFPOptionsRAII {
+ public:
+ CGFPOptionsRAII(CodeGenFunction &CGF, FPOptions FPFeatures);
+ ~CGFPOptionsRAII();
+
+ private:
+ CodeGenFunction &CGF;
+ FPOptions OldFPFeatures;
+ Optional<CGBuilderTy::FastMathFlagGuard> FMFGuard;
+ };
+ FPOptions CurFPFeatures;
+
public:
/// ObjCEHValueStack - Stack of Objective-C exception values, used for
/// rethrows.
@@ -1541,6 +1593,169 @@ public:
CallArgList OldCXXInheritedCtorInitExprArgs;
};
+ // Helper class for the OpenMP IR Builder. Allows reusability of code used
+ // for region body and finalization codegen callbacks. This class will also
+ // contain privatization functions used by the privatization callbacks.
+ //
+ // TODO: this is a temporary class for things that are being moved out of
+ // CGOpenMPRuntime, new versions of current CodeGenFunction methods, or
+ // utility functions for use with the OMPBuilder. Once the move to the
+ // OMPBuilder is done, everything here will either become part of
+ // CodeGenFunction directly, or move into a new helper class containing
+ // functions used by both this and the OMPBuilder.
+
+ struct OMPBuilderCBHelpers {
+
+ OMPBuilderCBHelpers() = delete;
+ OMPBuilderCBHelpers(const OMPBuilderCBHelpers &) = delete;
+ OMPBuilderCBHelpers &operator=(const OMPBuilderCBHelpers &) = delete;
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ /// Cleanup action for allocate support.
+ class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+
+ private:
+ llvm::CallInst *RTLFnCI;
+
+ public:
+ OMPAllocateCleanupTy(llvm::CallInst *RLFnCI) : RTLFnCI(RLFnCI) {
+ RLFnCI->removeFromParent();
+ }
+
+ void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ CGF.Builder.Insert(RTLFnCI);
+ }
+ };
+
+ /// Returns address of the threadprivate variable for the current
+ /// thread. This also creates any necessary OMP runtime calls.
+ ///
+ /// \param VD VarDecl for Threadprivate variable.
+ /// \param VDAddr Address of the Vardecl
+ /// \param Loc The location where the barrier directive was encountered
+ static Address getAddrOfThreadPrivate(CodeGenFunction &CGF,
+ const VarDecl *VD, Address VDAddr,
+ SourceLocation Loc);
+
+ /// Gets the OpenMP-specific address of the local variable \p VD.
+ static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD);
+ /// Get the platform-specific name separator.
+ /// \param Parts different parts of the final name that need separation
+ /// \param FirstSeparator First separator used between the initial two
+ /// parts of the name.
+ /// \param Separator separator used between all of the remaining consecutive
+ /// parts of the name.
+ static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
+ StringRef FirstSeparator = ".",
+ StringRef Separator = ".");
+ /// Emit the Finalization for an OMP region
+ /// \param CGF The Codegen function this belongs to
+ /// \param IP Insertion point for generating the finalization code.
+ static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) {
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ assert(IP.getBlock()->end() != IP.getPoint() &&
+ "OpenMP IR Builder should cause terminated block!");
+
+ llvm::BasicBlock *IPBB = IP.getBlock();
+ llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor();
+ assert(DestBB && "Finalization block should have one successor!");
+
+ // erase and replace with cleanup branch.
+ IPBB->getTerminator()->eraseFromParent();
+ CGF.Builder.SetInsertPoint(IPBB);
+ CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB);
+ CGF.EmitBranchThroughCleanup(Dest);
+ }
+
+ /// Emit the body of an OMP region
+ /// \param CGF The Codegen function this belongs to
+ /// \param RegionBodyStmt The body statement for the OpenMP region being
+ /// generated
+ /// \param CodeGenIP Insertion point for generating the body code.
+ /// \param FiniBB The finalization basic block
+ static void EmitOMPRegionBody(CodeGenFunction &CGF,
+ const Stmt *RegionBodyStmt,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+ if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator())
+ CodeGenIPBBTI->eraseFromParent();
+
+ CGF.Builder.SetInsertPoint(CodeGenIPBB);
+
+ CGF.EmitStmt(RegionBodyStmt);
+
+ if (CGF.Builder.saveIP().isSet())
+ CGF.Builder.CreateBr(&FiniBB);
+ }
+
+ /// RAII for preserving necessary info during Outlined region body codegen.
+ class OutlinedRegionBodyRAII {
+
+ llvm::AssertingVH<llvm::Instruction> OldAllocaIP;
+ CodeGenFunction::JumpDest OldReturnBlock;
+ CGBuilderTy::InsertPoint IP;
+ CodeGenFunction &CGF;
+
+ public:
+ OutlinedRegionBodyRAII(CodeGenFunction &cgf, InsertPointTy &AllocaIP,
+ llvm::BasicBlock &RetBB)
+ : CGF(cgf) {
+ assert(AllocaIP.isSet() &&
+ "Must specify Insertion point for allocas of outlined function");
+ OldAllocaIP = CGF.AllocaInsertPt;
+ CGF.AllocaInsertPt = &*AllocaIP.getPoint();
+ IP = CGF.Builder.saveIP();
+
+ OldReturnBlock = CGF.ReturnBlock;
+ CGF.ReturnBlock = CGF.getJumpDestInCurrentScope(&RetBB);
+ }
+
+ ~OutlinedRegionBodyRAII() {
+ CGF.AllocaInsertPt = OldAllocaIP;
+ CGF.ReturnBlock = OldReturnBlock;
+ CGF.Builder.restoreIP(IP);
+ }
+ };
+
+ /// RAII for preserving necessary info during inlined region body codegen.
+ class InlinedRegionBodyRAII {
+
+ llvm::AssertingVH<llvm::Instruction> OldAllocaIP;
+ CodeGenFunction &CGF;
+
+ public:
+ InlinedRegionBodyRAII(CodeGenFunction &cgf, InsertPointTy &AllocaIP,
+ llvm::BasicBlock &FiniBB)
+ : CGF(cgf) {
+ // The alloca insertion block should be in the entry block of the
+ // containing function, so this expects either an unset AllocaIP (in which
+ // case the old alloca insertion point is reused) or a new AllocaIP in the
+ // same block as the old one.
+ assert((!AllocaIP.isSet() ||
+ CGF.AllocaInsertPt->getParent() == AllocaIP.getBlock()) &&
+ "Insertion point should be in the entry block of containing "
+ "function!");
+ OldAllocaIP = CGF.AllocaInsertPt;
+ if (AllocaIP.isSet())
+ CGF.AllocaInsertPt = &*AllocaIP.getPoint();
+
+ // TODO: Remove the call, after making sure the counter is not used by
+ // the EHStack.
+ // Since this is an inlined region, it should not modify the
+ // ReturnBlock, and should reuse the one for the enclosing outlined
+ // region. So, the JumpDest returned by the function is discarded.
+ (void)CGF.getJumpDestInCurrentScope(&FiniBB);
+ }
+
+ ~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
+ };
+ };
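+ // Usage sketch (illustrative; CGF and Body are assumed locals): directive
+ // codegen wires these helpers into the OpenMPIRBuilder as callbacks, e.g.
+ //   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ //                        llvm::BasicBlock &ContinuationBB) {
+ //     OMPBuilderCBHelpers::OutlinedRegionBodyRAII IPRAII(CGF, AllocaIP,
+ //                                                        ContinuationBB);
+ //     OMPBuilderCBHelpers::EmitOMPRegionBody(CGF, Body, CodeGenIP,
+ //                                            ContinuationBB);
+ //   };
+ //   auto FiniCB = [&](InsertPointTy IP) {
+ //     OMPBuilderCBHelpers::FinalizeOMPRegion(CGF, IP);
+ //   };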
+
private:
/// CXXThisDecl - When generating code for a C++ member function,
/// this will hold the implicit 'this' declaration.
@@ -1772,7 +1987,6 @@ public:
/// information about the block, including the block invoke function, the
/// captured variables, etc.
llvm::Value *EmitBlockLiteral(const BlockExpr *);
- static void destroyBlockInfos(CGBlockInfo *info);
llvm::Function *GenerateBlockFunction(GlobalDecl GD,
const CGBlockInfo &Info,
@@ -2155,13 +2369,6 @@ public:
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
- CharUnits getNaturalTypeAlignment(QualType T,
- LValueBaseInfo *BaseInfo = nullptr,
- TBAAAccessInfo *TBAAInfo = nullptr,
- bool forPointeeType = false);
- CharUnits getNaturalPointeeTypeAlignment(QualType T,
- LValueBaseInfo *BaseInfo = nullptr,
- TBAAAccessInfo *TBAAInfo = nullptr);
Address EmitLoadOfReference(LValue RefLVal,
LValueBaseInfo *PointeeBaseInfo = nullptr,
@@ -2264,8 +2471,9 @@ public:
/// CreateAggTemp - Create a temporary memory object for the given
/// aggregate type.
- AggValueSlot CreateAggTemp(QualType T, const Twine &Name = "tmp") {
- return AggValueSlot::forAddr(CreateMemTemp(T, Name),
+ AggValueSlot CreateAggTemp(QualType T, const Twine &Name = "tmp",
+ Address *Alloca = nullptr) {
+ return AggValueSlot::forAddr(CreateMemTemp(T, Name, Alloca),
T.getQualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
@@ -2594,7 +2802,8 @@ public:
Address EmitCXXUuidofExpr(const CXXUuidofExpr *E);
/// Situations in which we might emit a check for the suitability of a
- /// pointer or glvalue.
+ /// pointer or glvalue. Needs to be kept in sync with ubsan_handlers.cpp in
+ /// compiler-rt.
enum TypeCheckKind {
/// Checking the operand of a load. Must be suitably sized and aligned.
TCK_Load,
@@ -2826,7 +3035,7 @@ public:
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);
- void EmitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
+ void emitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
SourceLocation Loc,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
@@ -2834,13 +3043,14 @@ public:
llvm::Value *TheCheck,
llvm::Instruction *Assumption);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
+ void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
SourceLocation Loc, SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
- SourceLocation AssumptionLoc, llvm::Value *Alignment,
+ void emitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
+ SourceLocation AssumptionLoc,
+ llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);
//===--------------------------------------------------------------------===//
@@ -2983,7 +3193,8 @@ public:
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
- llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
+ llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ SourceLocation Loc);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
@@ -3037,7 +3248,10 @@ public:
void EmitOMPPrivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
void EmitOMPUseDevicePtrClause(
- const OMPClause &C, OMPPrivateScope &PrivateScope,
+ const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
+ const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
+ void EmitOMPUseDeviceAddrClause(
+ const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
/// Emit code for copyin clause in \a D directive. The next code is
/// generated at the start of outlined functions for directives:
@@ -3091,7 +3305,8 @@ public:
/// proper codegen in internal captured statement.
///
void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
- OMPPrivateScope &PrivateScope);
+ OMPPrivateScope &PrivateScope,
+ bool ForInscan = false);
/// Emit final update of reduction values to original variables at
/// the end of the directive.
///
@@ -3149,6 +3364,8 @@ public:
void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S);
void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
void EmitOMPFlushDirective(const OMPFlushDirective &S);
+ void EmitOMPDepobjDirective(const OMPDepobjDirective &S);
+ void EmitOMPScanDirective(const OMPScanDirective &S);
void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
void EmitOMPTargetDirective(const OMPTargetDirective &S);
@@ -3250,8 +3467,8 @@ public:
/// \param PostIncGen Generator for post-increment code (required for ordered
/// loop directives).
void EmitOMPInnerLoop(
- const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
- const Expr *IncExpr,
+ const OMPExecutableDirective &S, bool RequiresCleanup,
+ const Expr *LoopCond, const Expr *IncExpr,
const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
const llvm::function_ref<void(CodeGenFunction &)> PostIncGen);
@@ -3517,6 +3734,7 @@ public:
LValue EmitUnaryOpLValue(const UnaryOperator *E);
LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
bool Accessed = false);
+ LValue EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E);
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
bool IsLowerBound = true);
LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E);
@@ -3722,6 +3940,8 @@ public:
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue);
+ RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue);
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E, ReturnValueSlot ReturnValue);
@@ -3757,6 +3977,13 @@ public:
llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
+ llvm::Value *EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+ ReturnValueSlot ReturnValue,
+ llvm::Triple::ArchType Arch);
+ llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::IntegerType *ITy,
+ QualType RTy);
+ llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy,
+ QualType RTy);
llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
unsigned LLVMIntrinsic,
@@ -3775,12 +4002,62 @@ public:
SmallVectorImpl<llvm::Value*> &O,
const char *name,
unsigned shift = 0, bool rightshift = false);
+ llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
+ const llvm::ElementCount &Count);
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty,
bool negateForRightShift);
llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
llvm::Type *Ty, bool usgn, const char *name);
llvm::Value *vectorWrapScalar16(llvm::Value *Op);
+ /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
+ /// access builtin. Only required if it can't be inferred from the base
+ /// pointer operand.
+ llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags);
+
+ SmallVector<llvm::Type *, 2> getSVEOverloadTypes(SVETypeFlags TypeFlags,
+ llvm::Type *ReturnType,
+ ArrayRef<llvm::Value *> Ops);
+ llvm::Type *getEltType(SVETypeFlags TypeFlags);
+ llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags);
+ llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags);
+ llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags);
+ llvm::Value *EmitSVEDupX(llvm::Value *Scalar);
+ llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty);
+ llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty);
+ llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned BuiltinID);
+ llvm::Value *EmitSVEMovl(SVETypeFlags TypeFlags,
+ llvm::ArrayRef<llvm::Value *> Ops,
+ unsigned BuiltinID);
+ llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred,
+ llvm::ScalableVectorType *VTy);
+ llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned BuiltinID, bool IsZExtReturn);
+ llvm::Value *EmitSVEMaskedStore(const CallExpr *,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned BuiltinID);
+ llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned BuiltinID);
+ llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags,
+ SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID);
+ llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm::Triple::ArchType Arch);
llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
@@ -3794,6 +4071,9 @@ public:
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E);
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
+ llvm::AtomicOrdering &AO,
+ llvm::SyncScope::ID &SSID);
private:
enum class MSVCIntrin;
@@ -3924,6 +4204,10 @@ public:
/// aggregate type into a temporary LValue.
LValue EmitAggExprToLValue(const Expr *E);
+ /// Build all the stores needed to initialize an aggregate at Dest with the
+ /// value Val.
+ void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+
/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
/// make sure it survives garbage collection until this point.
void EmitExtendGCLifetime(llvm::Value *object);
@@ -3974,6 +4258,9 @@ public:
/// Call atexit() with function dtorStub.
void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
+ /// Call unatexit() with function dtorStub.
+ llvm::Value *unregisterGlobalDtorWithUnAtExit(llvm::Function *dtorStub);
+
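For orientation, a hedged sketch of the assumed unatexit() contract (an AIX
libc extension; semantics assumed, not stated in this patch): it removes a
handler previously registered with atexit() and returns 0 on success, so a
sterm finalizer can run only the destructors that exit() has not already
claimed:

    // Illustrative shape of the emitted logic (not the literal codegen):
    //   at initialization:  atexit(dtorStub);
    //   at library unload:  if (unatexit(dtorStub) == 0)
    //                         dtorStub(); // safe: not yet run by exit()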
/// Emit code in this function to perform a guarded variable
/// initialization. Guarded initializations are used when it's not
/// possible to prove that an initialization will be done exactly
@@ -3997,12 +4284,12 @@ public:
ArrayRef<llvm::Function *> CXXThreadLocals,
ConstantAddress Guard = ConstantAddress::invalid());
- /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global
+ /// GenerateCXXGlobalCleanUpFunc - Generates code for cleaning up global
/// variables.
- void GenerateCXXGlobalDtorsFunc(
+ void GenerateCXXGlobalCleanUpFunc(
llvm::Function *Fn,
const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH,
- llvm::Constant *>> &DtorsAndObjects);
+ llvm::Constant *>> &DtorsOrStermFinalizers);
void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
const VarDecl *D,
@@ -4013,14 +4300,6 @@ public:
void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp);
- void enterFullExpression(const FullExpr *E) {
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(E))
- if (EWC->getNumObjects() == 0)
- return;
- enterNonTrivialFullExpression(E);
- }
- void enterNonTrivialFullExpression(const FullExpr *E);
-
void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true);
RValue EmitAtomicExpr(AtomicExpr *E);
@@ -4175,6 +4454,9 @@ public:
/// SetFPModel - Control floating point behavior via fp-model settings.
void SetFPModel();
+ /// Set the codegen fast-math flags.
+ void SetFastMathFlags(FPOptions FPFeatures);
+
private:
llvm::MDNode *getRangeForLoadFromType(QualType Ty);
void EmitReturnOfRValue(RValue RV, QualType Ty);
@@ -4195,7 +4477,7 @@ private:
///
/// \param AI - The first function argument of the expansion.
void ExpandTypeFromArgs(QualType Ty, LValue Dst,
- SmallVectorImpl<llvm::Value *>::iterator &AI);
+ llvm::Function::arg_iterator &AI);
/// ExpandTypeToArgs - Expand an CallArg \arg Arg, with the LLVM type for \arg
/// Ty, into individual arguments on the provided vector \arg IRCallArgs,
@@ -4411,10 +4693,15 @@ inline llvm::Value *DominatingLLVMValue::restore(CodeGenFunction &CGF,
// Otherwise, it should be an alloca instruction, as set up in save().
auto alloca = cast<llvm::AllocaInst>(value.getPointer());
- return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment());
+ return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlign());
}
} // end namespace CodeGen
+
+// Map the LangOption for floating point exception behavior into
+// the corresponding enum in the IR.
+llvm::fp::ExceptionBehavior
+ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind);
} // end namespace clang
#endif
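A minimal sketch of a plausible definition for the ToConstrainedExceptMD
declaration above, assuming the LangOptions FPE_* enumerators and llvm::fp
names of this LLVM version (the real definition lives in a .cpp file, not
this header):

    llvm::fp::ExceptionBehavior
    ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
      switch (Kind) {
      case LangOptions::FPE_Ignore:  return llvm::fp::ebIgnore;
      case LangOptions::FPE_MayTrap: return llvm::fp::ebMayTrap;
      case LangOptions::FPE_Strict:  return llvm::fp::ebStrict;
      }
      llvm_unreachable("Unsupported FP exception behavior");
    }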
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 57beda26677c..4ae8ce7e5ccf 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -38,6 +38,7 @@
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
@@ -83,6 +84,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::WebAssembly:
+ case TargetCXXABI::XL:
return CreateItaniumCXXABI(CGM);
case TargetCXXABI::Microsoft:
return CreateMicrosoftCXXABI(CGM);
@@ -110,6 +112,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
Int32Ty = llvm::Type::getInt32Ty(LLVMContext);
Int64Ty = llvm::Type::getInt64Ty(LLVMContext);
HalfTy = llvm::Type::getHalfTy(LLVMContext);
+ BFloatTy = llvm::Type::getBFloatTy(LLVMContext);
FloatTy = llvm::Type::getFloatTy(LLVMContext);
DoubleTy = llvm::Type::getDoubleTy(LLVMContext);
PointerWidthInBits = C.getTargetInfo().getPointerWidth(0);
@@ -219,14 +222,6 @@ void CodeGenModule::createOpenMPRuntime() {
OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
break;
}
-
- // The OpenMP-IR-Builder should eventually replace the above runtime codegens
- // but we are not there yet so they both reside in CGModule for now and the
- // OpenMP-IR-Builder is opt-in only.
- if (LangOpts.OpenMPIRBuilder) {
- OMPBuilder.reset(new llvm::OpenMPIRBuilder(TheModule));
- OMPBuilder->initialize();
- }
}
void CodeGenModule::createCUDARuntime() {
@@ -408,7 +403,7 @@ void CodeGenModule::Release() {
checkAliases();
emitMultiVersionFunctions();
EmitCXXGlobalInitFunc();
- EmitCXXGlobalDtorFunc();
+ EmitCXXGlobalCleanUpFunc();
registerGlobalDtorsWithAtExit();
EmitCXXThreadLocalInitFunc();
if (ObjCRuntime)
@@ -447,6 +442,10 @@ void CodeGenModule::Release() {
CodeGenFunction(*this).EmitCfiCheckStub();
}
emitAtAvailableLinkGuard();
+ if (Context.getTargetInfo().getTriple().isWasm() &&
+ !Context.getTargetInfo().getTriple().isOSEmscripten()) {
+ EmitMainVoidAlias();
+ }
emitLLVMUsed();
if (SanStats)
SanStats->finish();
@@ -483,6 +482,14 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version",
CodeGenOpts.DwarfVersion);
}
+
+ if (Context.getLangOpts().SemanticInterposition)
+ // Require various optimization to respect semantic interposition.
+ getModule().setSemanticInterposition(1);
+ else if (Context.getLangOpts().ExplicitNoSemanticInterposition)
+ // Allow dso_local on applicable targets.
+ getModule().setSemanticInterposition(0);
+
if (CodeGenOpts.EmitCodeView) {
// Indicate that we want CodeView in the metadata.
getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1);
@@ -513,7 +520,7 @@ void CodeGenModule::Release() {
"StrictVTablePointersRequirement",
llvm::MDNode::get(VMContext, Ops));
}
- if (DebugInfo)
+ if (getModuleDebugInfo())
// We support a single version in the linked module. The LLVM
// parser will drop debug info with a different version number
// (and warn about it, too).
@@ -537,11 +544,26 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
}
+ if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) {
+ StringRef ABIStr = Target.getABI();
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ getModule().addModuleFlag(llvm::Module::Error, "target-abi",
+ llvm::MDString::get(Ctx, ABIStr));
+ }
+
if (CodeGenOpts.SanitizeCfiCrossDso) {
// Indicate that we want cross-DSO control flow integrity checks.
getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
}
+ if (CodeGenOpts.WholeProgramVTables) {
+ // Indicate whether VFE was enabled for this module, so that the
+ // vcall_visibility metadata added under whole program vtables is handled
+ // appropriately in the optimizer.
+ getModule().addModuleFlag(llvm::Module::Error, "Virtual Function Elim",
+ CodeGenOpts.VirtualFunctionElimination);
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) {
getModule().addModuleFlag(llvm::Module::Override,
"CFI Canonical Jump Tables",
@@ -567,7 +589,8 @@ void CodeGenModule::Release() {
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
// property.)
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
- CodeGenOpts.FlushDenorm ? 1 : 0);
+ CodeGenOpts.FP32DenormalMode.Output !=
+ llvm::DenormalMode::IEEE);
}
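In effect, the flag is now derived from the FP32 denormal mode rather than
the removed FlushDenorm option; a sketch of the equivalent logic:

    // Flush-to-zero is requested whenever FP32 output denormals are not IEEE.
    bool FlushFP32Denorms =
        CodeGenOpts.FP32DenormalMode.Output != llvm::DenormalMode::IEEE;
    getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
                              FlushFP32Denorms ? 1 : 0);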
// Emit OpenCL specific module metadata: OpenCL/SPIR version.
@@ -623,8 +646,8 @@ void CodeGenModule::Release() {
if (getCodeGenOpts().EmitGcovArcs || getCodeGenOpts().EmitGcovNotes)
EmitCoverageFile();
- if (DebugInfo)
- DebugInfo->finalize();
+ if (CGDebugInfo *DI = getModuleDebugInfo())
+ DI->finalize();
if (getCodeGenOpts().EmitVersionIdentMetadata)
EmitVersionIdentMetadata();
@@ -632,7 +655,9 @@ void CodeGenModule::Release() {
if (!getCodeGenOpts().RecordCommandLine.empty())
EmitCommandLineMetadata();
- EmitTargetMetadata();
+ getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
+
+ EmitBackendOptionsMetadata(getCodeGenOpts());
}
void CodeGenModule::EmitOpenCLMetadata() {
@@ -652,6 +677,19 @@ void CodeGenModule::EmitOpenCLMetadata() {
OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
}
+void CodeGenModule::EmitBackendOptionsMetadata(
+ const CodeGenOptions CodeGenOpts) {
+ switch (getTriple().getArch()) {
+ default:
+ break;
+ case llvm::Triple::riscv32:
+ case llvm::Triple::riscv64:
+ getModule().addModuleFlag(llvm::Module::Error, "SmallDataLimit",
+ CodeGenOpts.SmallDataLimit);
+ break;
+ }
+}
+
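A usage sketch (driver flag spelling assumed): on RISC-V the new hook
forwards the small-data threshold to the backend as a module flag.

    // clang --target=riscv64-unknown-elf -msmall-data-limit=8 ...
    // emits a module flag ("SmallDataLimit", 8), which the RISC-V backend
    // consults when deciding which globals go in .sdata/.sbss.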
void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
// Make sure that this type is translated.
Types.UpdateCompletedType(TD);
@@ -671,6 +709,19 @@ llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) {
TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {
if (!TBAA)
return TBAAAccessInfo();
+ if (getLangOpts().CUDAIsDevice) {
+ // As CUDA builtin surface/texture types are replaced, skip generating TBAA
+ // access info.
+ if (AccessType->isCUDADeviceBuiltinSurfaceType()) {
+ if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() !=
+ nullptr)
+ return TBAAAccessInfo();
+ } else if (AccessType->isCUDADeviceBuiltinTextureType()) {
+ if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() !=
+ nullptr)
+ return TBAAAccessInfo();
+ }
+ }
return TBAA->getAccessInfo(AccessType);
}
@@ -856,7 +907,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static)
return true;
- // Otherwise don't assue it is local.
+ // Otherwise don't assume it is local.
return false;
}
@@ -912,9 +963,9 @@ static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) {
.Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel);
}
-static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(
- CodeGenOptions::TLSModel M) {
- switch (M) {
+llvm::GlobalVariable::ThreadLocalMode
+CodeGenModule::GetDefaultLLVMTLSModel() const {
+ switch (CodeGenOpts.getDefaultTLSModel()) {
case CodeGenOptions::GeneralDynamicTLSModel:
return llvm::GlobalVariable::GeneralDynamicTLSModel;
case CodeGenOptions::LocalDynamicTLSModel:
@@ -931,7 +982,7 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const {
assert(D.getTLSKind() && "setting TLS mode on non-TLS var!");
llvm::GlobalValue::ThreadLocalMode TLM;
- TLM = GetLLVMTLSModel(CodeGenOpts.getDefaultTLSModel());
+ TLM = GetDefaultLLVMTLSModel();
// Override the TLS model if it is explicitly specified.
if (const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>()) {
@@ -997,23 +1048,19 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
MangleContext &MC = CGM.getCXXABI().getMangleContext();
- if (MC.shouldMangleDeclName(ND)) {
- llvm::raw_svector_ostream Out(Buffer);
- if (const auto *D = dyn_cast<CXXConstructorDecl>(ND))
- MC.mangleCXXCtor(D, GD.getCtorType(), Out);
- else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND))
- MC.mangleCXXDtor(D, GD.getDtorType(), Out);
- else
- MC.mangleName(ND, Out);
- } else {
+ if (MC.shouldMangleDeclName(ND))
+ MC.mangleName(GD.getWithDecl(ND), Out);
+ else {
IdentifierInfo *II = ND->getIdentifier();
assert(II && "Attempt to mangle unnamed decl.");
const auto *FD = dyn_cast<FunctionDecl>(ND);
if (FD &&
FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
- llvm::raw_svector_ostream Out(Buffer);
Out << "__regcall3__" << II->getName();
+ } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+ Out << "__device_stub__" << II->getName();
} else {
Out << II->getName();
}
@@ -1036,7 +1083,7 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
}
}
- return Out.str();
+ return std::string(Out.str());
}
void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
@@ -1101,11 +1148,25 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
const auto *ND = cast<NamedDecl>(GD.getDecl());
std::string MangledName = getMangledNameImpl(*this, GD, ND);
- // Adjust kernel stub mangling as we may need to be able to differentiate
- // them from the kernel itself (e.g., for HIP).
- if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
- if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>())
- MangledName = getCUDARuntime().getDeviceStubName(MangledName);
+ // Ensure either that we have different ABIs between host and device
+ // compilations, say, host compilation following the MSVC ABI but device
+ // compilation following the Itanium C++ ABI, or, if they follow the same
+ // ABI, that kernel names after mangling are the same after name stubbing.
+ // The latter check is very important, as the device kernel name mangled in
+ // host compilation is used to resolve the device binaries to be executed.
+ // Inconsistent naming results in undefined behavior. Even though we cannot
+ // check the naming directly between host and device compilations, comparing
+ // the host- and device-mangling in host compilation can help catch certain
+ // of those mismatches.
+ assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
+ getLangOpts().CUDAIsDevice ||
+ (getContext().getAuxTargetInfo() &&
+ (getContext().getAuxTargetInfo()->getCXXABI() !=
+ getContext().getTargetInfo().getCXXABI())) ||
+ getCUDARuntime().getDeviceSideName(ND) ==
+ getMangledNameImpl(
+ *this,
+ GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel),
+ ND));
auto Result = Manglings.insert(std::make_pair(MangledName, GD));
return MangledDeclNames[CanonicalGD] = Result.first->first();
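An illustrative example of the stub naming this assert guards, using only
names introduced by the hunk above (the kernel itself is hypothetical):

    // Sketch: both names come from getMangledNameImpl.
    std::string Stub = getMangledNameImpl(
        *this, GD.getWithKernelReferenceKind(KernelReferenceKind::Stub), ND);
    std::string Kernel = getMangledNameImpl(
        *this, GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel), ND);
    // For: extern "C" __global__ void foo();
    //   Stub   == "__device_stub__foo"
    //   Kernel == "foo"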
@@ -1357,7 +1418,7 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
std::string typeName;
if (isPipe)
typeName = ty.getCanonicalType()
- ->getAs<PipeType>()
+ ->castAs<PipeType>()
->getElementType()
.getAsString(Policy);
else
@@ -1371,7 +1432,7 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
std::string baseTypeName;
if (isPipe)
baseTypeName = ty.getCanonicalType()
- ->getAs<PipeType>()
+ ->castAs<PipeType>()
->getElementType()
.getCanonicalType()
.getAsString(Policy);
@@ -1493,6 +1554,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if (CodeGenOpts.UnwindTables)
B.addAttribute(llvm::Attribute::UWTable);
+ if (CodeGenOpts.StackClashProtector)
+ B.addAttribute("probe-stack", "inline-asm");
+
if (!hasUnwindExceptions(LangOpts))
B.addAttribute(llvm::Attribute::NoUnwind);
@@ -1840,9 +1904,16 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
else if (const auto *SA = FD->getAttr<SectionAttr>())
F->setSection(SA->getName());
+ // If we plan on emitting this inline builtin, we can't treat it as a builtin.
if (FD->isInlineBuiltinDeclaration()) {
- F->addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoBuiltin);
+ const FunctionDecl *FDBody;
+ bool HasBody = FD->hasBody(FDBody);
+ (void)HasBody;
+ assert(HasBody && "Inline builtin declarations should always have an "
+ "available body!");
+ if (shouldEmitFunction(FDBody))
+ F->addAttribute(llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoBuiltin);
}
if (FD->isReplaceableGlobalAllocationFunction()) {
@@ -1850,15 +1921,6 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
// default, only if it is invoked by a new-expression or delete-expression.
F->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoBuiltin);
-
- // A sane operator new returns a non-aliasing pointer.
- // FIXME: Also add NonNull attribute to the return value
- // for the non-nothrow forms?
- auto Kind = FD->getDeclName().getCXXOverloadedOperator();
- if (getCodeGenOpts().AssumeSaneOperatorNew &&
- (Kind == OO_New || Kind == OO_Array_New))
- F->addAttribute(llvm::AttributeList::ReturnIndex,
- llvm::Attribute::NoAlias);
}
if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD))
@@ -2375,13 +2437,8 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
return true;
}
-ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor(
- const CXXUuidofExpr* E) {
- // Sema has verified that IIDSource has a __declspec(uuid()), and that its
- // well-formed.
- StringRef Uuid = E->getUuidStr();
- std::string Name = "_GUID_" + Uuid.lower();
- std::replace(Name.begin(), Name.end(), '-', '_');
+ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) {
+ StringRef Name = getMangledName(GD);
// The UUID descriptor should be pointer aligned.
CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes);
@@ -2390,8 +2447,30 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor(
if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name))
return ConstantAddress(GV, Alignment);
- llvm::Constant *Init = EmitUuidofInitializer(Uuid);
- assert(Init && "failed to initialize as constant");
+ ConstantEmitter Emitter(*this);
+ llvm::Constant *Init;
+
+ APValue &V = GD->getAsAPValue();
+ if (!V.isAbsent()) {
+ // If possible, emit the APValue version of the initializer. In particular,
+ // this gets the type of the constant right.
+ Init = Emitter.emitForInitializer(
+ GD->getAsAPValue(), GD->getType().getAddressSpace(), GD->getType());
+ } else {
+ // As a fallback, directly construct the constant.
+ // FIXME: This may get padding wrong under esoteric struct layout rules.
+ // MSVC appears to create a complete type 'struct __s_GUID' that it
+ // presumably uses to represent these constants.
+ MSGuidDecl::Parts Parts = GD->getParts();
+ llvm::Constant *Fields[4] = {
+ llvm::ConstantInt::get(Int32Ty, Parts.Part1),
+ llvm::ConstantInt::get(Int16Ty, Parts.Part2),
+ llvm::ConstantInt::get(Int16Ty, Parts.Part3),
+ llvm::ConstantDataArray::getRaw(
+ StringRef(reinterpret_cast<char *>(Parts.Part4And5), 8), 8,
+ Int8Ty)};
+ Init = llvm::ConstantStruct::getAnon(Fields);
+ }
auto *GV = new llvm::GlobalVariable(
getModule(), Init->getType(),
@@ -2399,7 +2478,16 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor(
if (supportsCOMDAT())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
setDSOLocal(GV);
- return ConstantAddress(GV, Alignment);
+
+ llvm::Constant *Addr = GV;
+ if (!V.isAbsent()) {
+ Emitter.finalize(GV);
+ } else {
+ llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType());
+ Addr = llvm::ConstantExpr::getBitCast(
+ GV, Ty->getPointerTo(GV->getAddressSpace()));
+ }
+ return ConstantAddress(Addr, Alignment);
}
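A worked example of the fallback constant (hedged: per the FIXME above, the
exact padding may differ from MSVC's layout). For a hypothetical
struct __declspec(uuid("01234567-89ab-cdef-0123-456789abcdef")) S; the
fields would be built roughly as:

    llvm::Constant *Fields[4] = {
        llvm::ConstantInt::get(Int32Ty, 0x01234567), // Part1
        llvm::ConstantInt::get(Int16Ty, 0x89ab),     // Part2
        llvm::ConstantInt::get(Int16Ty, 0xcdef),     // Part3
        // Part4And5: the trailing 8 bytes, in declaration order.
        llvm::ConstantDataArray::getRaw(
            "\x01\x23\x45\x67\x89\xab\xcd\xef", 8, Int8Ty)};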
ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
@@ -2461,7 +2549,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
!Global->hasAttr<CUDAGlobalAttr>() &&
!Global->hasAttr<CUDAConstantAttr>() &&
!Global->hasAttr<CUDASharedAttr>() &&
- !(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>()))
+ !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
+ !Global->getType()->isCUDADeviceBuiltinTextureType())
return;
} else {
// We need to emit host-side 'shadows' for all global
@@ -2554,11 +2643,6 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
return;
}
- // Check if this must be emitted as declare variant.
- if (LangOpts.OpenMP && isa<FunctionDecl>(Global) && OpenMPRuntime &&
- OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/false))
- return;
-
// If we're deferring emission of a C++ variable with an
// initializer, remember the order in which it appeared in the file.
if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) &&
@@ -2741,8 +2825,8 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
// PR9614. Avoid cases where the source code is lying to us. An available
// externally function should have an equivalent function somewhere else,
- // but a function that calls itself is clearly not equivalent to the real
- // implementation.
+ // but a function that calls itself through asm label/`__builtin_` trickery is
+ // clearly not equivalent to the real implementation.
// This happens in glibc's btowc and in some configure checks.
return !isTriviallyRecursive(F);
}
@@ -2764,50 +2848,6 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
EmitGlobalFunctionDefinition(GD, GV);
}
-void CodeGenModule::emitOpenMPDeviceFunctionRedefinition(
- GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) {
- assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
- OpenMPRuntime && "Expected OpenMP device mode.");
- const auto *D = cast<FunctionDecl>(OldGD.getDecl());
-
- // Compute the function info and LLVM type.
- const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD);
- llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
-
- // Get or create the prototype for the function.
- if (!GV || (GV->getType()->getElementType() != Ty)) {
- GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction(
- getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false,
- /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(),
- ForDefinition));
- SetFunctionAttributes(OldGD, cast<llvm::Function>(GV),
- /*IsIncompleteFunction=*/false,
- /*IsThunk=*/false);
- }
- // We need to set linkage and visibility on the function before
- // generating code for it because various parts of IR generation
- // want to propagate this information down (e.g. to local static
- // declarations).
- auto *Fn = cast<llvm::Function>(GV);
- setFunctionLinkage(OldGD, Fn);
-
- // FIXME: this is redundant with part of
- // setFunctionDefinitionAttributes
- setGVProperties(Fn, OldGD);
-
- MaybeHandleStaticInExternC(D, Fn);
-
- maybeSetTrivialComdat(*D, *Fn);
-
- CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI);
-
- setNonAliasAttributes(OldGD, Fn);
- SetLLVMFunctionAttributesForDefinition(D, Fn);
-
- if (D->hasAttr<AnnotateAttr>())
- AddGlobalAnnotations(D, Fn);
-}
-
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const auto *D = cast<ValueDecl>(GD.getDecl());
@@ -3122,14 +3162,9 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
EmitGlobal(GDDef);
}
}
- // Check if this must be emitted as declare variant and emit reference to
- // the the declare variant function.
- if (LangOpts.OpenMP && OpenMPRuntime)
- (void)OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true);
if (FD->isMultiVersion()) {
- const auto *TA = FD->getAttr<TargetAttr>();
- if (TA && TA->isDefaultVersion())
+ if (FD->hasAttr<TargetAttr>())
UpdateMultiVersionNames(GD, FD);
if (!IsForDefinition)
return GetOrCreateMultiVersionResolver(GD, Ty, FD);
@@ -3169,7 +3204,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
}
if ((isa<llvm::Function>(Entry) || isa<llvm::GlobalAlias>(Entry)) &&
- (Entry->getType()->getElementType() == Ty)) {
+ (Entry->getValueType() == Ty)) {
return Entry;
}
@@ -3218,7 +3253,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
}
llvm::Constant *BC = llvm::ConstantExpr::getBitCast(
- F, Entry->getType()->getElementType()->getPointerTo());
+ F, Entry->getValueType()->getPointerTo());
addGlobalValReplacement(Entry, BC);
}
@@ -3277,7 +3312,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// Make sure the result is of the requested type.
if (!IsIncompleteFunction) {
- assert(F->getType()->getElementType() == Ty);
+ assert(F->getFunctionType() == Ty);
return F;
}
@@ -3293,6 +3328,8 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
bool ForVTable,
bool DontDefer,
ForDefinition_t IsForDefinition) {
+ assert(!cast<FunctionDecl>(GD.getDecl())->isConsteval() &&
+ "consteval function should never be emitted");
// If there was no specific requested type, just convert it now.
if (!Ty) {
const auto *FD = cast<FunctionDecl>(GD.getDecl());
@@ -3568,7 +3605,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl);
if (Init) {
auto *InitType = Init->getType();
- if (GV->getType()->getElementType() != InitType) {
+ if (GV->getValueType() != InitType) {
// The type of the initializer does not match the definition.
// This happens when an initializer has a different type from
// the type of the global (because of padding at the end of a
@@ -3611,26 +3648,29 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
}
llvm::Constant *
-CodeGenModule::GetAddrOfGlobal(GlobalDecl GD,
- ForDefinition_t IsForDefinition) {
+CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) {
const Decl *D = GD.getDecl();
+
if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D))
return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr,
/*DontDefer=*/false, IsForDefinition);
- else if (isa<CXXMethodDecl>(D)) {
- auto FInfo = &getTypes().arrangeCXXMethodDeclaration(
- cast<CXXMethodDecl>(D));
+
+ if (isa<CXXMethodDecl>(D)) {
+ auto FInfo =
+ &getTypes().arrangeCXXMethodDeclaration(cast<CXXMethodDecl>(D));
auto Ty = getTypes().GetFunctionType(*FInfo);
return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false,
IsForDefinition);
- } else if (isa<FunctionDecl>(D)) {
+ }
+
+ if (isa<FunctionDecl>(D)) {
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false,
IsForDefinition);
- } else
- return GetAddrOfGlobalVar(cast<VarDecl>(D), /*Ty=*/nullptr,
- IsForDefinition);
+ }
+
+ return GetAddrOfGlobalVar(cast<VarDecl>(D), /*Ty=*/nullptr, IsForDefinition);
}
llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
@@ -3641,7 +3681,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
if (GV) {
// Check if the variable has the right type.
- if (GV->getType()->getElementType() == Ty)
+ if (GV->getValueType() == Ty)
return GV;
// Because C++ name mangling, the only way we can end up with an already
@@ -3915,12 +3955,16 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
!getLangOpts().CUDAIsDevice &&
(D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
D->hasAttr<CUDASharedAttr>());
+ bool IsCUDADeviceShadowVar =
+ getLangOpts().CUDAIsDevice &&
+ (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ D->getType()->isCUDADeviceBuiltinTextureType());
// HIP pinned shadow of initialized host-side global variables are also
// left undefined.
- bool IsHIPPinnedShadowVar =
- getLangOpts().CUDAIsDevice && D->hasAttr<HIPPinnedShadowAttr>();
if (getLangOpts().CUDA &&
- (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar))
+ (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar))
+ Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+ else if (D->hasAttr<LoaderUninitializedAttr>())
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
@@ -3979,7 +4023,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// "extern int x[];") and then a definition of a different type (e.g.
// "int x[10];"). This also happens when an initializer has a different type
// from the type of the global (this happens with unions).
- if (!GV || GV->getType()->getElementType() != InitType ||
+ if (!GV || GV->getValueType() != InitType ||
GV->getType()->getAddressSpace() !=
getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) {
@@ -4026,34 +4070,56 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// global variables become internal definitions. These have to
// be internal in order to prevent name conflicts with global
// host variables with the same name in a different TUs.
- if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
- D->hasAttr<HIPPinnedShadowAttr>()) {
+ if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
Linkage = llvm::GlobalValue::InternalLinkage;
-
- // Shadow variables and their properties must be registered
- // with CUDA runtime.
- unsigned Flags = 0;
- if (!D->hasDefinition())
- Flags |= CGCUDARuntime::ExternDeviceVar;
- if (D->hasAttr<CUDAConstantAttr>())
- Flags |= CGCUDARuntime::ConstantDeviceVar;
- // Extern global variables will be registered in the TU where they are
- // defined.
+ // Shadow variables and their properties must be registered with CUDA
+ // runtime. Skip Extern global variables, which will be registered in
+ // the TU where they are defined.
if (!D->hasExternalStorage())
- getCUDARuntime().registerDeviceVar(D, *GV, Flags);
- } else if (D->hasAttr<CUDASharedAttr>())
+ getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
+ D->hasAttr<CUDAConstantAttr>());
+ } else if (D->hasAttr<CUDASharedAttr>()) {
// __shared__ variables are odd. Shadows do get created, but
// they are not registered with the CUDA runtime, so they
// can't really be used to access their device-side
// counterparts. It's not clear yet whether it's nvcc's bug or
// a feature, but we've got to do the same for compatibility.
Linkage = llvm::GlobalValue::InternalLinkage;
+ } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ D->getType()->isCUDADeviceBuiltinTextureType()) {
+ // Builtin surfaces and textures and their template arguments are
+ // also registered with CUDA runtime.
+ Linkage = llvm::GlobalValue::InternalLinkage;
+ const ClassTemplateSpecializationDecl *TD =
+ cast<ClassTemplateSpecializationDecl>(
+ D->getType()->getAs<RecordType>()->getDecl());
+ const TemplateArgumentList &Args = TD->getTemplateArgs();
+ if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
+ assert(Args.size() == 2 &&
+ "Unexpected number of template arguments of CUDA device "
+ "builtin surface type.");
+ auto SurfType = Args[1].getAsIntegral();
+ if (!D->hasExternalStorage())
+ getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
+ SurfType.getSExtValue());
+ } else {
+ assert(Args.size() == 3 &&
+ "Unexpected number of template arguments of CUDA device "
+ "builtin texture type.");
+ auto TexType = Args[1].getAsIntegral();
+ auto Normalized = Args[2].getAsIntegral();
+ if (!D->hasExternalStorage())
+ getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
+ TexType.getSExtValue(),
+ Normalized.getZExtValue());
+ }
+ }
}
}
- if (!IsHIPPinnedShadowVar)
- GV->setInitializer(Init);
- if (emitter) emitter->finalize(GV);
+ GV->setInitializer(Init);
+ if (emitter)
+ emitter->finalize(GV);
// If it is safe to mark the global 'constant', do so now.
GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
@@ -4068,17 +4134,24 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
GV->setAlignment(getContext().getDeclAlign(D).getAsAlign());
- // On Darwin, if the normal linkage of a C++ thread_local variable is
- // LinkOnce or Weak, we keep the normal linkage to prevent multiple
- // copies within a linkage unit; otherwise, the backing variable has
- // internal linkage and all accesses should just be calls to the
- // Itanium-specified entry point, which has the normal linkage of the
- // variable. This is to preserve the ability to change the implementation
- // behind the scenes.
- if (!D->isStaticLocal() && D->getTLSKind() == VarDecl::TLS_Dynamic &&
+ // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper
+ // function is only defined alongside the variable, not also alongside
+ // callers. Normally, all accesses to a thread_local go through the
+ // thread-wrapper in order to ensure initialization has occurred, so the
+ // underlying variable will never be used other than through the
+ // thread-wrapper, and it can therefore be converted to internal linkage.
+ //
+ // However, if the variable has the 'constinit' attribute, it _can_ be
+ // referenced directly, without calling the thread-wrapper, so the linkage
+ // must not be changed.
+ //
+ // Additionally, if the variable isn't plain external linkage, e.g. if it's
+ // weak or linkonce, the de-duplication semantics are important to preserve,
+ // so we don't change the linkage.
+ if (D->getTLSKind() == VarDecl::TLS_Dynamic &&
+ Linkage == llvm::GlobalValue::ExternalLinkage &&
Context.getTargetInfo().getTriple().isOSDarwin() &&
- !llvm::GlobalVariable::isLinkOnceLinkage(Linkage) &&
- !llvm::GlobalVariable::isWeakLinkage(Linkage))
+ !D->hasAttr<ConstInitAttr>())
Linkage = llvm::GlobalValue::InternalLinkage;
GV->setLinkage(Linkage);
@@ -4421,11 +4494,6 @@ void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
llvm::GlobalValue *GV) {
- // Check if this must be emitted as declare variant.
- if (LangOpts.OpenMP && OpenMPRuntime &&
- OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true))
- return;
-
const auto *D = cast<FunctionDecl>(GD.getDecl());
// Compute the function info and LLVM type.
@@ -4433,7 +4501,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
// Get or create the prototype for the function.
- if (!GV || (GV->getType()->getElementType() != Ty))
+ if (!GV || (GV->getValueType() != Ty))
GV = cast<llvm::GlobalValue>(GetAddrOfFunction(GD, Ty, /*ForVTable=*/false,
/*DontDefer=*/true,
ForDefinition));
@@ -4457,7 +4525,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
maybeSetTrivialComdat(*D, *Fn);
- CodeGenFunction(*this).GenerateCode(D, Fn, FI);
+ CodeGenFunction(*this).GenerateCode(GD, Fn, FI);
setNonAliasAttributes(GD, Fn);
SetLLVMFunctionAttributesForDefinition(D, Fn);
@@ -4509,8 +4577,9 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
}
// Create the new alias itself, but don't set a name yet.
+ unsigned AS = Aliasee->getType()->getPointerAddressSpace();
auto *GA =
- llvm::GlobalAlias::create(DeclTy, 0, LT, "", Aliasee, &getModule());
+ llvm::GlobalAlias::create(DeclTy, AS, LT, "", Aliasee, &getModule());
if (Entry) {
if (GA->getAliasee() == Entry) {
@@ -5258,6 +5327,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
if (D->isTemplated())
return;
+ // Consteval function shouldn't be emitted.
+ if (auto *FD = dyn_cast<FunctionDecl>(D))
+ if (FD->isConsteval())
+ return;
+
switch (D->getKind()) {
case Decl::CXXConversion:
case Decl::CXXMethod:
@@ -5293,17 +5367,17 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
break;
case Decl::ClassTemplateSpecialization: {
const auto *Spec = cast<ClassTemplateSpecializationDecl>(D);
- if (DebugInfo &&
- Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition &&
- Spec->hasDefinition())
- DebugInfo->completeTemplateDefinition(*Spec);
+ if (CGDebugInfo *DI = getModuleDebugInfo())
+ if (Spec->getSpecializationKind() ==
+ TSK_ExplicitInstantiationDefinition &&
+ Spec->hasDefinition())
+ DI->completeTemplateDefinition(*Spec);
} LLVM_FALLTHROUGH;
case Decl::CXXRecord:
- if (DebugInfo) {
+ if (CGDebugInfo *DI = getModuleDebugInfo())
if (auto *ES = D->getASTContext().getExternalSource())
if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
- DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D));
- }
+ DI->completeUnusedClass(cast<CXXRecordDecl>(*D));
// Emit any static data members, they may be definitions.
for (auto *I : cast<CXXRecordDecl>(D)->decls())
if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I))
@@ -5324,15 +5398,15 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
case Decl::Using: // using X; [C++]
if (CGDebugInfo *DI = getModuleDebugInfo())
DI->EmitUsingDecl(cast<UsingDecl>(*D));
- return;
+ break;
case Decl::NamespaceAlias:
if (CGDebugInfo *DI = getModuleDebugInfo())
DI->EmitNamespaceAlias(cast<NamespaceAliasDecl>(*D));
- return;
+ break;
case Decl::UsingDirective: // using namespace X; [C++]
if (CGDebugInfo *DI = getModuleDebugInfo())
DI->EmitUsingDirective(cast<UsingDirectiveDecl>(*D));
- return;
+ break;
case Decl::CXXConstructor:
getCXXABI().EmitCXXConstructors(cast<CXXConstructorDecl>(D));
break;
@@ -5515,10 +5589,10 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) {
case Decl::CXXConstructor:
case Decl::CXXDestructor: {
if (!cast<FunctionDecl>(D)->doesThisDeclarationHaveABody())
- return;
+ break;
SourceManager &SM = getContext().getSourceManager();
if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc()))
- return;
+ break;
auto I = DeferredEmptyCoverageMappingDecls.find(D);
if (I == DeferredEmptyCoverageMappingDecls.end())
DeferredEmptyCoverageMappingDecls[D] = true;
@@ -5584,6 +5658,17 @@ void CodeGenModule::EmitDeferredUnusedCoverageMappings() {
}
}
+void CodeGenModule::EmitMainVoidAlias() {
+ // In order to transition away from "__original_main" gracefully, emit an
+ // alias for "main" in the no-argument case so that libc can detect when
+ // new-style no-argument main is in use.
+ if (llvm::Function *F = getModule().getFunction("main")) {
+ if (!F->isDeclaration() && F->arg_size() == 0 && !F->isVarArg() &&
+ F->getReturnType()->isIntegerTy(Context.getTargetInfo().getIntWidth()))
+ addUsedGlobal(llvm::GlobalAlias::create("__main_void", F));
+ }
+}
+
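A worked example (hypothetical input) of what qualifies under the check in
EmitMainVoidAlias above when targeting non-Emscripten wasm:

    // Gets the "__main_void" alias (defined, zero-arg, int-width return):
    //   int main(void) { return 0; }
    // Does not (has parameters; libc must call it with argc/argv):
    //   int main(int argc, char **argv);
    // The alias is created via llvm::GlobalAlias::create("__main_void", F)
    // and kept alive through addUsedGlobal(), as shown above.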
/// Turns the given pointer into a constant.
static llvm::Constant *GetPointerConstant(llvm::LLVMContext &Context,
const void *Ptr) {
@@ -5698,21 +5783,6 @@ void CodeGenModule::EmitCommandLineMetadata() {
CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode));
}
-void CodeGenModule::EmitTargetMetadata() {
- // Warning, new MangledDeclNames may be appended within this loop.
- // We rely on MapVector insertions adding new elements to the end
- // of the container.
- // FIXME: Move this loop into the one target that needs it, and only
- // loop over those declarations for which we couldn't emit the target
- // metadata when we emitted the declaration.
- for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
- auto Val = *(MangledDeclNames.begin() + I);
- const Decl *D = Val.first.getDecl()->getMostRecentDecl();
- llvm::GlobalValue *GV = GetGlobalValue(Val.second);
- getTargetCodeGenInfo().emitTargetMD(D, GV, *this);
- }
-}
-
void CodeGenModule::EmitCoverageFile() {
if (getCodeGenOpts().CoverageDataFile.empty() &&
getCodeGenOpts().CoverageNotesFile.empty())
@@ -5735,39 +5805,14 @@ void CodeGenModule::EmitCoverageFile() {
}
}
-llvm::Constant *CodeGenModule::EmitUuidofInitializer(StringRef Uuid) {
- // Sema has checked that all uuid strings are of the form
- // "12345678-1234-1234-1234-1234567890ab".
- assert(Uuid.size() == 36);
- for (unsigned i = 0; i < 36; ++i) {
- if (i == 8 || i == 13 || i == 18 || i == 23) assert(Uuid[i] == '-');
- else assert(isHexDigit(Uuid[i]));
- }
-
- // The starts of all bytes of Field3 in Uuid. Field 3 is "1234-1234567890ab".
- const unsigned Field3ValueOffsets[8] = { 19, 21, 24, 26, 28, 30, 32, 34 };
-
- llvm::Constant *Field3[8];
- for (unsigned Idx = 0; Idx < 8; ++Idx)
- Field3[Idx] = llvm::ConstantInt::get(
- Int8Ty, Uuid.substr(Field3ValueOffsets[Idx], 2), 16);
-
- llvm::Constant *Fields[4] = {
- llvm::ConstantInt::get(Int32Ty, Uuid.substr(0, 8), 16),
- llvm::ConstantInt::get(Int16Ty, Uuid.substr(9, 4), 16),
- llvm::ConstantInt::get(Int16Ty, Uuid.substr(14, 4), 16),
- llvm::ConstantArray::get(llvm::ArrayType::get(Int8Ty, 8), Field3)
- };
-
- return llvm::ConstantStruct::getAnon(Fields);
-}
-
llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
bool ForEH) {
// Return a bogus pointer if RTTI is disabled, unless it's for EH.
// FIXME: should we even be calling this method if RTTI is disabled
// and it's not for EH?
- if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice)
+ if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice ||
+ (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
+ getTriple().isNVPTX()))
return llvm::Constant::getNullValue(Int8PtrTy);
if (ForEH && Ty->isObjCObjectPointerType() &&
@@ -5911,3 +5956,99 @@ CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E,
"__translate_sampler_initializer"),
{C});
}
+
+CharUnits CodeGenModule::getNaturalPointeeTypeAlignment(
+ QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) {
+ return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo,
+ /* forPointeeType= */ true);
+}
+
+CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T,
+ LValueBaseInfo *BaseInfo,
+ TBAAAccessInfo *TBAAInfo,
+ bool forPointeeType) {
+ if (TBAAInfo)
+ *TBAAInfo = getTBAAAccessInfo(T);
+
+ // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But
+ // that doesn't return the information we need to compute BaseInfo.
+
+ // Honor alignment typedef attributes even on incomplete types.
+ // We also honor them straight for C++ class types, even as pointees;
+ // there's an expressivity gap here.
+ if (auto TT = T->getAs<TypedefType>()) {
+ if (auto Align = TT->getDecl()->getMaxAlignment()) {
+ if (BaseInfo)
+ *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType);
+ return getContext().toCharUnitsFromBits(Align);
+ }
+ }
+
+ bool AlignForArray = T->isArrayType();
+
+ // Analyze the base element type, so we don't get confused by incomplete
+ // array types.
+ T = getContext().getBaseElementType(T);
+
+ if (T->isIncompleteType()) {
+ // We could try to replicate the logic from
+ // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the
+ // type is incomplete, so it's impossible to test. We could try to reuse
+ // getTypeAlignIfKnown, but that doesn't return the information we need
+ // to set BaseInfo. So just ignore the possibility that the alignment is
+ // greater than one.
+ if (BaseInfo)
+ *BaseInfo = LValueBaseInfo(AlignmentSource::Type);
+ return CharUnits::One();
+ }
+
+ if (BaseInfo)
+ *BaseInfo = LValueBaseInfo(AlignmentSource::Type);
+
+ CharUnits Alignment;
+ // For C++ class pointees, we don't know whether we're pointing at a
+ // base or a complete object, so we generally need to use the
+ // non-virtual alignment.
+ const CXXRecordDecl *RD;
+ if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) {
+ Alignment = getClassPointerAlignment(RD);
+ } else {
+ Alignment = getContext().getTypeAlignInChars(T);
+ if (T.getQualifiers().hasUnaligned())
+ Alignment = CharUnits::One();
+ }
+
+ // Cap to the global maximum type alignment unless the alignment
+ // was somehow explicit on the type.
+ if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) {
+ if (Alignment.getQuantity() > MaxAlign &&
+ !getContext().isAlignmentRequired(T))
+ Alignment = CharUnits::fromQuantity(MaxAlign);
+ }
+ return Alignment;
+}
+
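A brief usage sketch of the two helpers now available on CodeGenModule (the
caller and the PtrTy/Ty variables are hypothetical):

    // Alignment of the pointee of PtrTy, plus where that alignment came from:
    LValueBaseInfo BaseInfo;
    TBAAAccessInfo TBAAInfo;
    CharUnits PointeeAlign =
        CGM.getNaturalPointeeTypeAlignment(PtrTy, &BaseInfo, &TBAAInfo);
    // Alignment of a type accessed directly (not through a pointer):
    CharUnits TyAlign = CGM.getNaturalTypeAlignment(Ty);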
+bool CodeGenModule::stopAutoInit() {
+ unsigned StopAfter = getContext().getLangOpts().TrivialAutoVarInitStopAfter;
+ if (StopAfter) {
+ // This number is positive only when -ftrivial-auto-var-init-stop-after=* is
+ // used.
+ if (NumAutoVarInit >= StopAfter) {
+ return true;
+ }
+ if (!NumAutoVarInit) {
+ unsigned DiagID = getDiags().getCustomDiagID(
+ DiagnosticsEngine::Warning,
+ "-ftrivial-auto-var-init-stop-after=%0 has been enabled to limit the "
+ "number of times ftrivial-auto-var-init=%1 gets applied.");
+ getDiags().Report(DiagID)
+ << StopAfter
+ << (getContext().getLangOpts().getTrivialAutoVarInit() ==
+ LangOptions::TrivialAutoVarInitKind::Zero
+ ? "zero"
+ : "pattern");
+ }
+ ++NumAutoVarInit;
+ }
+ return false;
+}
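A sketch of the intended call pattern (the call site shown is assumed, not
part of this patch): each variable that would receive a trivial auto-var
init consults stopAutoInit() first, so the counter advances once per
initialized variable and the one-time warning notes that the cap is active.

    // Hypothetical use in variable emission:
    if (CGM.stopAutoInit())
      return; // -ftrivial-auto-var-init-stop-after cap reached; skip init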
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 115e754bb392..a6c4a1f7b278 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -26,6 +26,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SanitizerBlacklist.h"
+#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/XRayLists.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
@@ -301,6 +302,7 @@ private:
const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info.
const PreprocessorOptions &PreprocessorOpts; // Only used for debug info.
const CodeGenOptions &CodeGenOpts;
+ unsigned NumAutoVarInit = 0;
llvm::Module &TheModule;
DiagnosticsEngine &Diags;
const TargetInfo &Target;
@@ -322,7 +324,6 @@ private:
std::unique_ptr<CGObjCRuntime> ObjCRuntime;
std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime;
std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime;
- std::unique_ptr<llvm::OpenMPIRBuilder> OMPBuilder;
std::unique_ptr<CGCUDARuntime> CUDARuntime;
std::unique_ptr<CGDebugInfo> DebugInfo;
std::unique_ptr<ObjCEntrypoints> ObjCData;
@@ -395,6 +396,10 @@ private:
/// emitted when the translation unit is complete.
CtorList GlobalDtors;
+ /// A unique trailing identifier appended to sinit/sterm function names when
+ /// UseSinitAndSterm of CXXABI is set to true.
+ std::string GlobalUniqueModuleId;
+
/// An ordered map of canonical GlobalDecls to their mangled names.
llvm::MapVector<GlobalDecl, StringRef> MangledDeclNames;
llvm::StringMap<GlobalDecl, llvm::BumpPtrAllocator> Manglings;
@@ -463,9 +468,11 @@ private:
SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits;
/// Global destructor functions and arguments that need to run on termination.
+ /// When UseSinitAndSterm is set, it instead contains sterm finalizer
+ /// functions, which also run on unloading a shared library.
std::vector<
std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, llvm::Constant *>>
- CXXGlobalDtors;
+ CXXGlobalDtorsOrStermFinalizers;
/// The complete set of modules that has been imported.
llvm::SetVector<clang::Module *> ImportedModules;
@@ -589,9 +596,6 @@ public:
return *OpenMPRuntime;
}
- /// Return a pointer to the configured OpenMPIRBuilder, if any.
- llvm::OpenMPIRBuilder *getOpenMPIRBuilder() { return OMPBuilder.get(); }
-
/// Return a reference to the configured CUDA runtime.
CGCUDARuntime &getCUDARuntime() {
assert(CUDARuntime != nullptr);
@@ -788,6 +792,9 @@ public:
/// variable declaration D.
void setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const;
+ /// Get LLVM TLS mode from CodeGenOptions.
+ llvm::GlobalVariable::ThreadLocalMode GetDefaultLLVMTLSModel() const;
+
static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) {
switch (V) {
case DefaultVisibility: return llvm::GlobalValue::DefaultVisibility;
@@ -810,11 +817,10 @@ public:
llvm::GlobalValue::LinkageTypes Linkage,
unsigned Alignment);
- llvm::Function *
- CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name,
- const CGFunctionInfo &FI,
- SourceLocation Loc = SourceLocation(),
- bool TLS = false);
+ llvm::Function *CreateGlobalInitOrCleanUpFunction(
+ llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI,
+ SourceLocation Loc = SourceLocation(), bool TLS = false,
+ bool IsExternalLinkage = false);
/// Return the AST address space of the underlying global variable for D, as
/// determined by its declaration. Normally this is the same as the address
@@ -855,8 +861,8 @@ public:
/// Get the address of the RTTI descriptor for the given type.
llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
- /// Get the address of a uuid descriptor .
- ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
+ /// Get the address of a GUID.
+ ConstantAddress GetAddrOfMSGuidDecl(const MSGuidDecl *GD);
/// Get the address of the thunk for the given global decl.
llvm::Constant *GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
@@ -868,6 +874,17 @@ public:
/// Returns the assumed alignment of an opaque pointer to the given class.
CharUnits getClassPointerAlignment(const CXXRecordDecl *CD);
+ /// Returns the minimum object size for an object of the given class type
+ /// (or a class derived from it).
+ CharUnits getMinimumClassObjectSize(const CXXRecordDecl *CD);
+
+ /// Returns the minimum object size for an object of the given type.
+ CharUnits getMinimumObjectSize(QualType Ty) {
+ if (CXXRecordDecl *RD = Ty->getAsCXXRecordDecl())
+ return getMinimumClassObjectSize(RD);
+ return getContext().getTypeSizeInChars(Ty);
+ }
+
/// Returns the assumed alignment of a virtual base of a class.
CharUnits getVBaseAlignment(CharUnits DerivedAlign,
const CXXRecordDecl *Derived,
@@ -1012,6 +1029,9 @@ public:
/// for the uninstrumented functions.
void EmitDeferredUnusedCoverageMappings();
+ /// Emit an alias for "main" if it has no arguments (needed for wasm).
+ void EmitMainVoidAlias();
+
/// Tell the consumer that this variable has been instantiated.
void HandleCXXStaticMemberVarInstantiation(VarDecl *VD);
@@ -1029,8 +1049,14 @@ public:
/// Add a destructor and object to add to the C++ global destructor function.
void AddCXXDtorEntry(llvm::FunctionCallee DtorFn, llvm::Constant *Object) {
- CXXGlobalDtors.emplace_back(DtorFn.getFunctionType(), DtorFn.getCallee(),
- Object);
+ CXXGlobalDtorsOrStermFinalizers.emplace_back(DtorFn.getFunctionType(),
+ DtorFn.getCallee(), Object);
+ }
+
+ /// Add an sterm finalizer to the C++ global cleanup function.
+ void AddCXXStermFinalizerEntry(llvm::FunctionCallee DtorFn) {
+ CXXGlobalDtorsOrStermFinalizers.emplace_back(DtorFn.getFunctionType(),
+ DtorFn.getCallee(), nullptr);
}
/// Create or return a runtime function declaration with the specified type
@@ -1155,7 +1181,11 @@ public:
/// on the function more conservative. But it's unsafe to call this on a
/// function which relies on particular fast-math attributes for correctness.
/// It's up to you to ensure that this is safe.
- void AddDefaultFnAttrs(llvm::Function &F);
+ void addDefaultFunctionDefinitionAttributes(llvm::Function &F);
+
+ /// Like the overload taking a `Function &`, but intended specifically
+ /// for frontends that want to build on Clang's target-configuration logic.
+ void addDefaultFunctionDefinitionAttributes(llvm::AttrBuilder &attrs);
StringRef getMangledName(GlobalDecl GD);
StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD);
@@ -1282,16 +1312,16 @@ public:
/// \param D Requires declaration
void EmitOMPRequiresDecl(const OMPRequiresDecl *D);
- /// Emits the definition of \p OldGD function with body from \p NewGD.
- /// Required for proper handling of declare variant directive on the GPU.
- void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD,
- llvm::GlobalValue *GV);
-
/// Returns whether the given record has hidden LTO visibility and therefore
/// may participate in (single-module) CFI and whole-program vtable
/// optimization.
bool HasHiddenLTOVisibility(const CXXRecordDecl *RD);
+ /// Returns whether the given record has public std LTO visibility
+ /// and therefore may not participate in (single-module) CFI and whole-program
+ /// vtable optimization.
+ bool HasLTOVisibilityPublicStd(const CXXRecordDecl *RD);
+
/// Returns the vcall visibility of the given type. This is the scope in which
/// a virtual function call could be made which ends up being dispatched to a
/// member function of this class. This scope can be wider than the visibility
@@ -1367,6 +1397,15 @@ public:
/// \param QT is the clang QualType of the null pointer.
llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT);
+ CharUnits getNaturalTypeAlignment(QualType T,
+ LValueBaseInfo *BaseInfo = nullptr,
+ TBAAAccessInfo *TBAAInfo = nullptr,
+ bool forPointeeType = false);
+ CharUnits getNaturalPointeeTypeAlignment(QualType T,
+ LValueBaseInfo *BaseInfo = nullptr,
+ TBAAAccessInfo *TBAAInfo = nullptr);
+ bool stopAutoInit();
+
private:
llvm::Constant *GetOrCreateLLVMFunction(
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
@@ -1417,8 +1456,8 @@ private:
/// Emit the function that initializes C++ globals.
void EmitCXXGlobalInitFunc();
- /// Emit the function that destroys C++ globals.
- void EmitCXXGlobalDtorFunc();
+ /// Emit the function that performs cleanup associated with C++ globals.
+ void EmitCXXGlobalCleanUpFunc();
/// Emit the function that initializes the specified global (if PerformInit is
/// true) and registers its destructor.
@@ -1489,8 +1528,9 @@ private:
/// Emit the Clang commandline as llvm.commandline metadata.
void EmitCommandLineMetadata();
- /// Emits target specific Metadata for global declarations.
- void EmitTargetMetadata();
+ /// Emit the module flag metadata used to pass options controlling
+ /// the backend to LLVM.
+ void EmitBackendOptionsMetadata(const CodeGenOptions CodeGenOpts);
/// Emits OpenCL specific Metadata e.g. OpenCL version.
void EmitOpenCLMetadata();
@@ -1499,9 +1539,6 @@ private:
/// .gcda files in a way that persists in .bc files.
void EmitCoverageFile();
- /// Emits the initializer for a uuidof string.
- llvm::Constant *EmitUuidofInitializer(StringRef uuidstr);
-
/// Determine whether the definition must be emitted; if this returns \c
/// false, the definition can be emitted lazily if it's used.
bool MustBeEmitted(const ValueDecl *D);
@@ -1516,11 +1553,12 @@ private:
/// function.
void SimplifyPersonality();
- /// Helper function for ConstructAttributeList and AddDefaultFnAttrs.
- /// Constructs an AttrList for a function with the given properties.
- void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
- bool AttrOnCallSite,
- llvm::AttrBuilder &FuncAttrs);
+ /// Helper function for ConstructAttributeList and
+ /// addDefaultFunctionDefinitionAttributes. Builds a set of function
+ /// attributes to add to a function with the given properties.
+ void getDefaultFunctionAttributes(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs);
llvm::Metadata *CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map,
StringRef Suffix);
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index bad796bf92dc..e810f608ab78 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -52,9 +52,10 @@ void CodeGenPGO::setFuncName(llvm::Function *Fn) {
enum PGOHashVersion : unsigned {
PGO_HASH_V1,
PGO_HASH_V2,
+ PGO_HASH_V3,
// Keep this set to the latest hash version.
- PGO_HASH_LATEST = PGO_HASH_V2
+ PGO_HASH_LATEST = PGO_HASH_V3
};
namespace {
@@ -122,7 +123,7 @@ public:
BinaryOperatorGE,
BinaryOperatorEQ,
BinaryOperatorNE,
- // The preceding values are available with PGO_HASH_V2.
+ // The preceding values are available since PGO_HASH_V2.
// Keep this last. It's for the static assert that follows.
LastHashType
@@ -144,7 +145,9 @@ static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
CodeGenModule &CGM) {
if (PGOReader->getVersion() <= 4)
return PGO_HASH_V1;
- return PGO_HASH_V2;
+ if (PGOReader->getVersion() <= 5)
+ return PGO_HASH_V2;
+ return PGO_HASH_V3;
}
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
@@ -288,7 +291,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
return PGOHash::BinaryOperatorLAnd;
if (BO->getOpcode() == BO_LOr)
return PGOHash::BinaryOperatorLOr;
- if (HashVersion == PGO_HASH_V2) {
+ if (HashVersion >= PGO_HASH_V2) {
switch (BO->getOpcode()) {
default:
break;
@@ -310,7 +313,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
}
}
- if (HashVersion == PGO_HASH_V2) {
+ if (HashVersion >= PGO_HASH_V2) {
switch (S->getStmtClass()) {
default:
break;
@@ -747,13 +750,21 @@ uint64_t PGOHash::finalize() {
return Working;
// Check for remaining work in Working.
- if (Working)
- MD5.update(Working);
+ if (Working) {
+ // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
+ // is buggy because it truncates the uint64_t to a single uint8_t.
+ if (HashVersion < PGO_HASH_V3) {
+ MD5.update({(uint8_t)Working});
+ } else {
+ using namespace llvm::support;
+ uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
+ MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
+ }
+ }
// Finalize the MD5 and return the hash.
llvm::MD5::MD5Result Result;
MD5.final(Result);
- using namespace llvm::support;
return Result.low();
}
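
For intuition, a standalone sketch (not from this patch; the helper names are invented) of the two residual-chunk updates contrasted above. The v1/v2 cast keeps only the low byte of Working, so e.g. 0x01 and 0x101 hash identically; the v3 path hashes all eight bytes in a fixed little-endian order, which also keeps the result stable across host endianness.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"

// v1/v2 behavior: the cast truncates Working to its low byte.
static void updateResidualV2(llvm::MD5 &Hash, uint64_t Working) {
  Hash.update({(uint8_t)Working});
}

// v3 behavior: endian-stable update over all eight bytes.
static void updateResidualV3(llvm::MD5 &Hash, uint64_t Working) {
  using namespace llvm::support;
  uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
  Hash.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
}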
@@ -1051,8 +1062,7 @@ llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
if (!PGO.haveRegionCounts())
return nullptr;
Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
- assert(CondCount.hasValue() && "missing expected loop condition count");
- if (*CondCount == 0)
+ if (!CondCount || *CondCount == 0)
return nullptr;
return createProfileWeights(LoopCount,
std::max(*CondCount, LoopCount) - LoopCount);
diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h
index a3778b549910..dda8c66b6db2 100644
--- a/clang/lib/CodeGen/CodeGenPGO.h
+++ b/clang/lib/CodeGen/CodeGenPGO.h
@@ -40,8 +40,8 @@ private:
uint64_t CurrentRegionCount;
public:
- CodeGenPGO(CodeGenModule &CGM)
- : CGM(CGM), FuncNameVar(nullptr), NumValueSites({{0}}),
+ CodeGenPGO(CodeGenModule &CGModule)
+ : CGM(CGModule), FuncNameVar(nullptr), NumValueSites({{0}}),
NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {}
/// Whether or not we have PGO region data for the current function. This is
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 7d730cb1ed15..f4ebe6885675 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -141,6 +141,34 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
case BuiltinType::UInt128:
return getTypeInfo(Context.Int128Ty);
+ case BuiltinType::UShortFract:
+ return getTypeInfo(Context.ShortFractTy);
+ case BuiltinType::UFract:
+ return getTypeInfo(Context.FractTy);
+ case BuiltinType::ULongFract:
+ return getTypeInfo(Context.LongFractTy);
+
+ case BuiltinType::SatUShortFract:
+ return getTypeInfo(Context.SatShortFractTy);
+ case BuiltinType::SatUFract:
+ return getTypeInfo(Context.SatFractTy);
+ case BuiltinType::SatULongFract:
+ return getTypeInfo(Context.SatLongFractTy);
+
+ case BuiltinType::UShortAccum:
+ return getTypeInfo(Context.ShortAccumTy);
+ case BuiltinType::UAccum:
+ return getTypeInfo(Context.AccumTy);
+ case BuiltinType::ULongAccum:
+ return getTypeInfo(Context.LongAccumTy);
+
+ case BuiltinType::SatUShortAccum:
+ return getTypeInfo(Context.SatShortAccumTy);
+ case BuiltinType::SatUAccum:
+ return getTypeInfo(Context.SatAccumTy);
+ case BuiltinType::SatULongAccum:
+ return getTypeInfo(Context.SatLongAccumTy);
+
// Treat all other builtin types as distinct types. This includes
// treating wchar_t, char16_t, and char32_t as distinct from their
// "underlying types".
@@ -181,6 +209,15 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
return createScalarTypeNode(OutName, getChar(), Size);
}
+ if (const auto *EIT = dyn_cast<ExtIntType>(Ty)) {
+ SmallString<256> OutName;
+ llvm::raw_svector_ostream Out(OutName);
+ // Don't specify signed/unsigned since integer types can alias despite sign
+ // differences.
+ Out << "_ExtInt(" << EIT->getNumBits() << ')';
+ return createScalarTypeNode(OutName, getChar(), Size);
+ }
+
// For now, handle any other kind of type conservatively.
return getChar();
}
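
As a concrete illustration (the snippet is invented, not from the patch), both declarations below receive the same "_ExtInt(37)" TBAA node, because the name deliberately omits signedness:

_ExtInt(37) Signed;            // TBAA identifier: "_ExtInt(37)"
unsigned _ExtInt(37) Unsigned; // same identifier, so the two may alias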
diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h
index ed4b773afd13..20a3263c0b1a 100644
--- a/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -35,8 +35,8 @@ struct CodeGenTypeCache {
/// i8, i16, i32, and i64
llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty;
- /// float, double
- llvm::Type *HalfTy, *FloatTy, *DoubleTy;
+ /// half, bfloat, float, double
+ llvm::Type *HalfTy, *BFloatTy, *FloatTy, *DoubleTy;
/// int
llvm::IntegerType *IntTy;
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index a458811d7a30..d431c0263666 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -36,8 +36,6 @@ CodeGenTypes::CodeGenTypes(CodeGenModule &cgm)
}
CodeGenTypes::~CodeGenTypes() {
- llvm::DeleteContainerSeconds(CGRecordLayouts);
-
for (llvm::FoldingSet<CGFunctionInfo>::iterator
I = FunctionInfos.begin(), E = FunctionInfos.end(); I != E; )
delete &*I++;
@@ -83,19 +81,26 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
/// ConvertType in that it is used to convert to the memory representation for
/// a type. For example, the scalar representation for _Bool is i1, but the
/// memory representation is usually i8 or i32, depending on the target.
-llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
+llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
+ if (T->isConstantMatrixType()) {
+ const Type *Ty = Context.getCanonicalType(T).getTypePtr();
+ const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty);
+ return llvm::ArrayType::get(ConvertType(MT->getElementType()),
+ MT->getNumRows() * MT->getNumColumns());
+ }
+
llvm::Type *R = ConvertType(T);
- // If this is a non-bool type, don't map it.
- if (!R->isIntegerTy(1))
- return R;
+ // If this is a bool type, or an ExtIntType in a bitfield representation,
+ // map this integer to the target-specified size.
+ if ((ForBitField && T->isExtIntType()) || R->isIntegerTy(1))
+ return llvm::IntegerType::get(getLLVMContext(),
+ (unsigned)Context.getTypeSize(T));
- // Otherwise, return an integer of the target-specified size.
- return llvm::IntegerType::get(getLLVMContext(),
- (unsigned)Context.getTypeSize(T));
+ // Otherwise, don't map it.
+ return R;
}
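
A sketch of the memory mappings this function now performs (illustrative declarations; the matrix spelling assumes Clang's matrix extension is enabled):

// float __attribute__((matrix_type(4, 4))) M;  -> [16 x float]
// _Bool B;                                     -> typically i8 (i1 as a scalar)
// _ExtInt(17) field, with ForBitField set      -> an iN widened to the type's
//                                                 full storage size (getTypeSize)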
-
/// isRecordLayoutComplete - Return true if the specified type is already
/// completely laid out.
bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const {
@@ -295,6 +300,8 @@ static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext,
else
return llvm::Type::getInt16Ty(VMContext);
}
+ if (&format == &llvm::APFloat::BFloat())
+ return llvm::Type::getBFloatTy(VMContext);
if (&format == &llvm::APFloat::IEEEsingle())
return llvm::Type::getFloatTy(VMContext);
if (&format == &llvm::APFloat::IEEEdouble())
@@ -383,6 +390,20 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
const Type *Ty = T.getTypePtr();
+ // During device-side compilation, CUDA device builtin surface/texture types
+ // may be represented by different types.
+ if (Context.getLangOpts().CUDAIsDevice) {
+ if (T->isCUDADeviceBuiltinSurfaceType()) {
+ if (auto *Ty = CGM.getTargetCodeGenInfo()
+ .getCUDADeviceBuiltinSurfaceDeviceType())
+ return Ty;
+ } else if (T->isCUDADeviceBuiltinTextureType()) {
+ if (auto *Ty = CGM.getTargetCodeGenInfo()
+ .getCUDADeviceBuiltinTextureDeviceType())
+ return Ty;
+ }
+ }
+
// RecordTypes are cached and processed specially.
if (const RecordType *RT = dyn_cast<RecordType>(Ty))
return ConvertRecordDeclType(RT->getDecl());
@@ -479,6 +500,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
Context.getLangOpts().NativeHalfType ||
!Context.getTargetInfo().useFP16ConversionIntrinsics());
break;
+ case BuiltinType::BFloat16:
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
@@ -511,23 +533,99 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::OCLReserveID:
ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty);
break;
-
- // TODO: real CodeGen support for SVE types requires more infrastructure
- // to be added first. Report an error until then.
-#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
-#include "clang/Basic/AArch64SVEACLETypes.def"
- {
- unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "cannot yet generate code for SVE type '%0'");
- auto *BT = cast<BuiltinType>(Ty);
- auto Name = BT->getName(CGM.getContext().getPrintingPolicy());
- CGM.getDiags().Report(DiagID) << Name;
- // Return something safe.
- ResultType = llvm::IntegerType::get(getLLVMContext(), 32);
- break;
- }
-
+#define GET_SVE_INT_VEC(BITS, ELTS) \
+ llvm::ScalableVectorType::get( \
+ llvm::IntegerType::get(getLLVMContext(), BITS), ELTS);
+ case BuiltinType::SveInt8:
+ case BuiltinType::SveUint8:
+ return GET_SVE_INT_VEC(8, 16);
+ case BuiltinType::SveInt8x2:
+ case BuiltinType::SveUint8x2:
+ return GET_SVE_INT_VEC(8, 32);
+ case BuiltinType::SveInt8x3:
+ case BuiltinType::SveUint8x3:
+ return GET_SVE_INT_VEC(8, 48);
+ case BuiltinType::SveInt8x4:
+ case BuiltinType::SveUint8x4:
+ return GET_SVE_INT_VEC(8, 64);
+ case BuiltinType::SveInt16:
+ case BuiltinType::SveUint16:
+ return GET_SVE_INT_VEC(16, 8);
+ case BuiltinType::SveInt16x2:
+ case BuiltinType::SveUint16x2:
+ return GET_SVE_INT_VEC(16, 16);
+ case BuiltinType::SveInt16x3:
+ case BuiltinType::SveUint16x3:
+ return GET_SVE_INT_VEC(16, 24);
+ case BuiltinType::SveInt16x4:
+ case BuiltinType::SveUint16x4:
+ return GET_SVE_INT_VEC(16, 32);
+ case BuiltinType::SveInt32:
+ case BuiltinType::SveUint32:
+ return GET_SVE_INT_VEC(32, 4);
+ case BuiltinType::SveInt32x2:
+ case BuiltinType::SveUint32x2:
+ return GET_SVE_INT_VEC(32, 8);
+ case BuiltinType::SveInt32x3:
+ case BuiltinType::SveUint32x3:
+ return GET_SVE_INT_VEC(32, 12);
+ case BuiltinType::SveInt32x4:
+ case BuiltinType::SveUint32x4:
+ return GET_SVE_INT_VEC(32, 16);
+ case BuiltinType::SveInt64:
+ case BuiltinType::SveUint64:
+ return GET_SVE_INT_VEC(64, 2);
+ case BuiltinType::SveInt64x2:
+ case BuiltinType::SveUint64x2:
+ return GET_SVE_INT_VEC(64, 4);
+ case BuiltinType::SveInt64x3:
+ case BuiltinType::SveUint64x3:
+ return GET_SVE_INT_VEC(64, 6);
+ case BuiltinType::SveInt64x4:
+ case BuiltinType::SveUint64x4:
+ return GET_SVE_INT_VEC(64, 8);
+ case BuiltinType::SveBool:
+ return GET_SVE_INT_VEC(1, 16);
+#undef GET_SVE_INT_VEC
+#define GET_SVE_FP_VEC(TY, ISFP16, ELTS) \
+ llvm::ScalableVectorType::get( \
+ getTypeForFormat(getLLVMContext(), \
+ Context.getFloatTypeSemantics(Context.TY), \
+ /* UseNativeHalf = */ ISFP16), \
+ ELTS);
+ case BuiltinType::SveFloat16:
+ return GET_SVE_FP_VEC(HalfTy, true, 8);
+ case BuiltinType::SveFloat16x2:
+ return GET_SVE_FP_VEC(HalfTy, true, 16);
+ case BuiltinType::SveFloat16x3:
+ return GET_SVE_FP_VEC(HalfTy, true, 24);
+ case BuiltinType::SveFloat16x4:
+ return GET_SVE_FP_VEC(HalfTy, true, 32);
+ case BuiltinType::SveFloat32:
+ return GET_SVE_FP_VEC(FloatTy, false, 4);
+ case BuiltinType::SveFloat32x2:
+ return GET_SVE_FP_VEC(FloatTy, false, 8);
+ case BuiltinType::SveFloat32x3:
+ return GET_SVE_FP_VEC(FloatTy, false, 12);
+ case BuiltinType::SveFloat32x4:
+ return GET_SVE_FP_VEC(FloatTy, false, 16);
+ case BuiltinType::SveFloat64:
+ return GET_SVE_FP_VEC(DoubleTy, false, 2);
+ case BuiltinType::SveFloat64x2:
+ return GET_SVE_FP_VEC(DoubleTy, false, 4);
+ case BuiltinType::SveFloat64x3:
+ return GET_SVE_FP_VEC(DoubleTy, false, 6);
+ case BuiltinType::SveFloat64x4:
+ return GET_SVE_FP_VEC(DoubleTy, false, 8);
+ case BuiltinType::SveBFloat16:
+ return GET_SVE_FP_VEC(BFloat16Ty, false, 8);
+ case BuiltinType::SveBFloat16x2:
+ return GET_SVE_FP_VEC(BFloat16Ty, false, 16);
+ case BuiltinType::SveBFloat16x3:
+ return GET_SVE_FP_VEC(BFloat16Ty, false, 24);
+ case BuiltinType::SveBFloat16x4:
+ return GET_SVE_FP_VEC(BFloat16Ty, false, 32);
+#undef GET_SVE_FP_VEC
case BuiltinType::Dependent:
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) \
@@ -560,7 +658,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
llvm::Type *PointeeType = ConvertTypeForMem(ETy);
if (PointeeType->isVoidTy())
PointeeType = llvm::Type::getInt8Ty(getLLVMContext());
- unsigned AS = Context.getTargetAddressSpace(ETy);
+
+ unsigned AS = PointeeType->isFunctionTy()
+ ? getDataLayout().getProgramAddressSpace()
+ : Context.getTargetAddressSpace(ETy);
+
ResultType = llvm::PointerType::get(PointeeType, AS);
break;
}
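
For reference, a few representative expansions of the GET_SVE_INT_VEC / GET_SVE_FP_VEC helpers above (illustrative, not from the patch):

// svint8_t    (SveInt8)    -> ScalableVectorType of i8 x 16, i.e. <vscale x 16 x i8>
// svfloat64_t (SveFloat64) -> <vscale x 2 x double>
// svbool_t    (SveBool)    -> <vscale x 16 x i1>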
@@ -605,8 +707,15 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case Type::ExtVector:
case Type::Vector: {
const VectorType *VT = cast<VectorType>(Ty);
- ResultType = llvm::VectorType::get(ConvertType(VT->getElementType()),
- VT->getNumElements());
+ ResultType = llvm::FixedVectorType::get(ConvertType(VT->getElementType()),
+ VT->getNumElements());
+ break;
+ }
+ case Type::ConstantMatrix: {
+ const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty);
+ ResultType =
+ llvm::FixedVectorType::get(ConvertType(MT->getElementType()),
+ MT->getNumRows() * MT->getNumColumns());
break;
}
case Type::FunctionNoProto:
@@ -692,6 +801,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
ResultType = CGM.getOpenCLRuntime().getPipeType(cast<PipeType>(Ty));
break;
}
+ case Type::ExtInt: {
+ const auto &EIT = cast<ExtIntType>(Ty);
+ ResultType = llvm::Type::getIntNTy(getLLVMContext(), EIT->getNumBits());
+ break;
+ }
}
assert(ResultType && "Didn't convert a type?");
@@ -749,8 +863,8 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) {
}
// Layout fields.
- CGRecordLayout *Layout = ComputeRecordLayout(RD, Ty);
- CGRecordLayouts[Key] = Layout;
+ std::unique_ptr<CGRecordLayout> Layout = ComputeRecordLayout(RD, Ty);
+ CGRecordLayouts[Key] = std::move(Layout);
// We're done laying out this struct.
bool EraseResult = RecordsBeingLaidOut.erase(Key); (void)EraseResult;
@@ -776,17 +890,18 @@ const CGRecordLayout &
CodeGenTypes::getCGRecordLayout(const RecordDecl *RD) {
const Type *Key = Context.getTagDeclType(RD).getTypePtr();
- const CGRecordLayout *Layout = CGRecordLayouts.lookup(Key);
- if (!Layout) {
- // Compute the type information.
- ConvertRecordDeclType(RD);
+ auto I = CGRecordLayouts.find(Key);
+ if (I != CGRecordLayouts.end())
+ return *I->second;
+ // Compute the type information.
+ ConvertRecordDeclType(RD);
- // Now try again.
- Layout = CGRecordLayouts.lookup(Key);
- }
+ // Now try again.
+ I = CGRecordLayouts.find(Key);
- assert(Layout && "Unable to find record layout information for type");
- return *Layout;
+ assert(I != CGRecordLayouts.end() &&
+ "Unable to find record layout information for type");
+ return *I->second;
}
bool CodeGenTypes::isPointerZeroInitializable(QualType T) {
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 03102329507e..f8f7542e4c83 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -75,7 +75,7 @@ class CodeGenTypes {
llvm::DenseMap<const ObjCInterfaceType*, llvm::Type *> InterfaceTypes;
/// Maps clang struct type with corresponding record layout info.
- llvm::DenseMap<const Type*, CGRecordLayout *> CGRecordLayouts;
+ llvm::DenseMap<const Type*, std::unique_ptr<CGRecordLayout>> CGRecordLayouts;
/// Contains the LLVM IR type for any converted RecordDecl.
llvm::DenseMap<const Type*, llvm::StructType *> RecordDeclTypes;
@@ -134,7 +134,7 @@ public:
/// ConvertType in that it is used to convert to the memory representation for
/// a type. For example, the scalar representation for _Bool is i1, but the
/// memory representation is usually i8 or i32, depending on the target.
- llvm::Type *ConvertTypeForMem(QualType T);
+ llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
/// GetFunctionType - Get the LLVM function type for \arg Info.
llvm::FunctionType *GetFunctionType(const CGFunctionInfo &Info);
@@ -272,8 +272,8 @@ public:
RequiredArgs args);
/// Compute a new LLVM record layout object for the given record.
- CGRecordLayout *ComputeRecordLayout(const RecordDecl *D,
- llvm::StructType *Ty);
+ std::unique_ptr<CGRecordLayout> ComputeRecordLayout(const RecordDecl *D,
+ llvm::StructType *Ty);
/// addRecordTypeName - Compute a name from the given record decl with an
/// optional suffix and name the given LLVM type using it.
diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h
index 121acbac4fa9..188b82e56f53 100644
--- a/clang/lib/CodeGen/ConstantEmitter.h
+++ b/clang/lib/CodeGen/ConstantEmitter.h
@@ -110,6 +110,8 @@ public:
llvm::Constant *tryEmitAbstract(const APValue &value, QualType T);
llvm::Constant *tryEmitAbstractForMemory(const APValue &value, QualType T);
+ llvm::Constant *tryEmitConstantExpr(const ConstantExpr *CE);
+
llvm::Constant *emitNullForMemory(QualType T) {
return emitNullForMemory(CGM, T);
}
diff --git a/clang/lib/CodeGen/ConstantInitBuilder.cpp b/clang/lib/CodeGen/ConstantInitBuilder.cpp
index 2d63d88020be..24e3ca19709c 100644
--- a/clang/lib/CodeGen/ConstantInitBuilder.cpp
+++ b/clang/lib/CodeGen/ConstantInitBuilder.cpp
@@ -128,8 +128,14 @@ void ConstantAggregateBuilderBase::addSize(CharUnits size) {
llvm::Constant *
ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType,
llvm::Constant *target) {
+ return getRelativeOffsetToPosition(offsetType, target,
+ Builder.Buffer.size() - Begin);
+}
+
+llvm::Constant *ConstantAggregateBuilderBase::getRelativeOffsetToPosition(
+ llvm::IntegerType *offsetType, llvm::Constant *target, size_t position) {
// Compute the address of the relative-address slot.
- auto base = getAddrOfCurrentPosition(offsetType);
+ auto base = getAddrOfPosition(offsetType, position);
// Subtract.
base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy);
@@ -145,6 +151,20 @@ ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType,
}
llvm::Constant *
+ConstantAggregateBuilderBase::getAddrOfPosition(llvm::Type *type,
+ size_t position) {
+ // Make a global variable. We will replace this with a GEP to this
+ // position after installing the initializer.
+ auto dummy = new llvm::GlobalVariable(Builder.CGM.getModule(), type, true,
+ llvm::GlobalVariable::PrivateLinkage,
+ nullptr, "");
+ Builder.SelfReferences.emplace_back(dummy);
+ auto &entry = Builder.SelfReferences.back();
+ (void)getGEPIndicesTo(entry.Indices, position + Begin);
+ return dummy;
+}
+
+llvm::Constant *
ConstantAggregateBuilderBase::getAddrOfCurrentPosition(llvm::Type *type) {
// Make a global variable. We will replace this with a GEP to this
// position after installing the initializer.
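
In brief, the self-reference mechanism these helpers rely on (a conceptual sketch, not code from the tree):

// 1. getAddrOfPosition(type, pos) makes a private dummy global and records
//    GEP indices for element (pos + Begin) in Builder.SelfReferences.
// 2. getRelativeOffsetToPosition then folds, as a constant expression,
//      offset = ptrtoint(target) - ptrtoint(dummy)
// 3. Once the real initializer is installed, each dummy is replaced with a
//    GEP into the finished global at the recorded indices.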
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index bdecff39c88f..78b268f423cb 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -13,10 +13,13 @@
#include "CoverageMappingGen.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ProfileData/Coverage/CoverageMapping.h"
#include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
@@ -24,6 +27,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+// This selects the coverage mapping format defined when `InstrProfData.inc`
+// is textually included.
+#define COVMAP_V3
+
using namespace clang;
using namespace CodeGen;
using namespace llvm::coverage;
@@ -901,6 +908,18 @@ struct CounterCoverageMappingBuilder
terminateRegion(S);
}
+ void VisitCoroutineBodyStmt(const CoroutineBodyStmt *S) {
+ extendRegion(S);
+ Visit(S->getBody());
+ }
+
+ void VisitCoreturnStmt(const CoreturnStmt *S) {
+ extendRegion(S);
+ if (S->getOperand())
+ Visit(S->getOperand());
+ terminateRegion(S);
+ }
+
void VisitCXXThrowExpr(const CXXThrowExpr *E) {
extendRegion(E);
if (E->getSubExpr())
@@ -1272,17 +1291,11 @@ struct CounterCoverageMappingBuilder
}
};
-std::string getCoverageSection(const CodeGenModule &CGM) {
- return llvm::getInstrProfSectionName(
- llvm::IPSK_covmap,
- CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
-}
-
std::string normalizeFilename(StringRef Filename) {
llvm::SmallString<256> Path(Filename);
llvm::sys::fs::make_absolute(Path);
llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
- return Path.str().str();
+ return std::string(Path);
}
} // end anonymous namespace
@@ -1317,30 +1330,71 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
}
}
-void CoverageMappingModuleGen::addFunctionMappingRecord(
- llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
- const std::string &CoverageMapping, bool IsUsed) {
+static std::string getInstrProfSection(const CodeGenModule &CGM,
+ llvm::InstrProfSectKind SK) {
+ return llvm::getInstrProfSectionName(
+ SK, CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
+}
+
+void CoverageMappingModuleGen::emitFunctionMappingRecord(
+ const FunctionInfo &Info, uint64_t FilenamesRef) {
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
- if (!FunctionRecordTy) {
+
+ // Assign a name to the function record. This is used to merge duplicates.
+ std::string FuncRecordName = "__covrec_" + llvm::utohexstr(Info.NameHash);
+
+ // A dummy description for a function included-but-not-used in a TU can be
+ // replaced by a full description provided by a different TU. The two kinds
+ // of descriptions play distinct roles; therefore, assign them different
+ // names to prevent `linkonce_odr` merging.
+ if (Info.IsUsed)
+ FuncRecordName += "u";
+
+ // Create the function record type.
+ const uint64_t NameHash = Info.NameHash;
+ const uint64_t FuncHash = Info.FuncHash;
+ const std::string &CoverageMapping = Info.CoverageMapping;
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType,
- llvm::Type *FunctionRecordTypes[] = {
- #include "llvm/ProfileData/InstrProfData.inc"
- };
- FunctionRecordTy =
- llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
- /*isPacked=*/true);
- }
+ llvm::Type *FunctionRecordTypes[] = {
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *FunctionRecordTy =
+ llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
+ /*isPacked=*/true);
- #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
+ // Create the function record constant.
+#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
llvm::Constant *FunctionRecordVals[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
- FunctionRecords.push_back(llvm::ConstantStruct::get(
- FunctionRecordTy, makeArrayRef(FunctionRecordVals)));
+ auto *FuncRecordConstant = llvm::ConstantStruct::get(
+ FunctionRecordTy, makeArrayRef(FunctionRecordVals));
+
+ // Create the function record global.
+ auto *FuncRecord = new llvm::GlobalVariable(
+ CGM.getModule(), FunctionRecordTy, /*isConstant=*/true,
+ llvm::GlobalValue::LinkOnceODRLinkage, FuncRecordConstant,
+ FuncRecordName);
+ FuncRecord->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ FuncRecord->setSection(getInstrProfSection(CGM, llvm::IPSK_covfun));
+ FuncRecord->setAlignment(llvm::Align(8));
+ if (CGM.supportsCOMDAT())
+ FuncRecord->setComdat(CGM.getModule().getOrInsertComdat(FuncRecordName));
+
+ // Make sure the data doesn't get deleted.
+ CGM.addUsedGlobal(FuncRecord);
+}
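
A minimal sketch of the naming scheme, assembled from the pieces above (the standalone helper and its arguments are invented for illustration):

#include "llvm/ADT/StringExtras.h"
#include "llvm/ProfileData/InstrProf.h"

std::string funcRecordName(llvm::StringRef NameValue, bool IsUsed) {
  uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue);
  std::string Name = "__covrec_" + llvm::utohexstr(NameHash);
  if (IsUsed)
    Name += "u"; // a used record never merges with a dummy one
  return Name;
}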
+
+void CoverageMappingModuleGen::addFunctionMappingRecord(
+ llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
+ const std::string &CoverageMapping, bool IsUsed) {
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue);
+ FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed});
+
if (!IsUsed)
FunctionNames.push_back(
llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)));
- CoverageMappings.push_back(CoverageMapping);
if (CGM.getCodeGenOpts().DumpCoverageMapping) {
// Dump the coverage mapping data for this function by decoding the
@@ -1385,37 +1439,22 @@ void CoverageMappingModuleGen::emit() {
FilenameRefs[I] = FilenameStrs[I];
}
- std::string FilenamesAndCoverageMappings;
- llvm::raw_string_ostream OS(FilenamesAndCoverageMappings);
- CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
-
- // Stream the content of CoverageMappings to OS while keeping
- // memory consumption under control.
- size_t CoverageMappingSize = 0;
- for (auto &S : CoverageMappings) {
- CoverageMappingSize += S.size();
- OS << S;
- S.clear();
- S.shrink_to_fit();
- }
- CoverageMappings.clear();
- CoverageMappings.shrink_to_fit();
-
- size_t FilenamesSize = OS.str().size() - CoverageMappingSize;
- // Append extra zeroes if necessary to ensure that the size of the filenames
- // and coverage mappings is a multiple of 8.
- if (size_t Rem = OS.str().size() % 8) {
- CoverageMappingSize += 8 - Rem;
- OS.write_zeros(8 - Rem);
+ std::string Filenames;
+ {
+ llvm::raw_string_ostream OS(Filenames);
+ CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
}
- auto *FilenamesAndMappingsVal =
- llvm::ConstantDataArray::getString(Ctx, OS.str(), false);
+ auto *FilenamesVal =
+ llvm::ConstantDataArray::getString(Ctx, Filenames, false);
+ const int64_t FilenamesRef = llvm::IndexedInstrProf::ComputeHash(Filenames);
- // Create the deferred function records array
- auto RecordsTy =
- llvm::ArrayType::get(FunctionRecordTy, FunctionRecords.size());
- auto RecordsVal = llvm::ConstantArray::get(RecordsTy, FunctionRecords);
+ // Emit the function records.
+ for (const FunctionInfo &Info : FunctionRecords)
+ emitFunctionMappingRecord(Info, FilenamesRef);
+ const unsigned NRecords = 0;
+ const size_t FilenamesSize = Filenames.size();
+ const unsigned CoverageMappingSize = 0;
llvm::Type *CovDataHeaderTypes[] = {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
@@ -1430,18 +1469,16 @@ void CoverageMappingModuleGen::emit() {
CovDataHeaderTy, makeArrayRef(CovDataHeaderVals));
// Create the coverage data record
- llvm::Type *CovDataTypes[] = {CovDataHeaderTy, RecordsTy,
- FilenamesAndMappingsVal->getType()};
+ llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()};
auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes));
- llvm::Constant *TUDataVals[] = {CovDataHeaderVal, RecordsVal,
- FilenamesAndMappingsVal};
+ llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal};
auto CovDataVal =
llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals));
auto CovData = new llvm::GlobalVariable(
- CGM.getModule(), CovDataTy, true, llvm::GlobalValue::InternalLinkage,
+ CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage,
CovDataVal, llvm::getCoverageMappingVarName());
- CovData->setSection(getCoverageSection(CGM));
+ CovData->setSection(getInstrProfSection(CGM, llvm::IPSK_covmap));
CovData->setAlignment(llvm::Align(8));
// Make sure the data doesn't get deleted.
diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h
index 3bf51f590479..5d79d1e65670 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.h
+++ b/clang/lib/CodeGen/CoverageMappingGen.h
@@ -47,17 +47,27 @@ class CodeGenModule;
/// Organizes the cross-function state that is used while generating
/// code coverage mapping data.
class CoverageMappingModuleGen {
+ /// Information needed to emit a coverage record for a function.
+ struct FunctionInfo {
+ uint64_t NameHash;
+ uint64_t FuncHash;
+ std::string CoverageMapping;
+ bool IsUsed;
+ };
+
CodeGenModule &CGM;
CoverageSourceInfo &SourceInfo;
llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries;
- std::vector<llvm::Constant *> FunctionRecords;
std::vector<llvm::Constant *> FunctionNames;
- llvm::StructType *FunctionRecordTy;
- std::vector<std::string> CoverageMappings;
+ std::vector<FunctionInfo> FunctionRecords;
+
+ /// Emit a function record.
+ void emitFunctionMappingRecord(const FunctionInfo &Info,
+ uint64_t FilenamesRef);
public:
CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo)
- : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {}
+ : CGM(CGM), SourceInfo(SourceInfo) {}
CoverageSourceInfo &getSourceInfo() const {
return SourceInfo;
diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h
index 0ed67aabcd62..3a640d6117d6 100644
--- a/clang/lib/CodeGen/EHScopeStack.h
+++ b/clang/lib/CodeGen/EHScopeStack.h
@@ -85,11 +85,6 @@ enum CleanupKind : unsigned {
NormalAndEHCleanup = EHCleanup | NormalCleanup,
- InactiveCleanup = 0x4,
- InactiveEHCleanup = EHCleanup | InactiveCleanup,
- InactiveNormalCleanup = NormalCleanup | InactiveCleanup,
- InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup,
-
LifetimeMarker = 0x8,
NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup,
};
@@ -158,9 +153,10 @@ public:
/// Generation flags.
class Flags {
enum {
- F_IsForEH = 0x1,
+ F_IsForEH = 0x1,
F_IsNormalCleanupKind = 0x2,
- F_IsEHCleanupKind = 0x4
+ F_IsEHCleanupKind = 0x4,
+ F_HasExitSwitch = 0x8,
};
unsigned flags;
@@ -179,8 +175,10 @@ public:
/// cleanup.
bool isEHCleanupKind() const { return flags & F_IsEHCleanupKind; }
void setIsEHCleanupKind() { flags |= F_IsEHCleanupKind; }
- };
+ bool hasExitSwitch() const { return flags & F_HasExitSwitch; }
+ void setHasExitSwitch() { flags |= F_HasExitSwitch; }
+ };
/// Emit the cleanup. For normal cleanups, this is run in the
/// same EH context as when the cleanup was pushed, i.e. the
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index b5b8702c551e..80de2a6e3950 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -203,7 +203,7 @@ public:
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
- AddedStructorArgs
+ AddedStructorArgCounts
buildStructorSignature(GlobalDecl GD,
SmallVectorImpl<CanQualType> &ArgTys) override;
@@ -222,10 +222,17 @@ public:
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- AddedStructorArgs
- addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
- CXXCtorType Type, bool ForVirtualBase,
- bool Delegating, CallArgList &Args) override;
+ AddedStructorArgs getImplicitConstructorArgs(CodeGenFunction &CGF,
+ const CXXConstructorDecl *D,
+ CXXCtorType Type,
+ bool ForVirtualBase,
+ bool Delegating) override;
+
+ llvm::Value *getCXXDestructorImplicitParam(CodeGenFunction &CGF,
+ const CXXDestructorDecl *DD,
+ CXXDtorType Type,
+ bool ForVirtualBase,
+ bool Delegating) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
@@ -516,6 +523,22 @@ private:
}
bool canCallMismatchedFunctionType() const override { return false; }
};
+
+class XLCXXABI final : public ItaniumCXXABI {
+public:
+ explicit XLCXXABI(CodeGen::CodeGenModule &CGM)
+ : ItaniumCXXABI(CGM) {}
+
+ void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
+ llvm::FunctionCallee dtor,
+ llvm::Constant *addr) override;
+
+ bool useSinitAndSterm() const override { return true; }
+
+private:
+ void emitCXXStermFinalizer(const VarDecl &D, llvm::Function *dtorStub,
+ llvm::Constant *addr);
+};
}
CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
@@ -546,6 +569,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
case TargetCXXABI::WebAssembly:
return new WebAssemblyCXXABI(CGM);
+ case TargetCXXABI::XL:
+ return new XLCXXABI(CGM);
+
case TargetCXXABI::GenericItanium:
if (CGM.getContext().getTargetInfo().getTriple().getArch()
== llvm::Triple::le32) {
@@ -670,6 +696,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
CGM.HasHiddenLTOVisibility(RD);
bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
CGM.HasHiddenLTOVisibility(RD);
+ bool ShouldEmitWPDInfo =
+ CGM.getCodeGenOpts().WholeProgramVTables &&
+ // Don't insert type tests if we are forcing public std visibility.
+ !CGM.HasLTOVisibilityPublicStd(RD);
llvm::Value *VirtualFn = nullptr;
{
@@ -677,16 +707,17 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
llvm::Value *TypeId = nullptr;
llvm::Value *CheckResult = nullptr;
- if (ShouldEmitCFICheck || ShouldEmitVFEInfo) {
- // If doing CFI or VFE, we will need the metadata node to check against.
+ if (ShouldEmitCFICheck || ShouldEmitVFEInfo || ShouldEmitWPDInfo) {
+ // If doing CFI, VFE or WPD, we will need the metadata node to check
+ // against.
llvm::Metadata *MD =
CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
}
- llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
-
if (ShouldEmitVFEInfo) {
+ llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
+
// If doing VFE, load from the vtable with a type.checked.load intrinsic
// call. Note that we use the GEP to calculate the address to load from
// and pass 0 as the offset to the intrinsic. This is because every
@@ -702,18 +733,30 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
} else {
// When not doing VFE, emit a normal load, as it allows more
// optimisations than type.checked.load.
- if (ShouldEmitCFICheck) {
+ if (ShouldEmitCFICheck || ShouldEmitWPDInfo) {
+ llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
CheckResult = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::type_test),
{Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
}
- VFPAddr =
- Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
- VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(),
- "memptr.virtualfn");
+
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ VirtualFn = CGF.Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::load_relative,
+ {VTableOffset->getType()}),
+ {VTable, VTableOffset});
+ VirtualFn = CGF.Builder.CreateBitCast(VirtualFn, FTy->getPointerTo());
+ } else {
+ llvm::Value *VFPAddr = CGF.Builder.CreateGEP(VTable, VTableOffset);
+ VFPAddr = CGF.Builder.CreateBitCast(
+ VFPAddr, FTy->getPointerTo()->getPointerTo());
+ VirtualFn = CGF.Builder.CreateAlignedLoad(
+ VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn");
+ }
}
assert(VirtualFn && "Virtual function pointer not created!");
- assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) &&
+ assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || !ShouldEmitWPDInfo ||
+ CheckResult) &&
"Check result required but not created!");
if (ShouldEmitCFICheck) {
@@ -984,11 +1027,16 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD,
llvm::Constant *MemPtr[2];
if (MD->isVirtual()) {
uint64_t Index = CGM.getItaniumVTableContext().getMethodVTableIndex(MD);
-
- const ASTContext &Context = getContext();
- CharUnits PointerWidth =
- Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
- uint64_t VTableOffset = (Index * PointerWidth.getQuantity());
+ uint64_t VTableOffset;
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ // Multiply by 4-byte relative offsets.
+ VTableOffset = Index * 4;
+ } else {
+ const ASTContext &Context = getContext();
+ CharUnits PointerWidth = Context.toCharUnitsFromBits(
+ Context.getTargetInfo().getPointerWidth(0));
+ VTableOffset = Index * PointerWidth.getQuantity();
+ }
if (UseARMMethodPtrABI) {
// ARM C++ ABI 3.2.1:
@@ -1402,8 +1450,19 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF,
llvm::Value *Value =
CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl);
- // Load the type info.
- Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL);
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ // Load the type info.
+ Value = CGF.Builder.CreateBitCast(Value, CGM.Int8PtrTy);
+ Value = CGF.Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}),
+ {Value, llvm::ConstantInt::get(CGM.Int32Ty, -4)});
+
+ // Setup to dereference again since this is a proxy we accessed.
+ Value = CGF.Builder.CreateBitCast(Value, StdTypeInfoPtrTy->getPointerTo());
+ } else {
+ // Load the type info.
+ Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL);
+ }
return CGF.Builder.CreateAlignedLoad(Value, CGF.getPointerAlign());
}
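
The llvm.load_relative intrinsic used above computes base + *(int32_t *)(base + offset); with offset -4 it reads the 32-bit relative RTTI slot just below the address point. A plain-C++ model, for intuition only (this is not the intrinsic):

#include <cstdint>
#include <cstring>

static inline void *load_relative(void *Base, int32_t Offset) {
  int32_t Rel;
  std::memcpy(&Rel, static_cast<char *>(Base) + Offset, sizeof(Rel));
  return static_cast<char *>(Base) + Rel; // result is Base-relative
}

The same pattern recurs below for virtual base offsets, virtual function loads, and this-pointer adjustments.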
@@ -1459,28 +1518,37 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF,
Address ThisAddr,
QualType SrcRecordTy,
QualType DestTy) {
- llvm::Type *PtrDiffLTy =
- CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
-
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl());
- // Get the vtable pointer.
- llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(),
- ClassDecl);
+ llvm::Value *OffsetToTop;
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ // Get the vtable pointer.
+ llvm::Value *VTable =
+ CGF.GetVTablePtr(ThisAddr, CGM.Int32Ty->getPointerTo(), ClassDecl);
- // Get the offset-to-top from the vtable.
- llvm::Value *OffsetToTop =
- CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL);
- OffsetToTop =
- CGF.Builder.CreateAlignedLoad(OffsetToTop, CGF.getPointerAlign(),
- "offset.to.top");
+ // Get the offset-to-top from the vtable.
+ OffsetToTop =
+ CGF.Builder.CreateConstInBoundsGEP1_32(/*Type=*/nullptr, VTable, -2U);
+ OffsetToTop = CGF.Builder.CreateAlignedLoad(
+ OffsetToTop, CharUnits::fromQuantity(4), "offset.to.top");
+ } else {
+ llvm::Type *PtrDiffLTy =
+ CGF.ConvertType(CGF.getContext().getPointerDiffType());
+ // Get the vtable pointer.
+ llvm::Value *VTable =
+ CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(), ClassDecl);
+
+ // Get the offset-to-top from the vtable.
+ OffsetToTop = CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL);
+ OffsetToTop = CGF.Builder.CreateAlignedLoad(
+ OffsetToTop, CGF.getPointerAlign(), "offset.to.top");
+ }
// Finally, add the offset to the pointer.
llvm::Value *Value = ThisAddr.getPointer();
Value = CGF.EmitCastToVoidPtr(Value);
Value = CGF.Builder.CreateInBoundsGEP(Value, OffsetToTop);
-
return CGF.Builder.CreateBitCast(Value, DestLTy);
}
@@ -1501,17 +1569,22 @@ ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF,
CharUnits VBaseOffsetOffset =
CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(ClassDecl,
BaseClassDecl);
-
llvm::Value *VBaseOffsetPtr =
CGF.Builder.CreateConstGEP1_64(VTablePtr, VBaseOffsetOffset.getQuantity(),
"vbase.offset.ptr");
- VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr,
- CGM.PtrDiffTy->getPointerTo());
-
- llvm::Value *VBaseOffset =
- CGF.Builder.CreateAlignedLoad(VBaseOffsetPtr, CGF.getPointerAlign(),
- "vbase.offset");
+ llvm::Value *VBaseOffset;
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ VBaseOffsetPtr =
+ CGF.Builder.CreateBitCast(VBaseOffsetPtr, CGF.Int32Ty->getPointerTo());
+ VBaseOffset = CGF.Builder.CreateAlignedLoad(
+ VBaseOffsetPtr, CharUnits::fromQuantity(4), "vbase.offset");
+ } else {
+ VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr,
+ CGM.PtrDiffTy->getPointerTo());
+ VBaseOffset = CGF.Builder.CreateAlignedLoad(
+ VBaseOffsetPtr, CGF.getPointerAlign(), "vbase.offset");
+ }
return VBaseOffset;
}
@@ -1531,7 +1604,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
}
}
-CGCXXABI::AddedStructorArgs
+CGCXXABI::AddedStructorArgCounts
ItaniumCXXABI::buildStructorSignature(GlobalDecl GD,
SmallVectorImpl<CanQualType> &ArgTys) {
ASTContext &Context = getContext();
@@ -1545,9 +1618,9 @@ ItaniumCXXABI::buildStructorSignature(GlobalDecl GD,
cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) {
ArgTys.insert(ArgTys.begin() + 1,
Context.getPointerType(Context.VoidPtrTy));
- return AddedStructorArgs::prefix(1);
+ return AddedStructorArgCounts::prefix(1);
}
- return AddedStructorArgs{};
+ return AddedStructorArgCounts{};
}
void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
@@ -1613,9 +1686,9 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
}
-CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
+CGCXXABI::AddedStructorArgs ItaniumCXXABI::getImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
- bool ForVirtualBase, bool Delegating, CallArgList &Args) {
+ bool ForVirtualBase, bool Delegating) {
if (!NeedsVTTParameter(GlobalDecl(D, Type)))
return AddedStructorArgs{};
@@ -1623,8 +1696,14 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
llvm::Value *VTT =
CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
- Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy));
- return AddedStructorArgs::prefix(1); // Added one arg.
+ return AddedStructorArgs::prefix({{VTT, VTTTy}});
+}
+
+llvm::Value *ItaniumCXXABI::getCXXDestructorImplicitParam(
+ CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type,
+ bool ForVirtualBase, bool Delegating) {
+ GlobalDecl GD(DD, Type);
+ return CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
}
void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
@@ -1633,7 +1712,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
bool Delegating, Address This,
QualType ThisTy) {
GlobalDecl GD(DD, Type);
- llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
+ llvm::Value *VTT =
+ getCXXDestructorImplicitParam(CGF, DD, Type, ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
CGCallee Callee;
@@ -1660,10 +1740,11 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
CGM.GetAddrOfRTTIDescriptor(CGM.getContext().getTagDeclType(RD));
// Create and set the initializer.
- ConstantInitBuilder Builder(CGM);
- auto Components = Builder.beginStruct();
- CGVT.createVTableInitializer(Components, VTLayout, RTTI);
- Components.finishAndSetAsInitializer(VTable);
+ ConstantInitBuilder builder(CGM);
+ auto components = builder.beginStruct();
+ CGVT.createVTableInitializer(components, VTLayout, RTTI,
+ llvm::GlobalValue::isLocalLinkage(Linkage));
+ components.finishAndSetAsInitializer(VTable);
// Set the correct linkage.
VTable->setLinkage(Linkage);
@@ -1687,6 +1768,9 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
if (!VTable->isDeclarationForLinker())
CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout);
+
+ if (VTContext.isRelativeLayout() && !VTable->isDSOLocal())
+ CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName());
}
bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
@@ -1776,7 +1860,9 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
// Use pointer alignment for the vtable. Otherwise we would align them based
// on the size of the initializer which doesn't make sense as only single
// values are read.
- unsigned PAlign = CGM.getTarget().getPointerAlign(0);
+ unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout()
+ ? 32
+ : CGM.getTarget().getPointerAlign(0);
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, VTableType, llvm::GlobalValue::ExternalLinkage,
@@ -1793,9 +1879,9 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
- Ty = Ty->getPointerTo()->getPointerTo();
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
- llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent());
+ llvm::Value *VTable = CGF.GetVTablePtr(
+ This, Ty->getPointerTo()->getPointerTo(), MethodDecl->getParent());
uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
llvm::Value *VFunc;
@@ -1806,10 +1892,21 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
} else {
CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc);
- llvm::Value *VFuncPtr =
- CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
- auto *VFuncLoad =
- CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
+ llvm::Value *VFuncLoad;
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ VTable = CGF.Builder.CreateBitCast(VTable, CGM.Int8PtrTy);
+ llvm::Value *Load = CGF.Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}),
+ {VTable, llvm::ConstantInt::get(CGM.Int32Ty, 4 * VTableIndex)});
+ VFuncLoad = CGF.Builder.CreateBitCast(Load, Ty->getPointerTo());
+ } else {
+ VTable =
+ CGF.Builder.CreateBitCast(VTable, Ty->getPointerTo()->getPointerTo());
+ llvm::Value *VTableSlotPtr =
+ CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
+ VFuncLoad =
+ CGF.Builder.CreateAlignedLoad(VTableSlotPtr, CGF.getPointerAlign());
+ }
// Add !invariant.load md to virtual function load to indicate that
// function didn't change inside vtable.
@@ -1818,11 +1915,14 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
// the same virtual function loads from the same vtable load, which won't
// happen without enabled devirtualization with -fstrict-vtable-pointers.
if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
- CGM.getCodeGenOpts().StrictVTablePointers)
- VFuncLoad->setMetadata(
- llvm::LLVMContext::MD_invariant_load,
- llvm::MDNode::get(CGM.getLLVMContext(),
- llvm::ArrayRef<llvm::Metadata *>()));
+ CGM.getCodeGenOpts().StrictVTablePointers) {
+ if (auto *VFuncLoadInstr = dyn_cast<llvm::Instruction>(VFuncLoad)) {
+ VFuncLoadInstr->setMetadata(
+ llvm::LLVMContext::MD_invariant_load,
+ llvm::MDNode::get(CGM.getLLVMContext(),
+ llvm::ArrayRef<llvm::Metadata *>()));
+ }
+ }
VFunc = VFuncLoad;
}
@@ -1939,21 +2039,28 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
// Perform the virtual adjustment if we have one.
llvm::Value *ResultPtr;
if (VirtualAdjustment) {
- llvm::Type *PtrDiffTy =
- CGF.ConvertType(CGF.getContext().getPointerDiffType());
-
Address VTablePtrPtr = CGF.Builder.CreateElementBitCast(V, CGF.Int8PtrTy);
llvm::Value *VTablePtr = CGF.Builder.CreateLoad(VTablePtrPtr);
+ llvm::Value *Offset;
llvm::Value *OffsetPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTablePtr, VirtualAdjustment);
+ if (CGF.CGM.getItaniumVTableContext().isRelativeLayout()) {
+ // Load the adjustment offset from the vtable as a 32-bit int.
+ OffsetPtr =
+ CGF.Builder.CreateBitCast(OffsetPtr, CGF.Int32Ty->getPointerTo());
+ Offset =
+ CGF.Builder.CreateAlignedLoad(OffsetPtr, CharUnits::fromQuantity(4));
+ } else {
+ llvm::Type *PtrDiffTy =
+ CGF.ConvertType(CGF.getContext().getPointerDiffType());
- OffsetPtr = CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo());
-
- // Load the adjustment offset from the vtable.
- llvm::Value *Offset =
- CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign());
+ OffsetPtr =
+ CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo());
+ // Load the adjustment offset from the vtable.
+ Offset = CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign());
+ }
// Adjust our pointer.
ResultPtr = CGF.Builder.CreateInBoundsGEP(V.getPointer(), Offset);
} else {
@@ -2438,7 +2545,7 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() {
std::string GlobalInitFnName =
std::string("__GLOBAL_init_") + llvm::to_string(Priority);
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
- llvm::Function *GlobalInitFn = CreateGlobalInitOrDestructFunction(
+ llvm::Function *GlobalInitFn = CreateGlobalInitOrCleanUpFunction(
FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(),
SourceLocation());
ASTContext &Ctx = getContext();
@@ -2592,14 +2699,15 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- InitFunc = CGM.CreateGlobalInitOrDestructFunction(FTy, "__tls_init", FI,
- SourceLocation(),
- /*TLS=*/true);
+ InitFunc = CGM.CreateGlobalInitOrCleanUpFunction(FTy, "__tls_init", FI,
+ SourceLocation(),
+ /*TLS=*/true);
llvm::GlobalVariable *Guard = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
llvm::GlobalVariable::InternalLinkage,
llvm::ConstantInt::get(CGM.Int8Ty, 0), "__tls_guard");
Guard->setThreadLocal(true);
+ Guard->setThreadLocalMode(CGM.GetDefaultLLVMTLSModel());
CharUnits GuardAlign = CharUnits::One();
Guard->setAlignment(GuardAlign.getAsAlign());
@@ -3008,6 +3116,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::SatUShortFract:
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
+ case BuiltinType::BFloat16:
return false;
case BuiltinType::Dependent:
@@ -3200,9 +3309,11 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
llvm_unreachable("Pipe types shouldn't get here");
case Type::Builtin:
+ case Type::ExtInt:
// GCC treats vector and complex types as fundamental types.
case Type::Vector:
case Type::ExtVector:
+ case Type::ConstantMatrix:
case Type::Complex:
case Type::Atomic:
// FIXME: GCC treats block pointers as fundamental types?!
@@ -3277,17 +3388,32 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
break;
}
- llvm::Constant *VTable =
- CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy);
+ llvm::Constant *VTable = nullptr;
+
+ // Check if the alias exists. If it doesn't, then get or create the global.
+ if (CGM.getItaniumVTableContext().isRelativeLayout())
+ VTable = CGM.getModule().getNamedAlias(VTableName);
+ if (!VTable)
+ VTable = CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy);
+
CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts()));
llvm::Type *PtrDiffTy =
- CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
+ CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
// The vtable address point is 2.
- llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
- VTable =
- llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two);
+ if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+ // The vtable address point is 8 bytes after its start:
+ // 4 for the offset to top + 4 for the relative offset to rtti.
+ llvm::Constant *Eight = llvm::ConstantInt::get(CGM.Int32Ty, 8);
+ VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy);
+ VTable =
+ llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8Ty, VTable, Eight);
+ } else {
+ llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
+ VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable,
+ Two);
+ }
VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy);
Fields.push_back(VTable);
@@ -3438,6 +3564,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
case Type::Builtin:
case Type::Vector:
case Type::ExtVector:
+ case Type::ConstantMatrix:
case Type::Complex:
case Type::BlockPointer:
// Itanium C++ ABI 2.9.5p4:
@@ -3453,7 +3580,10 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
- llvm_unreachable("Pipe type shouldn't get here");
+ break;
+
+ case Type::ExtInt:
+ break;
case Type::ConstantArray:
case Type::IncompleteArray:
@@ -4401,3 +4531,70 @@ void WebAssemblyCXXABI::emitBeginCatch(CodeGenFunction &CGF,
NormalCleanup, cast<llvm::CatchPadInst>(CGF.CurrentFuncletPad));
ItaniumCXXABI::emitBeginCatch(CGF, C);
}
+
+/// Register a global destructor as best as we know how.
+void XLCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
+ llvm::FunctionCallee dtor,
+ llvm::Constant *addr) {
+ if (D.getTLSKind() != VarDecl::TLS_None)
+ llvm::report_fatal_error("thread local storage not yet implemented on AIX");
+
+ // Create __dtor function for the var decl.
+ llvm::Function *dtorStub = CGF.createAtExitStub(D, dtor, addr);
+
+ // Register above __dtor with atexit().
+ CGF.registerGlobalDtorWithAtExit(dtorStub);
+
+ // Emit __finalize function to unregister __dtor and (as appropriate) call
+ // __dtor.
+ emitCXXStermFinalizer(D, dtorStub, addr);
+}
+
+void XLCXXABI::emitCXXStermFinalizer(const VarDecl &D, llvm::Function *dtorStub,
+ llvm::Constant *addr) {
+ llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, false);
+ SmallString<256> FnName;
+ {
+ llvm::raw_svector_ostream Out(FnName);
+ getMangleContext().mangleDynamicStermFinalizer(&D, Out);
+ }
+
+ // Create the finalization action associated with a variable.
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
+ llvm::Function *StermFinalizer = CGM.CreateGlobalInitOrCleanUpFunction(
+ FTy, FnName.str(), FI, D.getLocation());
+
+ CodeGenFunction CGF(CGM);
+
+ CGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, StermFinalizer, FI,
+ FunctionArgList());
+
+ // The unatexit subroutine unregisters __dtor functions that were previously
+ // registered by the atexit subroutine. If the referenced function is found,
+ // unatexit returns a value of 0, meaning that the cleanup is still pending
+ // (and we should call the __dtor function).
+ llvm::Value *V = CGF.unregisterGlobalDtorWithUnAtExit(dtorStub);
+
+ llvm::Value *NeedsDestruct = CGF.Builder.CreateIsNull(V, "needs_destruct");
+
+ llvm::BasicBlock *DestructCallBlock = CGF.createBasicBlock("destruct.call");
+ llvm::BasicBlock *EndBlock = CGF.createBasicBlock("destruct.end");
+
+ // Check if unatexit returns a value of 0. If it does, jump to
+ // DestructCallBlock, otherwise jump to EndBlock directly.
+ CGF.Builder.CreateCondBr(NeedsDestruct, DestructCallBlock, EndBlock);
+
+ CGF.EmitBlock(DestructCallBlock);
+
+ // Emit the call to dtorStub.
+ llvm::CallInst *CI = CGF.Builder.CreateCall(dtorStub);
+
+ // Make sure the call and the callee agree on calling convention.
+ CI->setCallingConv(dtorStub->getCallingConv());
+
+ CGF.EmitBlock(EndBlock);
+
+ CGF.FinishFunction();
+
+ CGM.AddCXXStermFinalizerEntry(StermFinalizer);
+}
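In C terms, the finalizer emitted above reduces to roughly the following sketch; the unatexit prototype is assumed from the AIX runtime, and __dtor_v/__finalize_v are illustrative names:

    extern "C" int unatexit(void (*)(void)); // assumed AIX runtime entry point
    static void __dtor_v(void);              // the atexit stub created above
    static void __finalize_v(void) {
      // unatexit returns 0 when __dtor_v is still registered, i.e. the
      // cleanup is pending and must run now.
      if (unatexit(&__dtor_v) == 0)
        __dtor_v();
    }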
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index aff46135705a..45c6cb6b2e0d 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -206,7 +206,7 @@ public:
// lacks a definition for the destructor, non-base destructors must always
// delegate to or alias the base destructor.
- AddedStructorArgs
+ AddedStructorArgCounts
buildStructorSignature(GlobalDecl GD,
SmallVectorImpl<CanQualType> &ArgTys) override;
@@ -253,10 +253,17 @@ public:
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- AddedStructorArgs
- addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
- CXXCtorType Type, bool ForVirtualBase,
- bool Delegating, CallArgList &Args) override;
+ AddedStructorArgs getImplicitConstructorArgs(CodeGenFunction &CGF,
+ const CXXConstructorDecl *D,
+ CXXCtorType Type,
+ bool ForVirtualBase,
+ bool Delegating) override;
+
+ llvm::Value *getCXXDestructorImplicitParam(CodeGenFunction &CGF,
+ const CXXDestructorDecl *DD,
+ CXXDtorType Type,
+ bool ForVirtualBase,
+ bool Delegating) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
@@ -1261,10 +1268,10 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
}
}
-CGCXXABI::AddedStructorArgs
+CGCXXABI::AddedStructorArgCounts
MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD,
SmallVectorImpl<CanQualType> &ArgTys) {
- AddedStructorArgs Added;
+ AddedStructorArgCounts Added;
// TODO: 'for base' flag
if (isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() == Dtor_Deleting) {
@@ -1553,9 +1560,9 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
}
}
-CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
+CGCXXABI::AddedStructorArgs MicrosoftCXXABI::getImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
- bool ForVirtualBase, bool Delegating, CallArgList &Args) {
+ bool ForVirtualBase, bool Delegating) {
assert(Type == Ctor_Complete || Type == Ctor_Base);
// Check if we need a 'most_derived' parameter.
@@ -1570,13 +1577,16 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
} else {
MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
}
- RValue RV = RValue::get(MostDerivedArg);
if (FPT->isVariadic()) {
- Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy));
- return AddedStructorArgs::prefix(1);
+ return AddedStructorArgs::prefix({{MostDerivedArg, getContext().IntTy}});
}
- Args.add(RV, getContext().IntTy);
- return AddedStructorArgs::suffix(1);
+ return AddedStructorArgs::suffix({{MostDerivedArg, getContext().IntTy}});
+}
+
+llvm::Value *MicrosoftCXXABI::getCXXDestructorImplicitParam(
+ CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type,
+ bool ForVirtualBase, bool Delegating) {
+ return nullptr;
}
void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
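As a hedged illustration of the 'most_derived' flag built above: under the MS ABI it is only needed for constructors of classes with virtual bases, where it distinguishes complete-object from base-subobject construction.

    struct V { int x; };
    struct A : virtual V {
      A(); // takes a hidden 'most_derived' int under the MS ABI:
           // 1 when constructing a complete A, 0 for a base subobject
    };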
@@ -1605,8 +1615,11 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
}
+ llvm::Value *Implicit =
+ getCXXDestructorImplicitParam(CGF, DD, Type, ForVirtualBase,
+ Delegating); // = nullptr
CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
- /*ImplicitParam=*/nullptr,
+ /*ImplicitParam=*/Implicit,
/*ImplicitParamTy=*/QualType(), nullptr);
if (BaseDtorEndBB) {
// Complete object handler should continue to be the remaining
@@ -1621,6 +1634,15 @@ void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
if (!CGM.getCodeGenOpts().LTOUnit)
return;
+ // TODO: Should VirtualFunctionElimination also be supported here?
+ // See similar handling in CodeGenModule::EmitVTableTypeMetadata.
+ if (CGM.getCodeGenOpts().WholeProgramVTables) {
+ llvm::GlobalObject::VCallVisibility TypeVis =
+ CGM.GetVCallVisibilityLevel(RD);
+ if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic)
+ VTable->setVCallVisibilityMetadata(TypeVis);
+ }
+
// The location of the first virtual function pointer in the virtual table,
// aka the "address point" on Itanium. This is at offset 0 if RTTI is
// disabled, or sizeof(void*) if RTTI is enabled.
@@ -1681,10 +1703,11 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
[](const VTableComponent &VTC) { return VTC.isRTTIKind(); }))
RTTI = getMSCompleteObjectLocator(RD, *Info);
- ConstantInitBuilder Builder(CGM);
- auto Components = Builder.beginStruct();
- CGVT.createVTableInitializer(Components, VTLayout, RTTI);
- Components.finishAndSetAsInitializer(VTable);
+ ConstantInitBuilder builder(CGM);
+ auto components = builder.beginStruct();
+ CGVT.createVTableInitializer(components, VTLayout, RTTI,
+ VTable->hasLocalLinkage());
+ components.finishAndSetAsInitializer(VTable);
emitVTableTypeMetadata(*Info, RD, VTable);
}
@@ -2341,7 +2364,7 @@ void MicrosoftCXXABI::EmitThreadLocalInitFuncs(
if (!NonComdatInits.empty()) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
- llvm::Function *InitFunc = CGM.CreateGlobalInitOrDestructFunction(
+ llvm::Function *InitFunc = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, "__tls_init", CGM.getTypes().arrangeNullaryFunction(),
SourceLocation(), /*TLS=*/true);
CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, NonComdatInits);
@@ -2515,7 +2538,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
GuardVar->setComdat(
CGM.getModule().getOrInsertComdat(GuardVar->getName()));
if (D.getTLSKind())
- GuardVar->setThreadLocal(true);
+ CGM.setTLSMode(GuardVar, D);
if (GI && !HasPerVariableGuard)
GI->Guard = GuardVar;
}
@@ -3913,7 +3936,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
// Calculate the mangled name.
SmallString<256> ThunkName;
llvm::raw_svector_ostream Out(ThunkName);
- getMangleContext().mangleCXXCtor(CD, CT, Out);
+ getMangleContext().mangleName(GlobalDecl(CD, CT), Out);
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
@@ -4000,7 +4023,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
// Insert any ABI-specific implicit constructor arguments.
- AddedStructorArgs ExtraArgs =
+ AddedStructorArgCounts ExtraArgs =
addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
/*ForVirtualBase=*/false,
/*Delegating=*/false, Args);
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 284e8022a3c4..0c7e5f4598f8 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -173,8 +173,8 @@ public:
// Prepare CGDebugInfo to emit debug info for a clang module.
auto *DI = Builder->getModuleDebugInfo();
StringRef ModuleName = llvm::sys::path::filename(MainFileName);
- DI->setPCHDescriptor({ModuleName, "", OutputFileName,
- ASTFileSignature{{{~0U, ~0U, ~0U, ~0U, ~1U}}}});
+ DI->setPCHDescriptor(
+ {ModuleName, "", OutputFileName, ASTFileSignature::createDISentinel()});
DI->setModuleMap(MMap);
}
diff --git a/clang/lib/CodeGen/PatternInit.cpp b/clang/lib/CodeGen/PatternInit.cpp
index 3410c7f21533..26ac8b63a9ba 100644
--- a/clang/lib/CodeGen/PatternInit.cpp
+++ b/clang/lib/CodeGen/PatternInit.cpp
@@ -8,6 +8,7 @@
#include "PatternInit.h"
#include "CodeGenModule.h"
+#include "clang/Basic/TargetInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Type.h"
@@ -33,17 +34,15 @@ llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM,
constexpr bool NegativeNaN = true;
constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull;
if (Ty->isIntOrIntVectorTy()) {
- unsigned BitWidth = cast<llvm::IntegerType>(
- Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
- ->getBitWidth();
+ unsigned BitWidth =
+ cast<llvm::IntegerType>(Ty->getScalarType())->getBitWidth();
if (BitWidth <= 64)
return llvm::ConstantInt::get(Ty, IntValue);
return llvm::ConstantInt::get(
Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, IntValue)));
}
if (Ty->isPtrOrPtrVectorTy()) {
- auto *PtrTy = cast<llvm::PointerType>(
- Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
+ auto *PtrTy = cast<llvm::PointerType>(Ty->getScalarType());
unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
PtrTy->getAddressSpace());
if (PtrWidth > 64)
@@ -54,8 +53,7 @@ llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM,
}
if (Ty->isFPOrFPVectorTy()) {
unsigned BitWidth = llvm::APFloat::semanticsSizeInBits(
- (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
- ->getFltSemantics());
+ Ty->getScalarType()->getFltSemantics());
llvm::APInt Payload(64, NaNPayload);
if (BitWidth >= 64)
Payload = llvm::APInt::getSplat(BitWidth, Payload);
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 24ae6c6e362f..cdf83370c41f 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -13,6 +13,7 @@
#include "CodeGenModule.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Type.h"
+#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
@@ -30,16 +31,16 @@ static bool isAsanHwasanOrMemTag(const SanitizerSet& SS) {
void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
SourceLocation Loc, StringRef Name,
QualType Ty, bool IsDynInit,
- bool IsBlacklisted) {
+ bool IsExcluded) {
if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
return;
IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
- IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
+ IsExcluded |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
llvm::Metadata *LocDescr = nullptr;
llvm::Metadata *GlobalName = nullptr;
llvm::LLVMContext &VMContext = CGM.getLLVMContext();
- if (!IsBlacklisted) {
+ if (!IsExcluded) {
// Don't generate source location and global name if it is blacklisted -
// it won't be instrumented anyway.
LocDescr = getLocationMetadata(Loc);
@@ -52,7 +53,7 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsDynInit)),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- llvm::Type::getInt1Ty(VMContext), IsBlacklisted))};
+ llvm::Type::getInt1Ty(VMContext), IsExcluded))};
llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalMetadata);
llvm::NamedMDNode *AsanGlobals =
@@ -68,12 +69,12 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
llvm::raw_string_ostream OS(QualName);
D.printQualifiedName(OS);
- bool IsBlacklisted = false;
+ bool IsExcluded = false;
for (auto Attr : D.specific_attrs<NoSanitizeAttr>())
if (Attr->getMask() & SanitizerKind::Address)
- IsBlacklisted = true;
+ IsExcluded = true;
reportGlobalToASan(GV, D.getLocation(), OS.str(), D.getType(), IsDynInit,
- IsBlacklisted);
+ IsExcluded);
}
void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
diff --git a/clang/lib/CodeGen/SanitizerMetadata.h b/clang/lib/CodeGen/SanitizerMetadata.h
index 7ffac4360d9c..440a54590acc 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.h
+++ b/clang/lib/CodeGen/SanitizerMetadata.h
@@ -40,7 +40,7 @@ public:
bool IsDynInit = false);
void reportGlobalToASan(llvm::GlobalVariable *GV, SourceLocation Loc,
StringRef Name, QualType Ty, bool IsDynInit = false,
- bool IsBlacklisted = false);
+ bool IsExcluded = false);
void disableSanitizerForGlobal(llvm::GlobalVariable *GV);
void disableSanitizerForInstruction(llvm::Instruction *I);
private:
diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp
index 8bce93b71c0c..3d7421ac2e16 100644
--- a/clang/lib/CodeGen/SwiftCallingConv.cpp
+++ b/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -694,7 +694,7 @@ swiftcall::splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize,
// Try to split the vector type in half.
if (numElts >= 4 && isPowerOf2(numElts)) {
if (isLegalVectorType(CGM, vectorSize / 2, eltTy, numElts / 2))
- return {llvm::VectorType::get(eltTy, numElts / 2), 2};
+ return {llvm::FixedVectorType::get(eltTy, numElts / 2), 2};
}
return {eltTy, numElts};
@@ -747,7 +747,8 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize,
// Add the right number of vectors of this size.
auto numVecs = numElts >> logCandidateNumElts;
- components.append(numVecs, llvm::VectorType::get(eltTy, candidateNumElts));
+ components.append(numVecs,
+ llvm::FixedVectorType::get(eltTy, candidateNumElts));
numElts -= (numVecs << logCandidateNumElts);
if (numElts == 0) return;
@@ -757,7 +758,7 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize,
// This only needs to be separately checked if it's not a power of 2.
if (numElts > 2 && !isPowerOf2(numElts) &&
isLegalVectorType(CGM, eltSize * numElts, eltTy, numElts)) {
- components.push_back(llvm::VectorType::get(eltTy, numElts));
+ components.push_back(llvm::FixedVectorType::get(eltTy, numElts));
return;
}
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 682ef18da73b..9cd63ebe29ee 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -28,6 +29,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm> // std::sort
@@ -96,6 +98,17 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
return Address::invalid();
}
+bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
+ if (Ty->isPromotableIntegerType())
+ return true;
+
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
+ return true;
+
+ return false;
+}
+
ABIInfo::~ABIInfo() {}
/// Does the given lowering require more than the given number of
@@ -384,7 +397,7 @@ static Address emitMergePHI(CodeGenFunction &CGF,
return Address(PHI, Align);
}
-TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; }
+TargetCodeGenInfo::~TargetCodeGenInfo() = default;
// If someone can figure out a general rule for this, that would be great.
// It's probably just doomed to be platform-dependent, though.
@@ -486,11 +499,15 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
// Constant arrays of empty records count as empty, strip them off.
// Constant arrays of zero length always count as empty.
+ bool WasArray = false;
if (AllowArrays)
while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
if (AT->getSize() == 0)
return true;
FT = AT->getElementType();
+ // The [[no_unique_address]] special case below does not apply to
+ // arrays of C++ empty records, so we need to remember this fact.
+ WasArray = true;
}
const RecordType *RT = FT->getAs<RecordType>();
@@ -501,7 +518,14 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
//
// FIXME: We should use a predicate for whether this behavior is true in the
// current ABI.
- if (isa<CXXRecordDecl>(RT->getDecl()))
+ //
+ // The exception to the above rule are fields marked with the
+ // [[no_unique_address]] attribute (since C++20). Those do count as empty
+ // according to the Itanium ABI. The exception applies only to records,
+ // not arrays of records, so we must also check whether we stripped off an
+ // array type above.
+ if (isa<CXXRecordDecl>(RT->getDecl()) &&
+ (WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
return false;
return isEmptyRecord(Context, FT, AllowArrays);
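A short example of the distinction drawn above (names are illustrative):

    struct Empty {};
    struct S1 { [[no_unique_address]] Empty e;    float f; }; // e counts as empty
    struct S2 { [[no_unique_address]] Empty e[2]; float f; }; // array: e does not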
@@ -681,7 +705,7 @@ public:
class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
public:
DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
};
ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
@@ -700,8 +724,16 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ ASTContext &Context = getContext();
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() >
+ Context.getTypeSize(Context.getTargetInfo().hasInt128Type()
+ ? Context.Int128Ty
+ : Context.LongLongTy))
+ return getNaturalAlignIndirect(Ty);
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
@@ -715,8 +747,15 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect());
+ if (const auto *EIT = RetTy->getAs<ExtIntType>())
+ if (EIT->getNumBits() >
+ getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type()
+ ? getContext().Int128Ty
+ : getContext().LongLongTy))
+ return getNaturalAlignIndirect(RetTy);
+
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
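Roughly, on a target that has __int128 the rule above classifies _ExtInt values like this (a sketch, not exhaustive):

    void f(_ExtInt(24) a,    // below int width: extended like a promotable int
           _ExtInt(96) b,    // fits in 128 bits: passed directly
           _ExtInt(256) c);  // wider than __int128: passed indirectly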
//===----------------------------------------------------------------------===//
@@ -726,11 +765,19 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
//===----------------------------------------------------------------------===//
class WebAssemblyABIInfo final : public SwiftABIInfo {
+public:
+ enum ABIKind {
+ MVP = 0,
+ ExperimentalMV = 1,
+ };
+
+private:
DefaultABIInfo defaultInfo;
+ ABIKind Kind;
public:
- explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT)
- : SwiftABIInfo(CGT), defaultInfo(CGT) {}
+ explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
+ : SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
@@ -761,8 +808,9 @@ private:
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
- explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {}
+ explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
+ WebAssemblyABIInfo::ABIKind K)
+ : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
@@ -813,6 +861,20 @@ ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
// though watch out for things like bitfields.
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+ // For the experimental multivalue ABI, fully expand all other aggregates
+ if (Kind == ABIKind::ExperimentalMV) {
+ const RecordType *RT = Ty->getAs<RecordType>();
+ assert(RT);
+ bool HasBitField = false;
+ for (auto *Field : RT->getDecl()->fields()) {
+ if (Field->isBitField()) {
+ HasBitField = true;
+ break;
+ }
+ }
+ if (!HasBitField)
+ return ABIArgInfo::getExpand();
+ }
}
// Otherwise just do the default thing.
@@ -832,6 +894,9 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
// ABIArgInfo::getDirect().
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+ // For the experimental multivalue ABI, return all other aggregates
+ if (Kind == ABIKind::ExperimentalMV)
+ return ABIArgInfo::getDirect();
}
}
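A sketch of which aggregates the experimental multivalue ABI expands (how ExperimentalMV gets selected is outside this hunk):

    struct Pair  { int a; float b; };        // no bitfields: fully expanded
    struct Flags { unsigned a : 3, b : 5; }; // bitfield: default handling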
@@ -871,8 +936,8 @@ class PNaClABIInfo : public ABIInfo {
class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
public:
- PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(new PNaClABIInfo(CGT)) {}
+ PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {}
};
void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
@@ -906,10 +971,15 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
} else if (Ty->isFloatingType()) {
// Floating-point types don't go inreg.
return ABIArgInfo::getDirect();
+ } else if (const auto *EIT = Ty->getAs<ExtIntType>()) {
+ // Treat extended integers as integers if <=64, otherwise pass indirectly.
+ if (EIT->getNumBits() > 64)
+ return getNaturalAlignIndirect(Ty);
+ return ABIArgInfo::getDirect();
}
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
@@ -920,12 +990,19 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
+ // Treat extended integers as integers if <=64, otherwise pass indirectly.
+ if (const auto *EIT = RetTy->getAs<ExtIntType>()) {
+ if (EIT->getNumBits() > 64)
+ return getNaturalAlignIndirect(RetTy);
+ return ABIArgInfo::getDirect();
+ }
+
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect());
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
/// IsX86_MMXType - Return true if this is an MMX type.
@@ -943,7 +1020,8 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
.Cases("y", "&y", "^Ym", true)
.Default(false);
if (IsMMXCons && Ty->isVectorTy()) {
- if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) {
+ if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() !=
+ 64) {
// Invalid MMX constraint
return nullptr;
}
@@ -1112,7 +1190,7 @@ public:
X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
- : TargetCodeGenInfo(new X86_32ABIInfo(
+ : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
NumRegisterParameters, SoftFloatABI)) {}
@@ -1412,8 +1490,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
// registers and we need to make sure to pick a type the LLVM
// backend will like.
if (Size == 128)
- return ABIArgInfo::getDirect(llvm::VectorType::get(
- llvm::Type::getInt64Ty(getVMContext()), 2));
+ return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+ llvm::Type::getInt64Ty(getVMContext()), 2));
// Always return in register if it fits in a general purpose
// register, or if it is 64 bits and has a single element.
@@ -1470,15 +1548,19 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect());
+ if (const auto *EIT = RetTy->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 64)
+ return getIndirectReturnResult(RetTy, State);
+
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
-static bool isSSEVectorType(ASTContext &Context, QualType Ty) {
+static bool isSIMDVectorType(ASTContext &Context, QualType Ty) {
return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
}
-static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) {
+static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) {
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT)
return 0;
@@ -1487,16 +1569,16 @@ static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) {
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases())
- if (!isRecordWithSSEVectorType(Context, I.getType()))
+ if (!isRecordWithSIMDVectorType(Context, I.getType()))
return false;
for (const auto *i : RD->fields()) {
QualType FT = i->getType();
- if (isSSEVectorType(Context, FT))
+ if (isSIMDVectorType(Context, FT))
return true;
- if (isRecordWithSSEVectorType(Context, FT))
+ if (isRecordWithSIMDVectorType(Context, FT))
return true;
}
@@ -1517,8 +1599,8 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
}
// Otherwise, if the type contains an SSE vector type, the alignment is 16.
- if (Align >= 16 && (isSSEVectorType(getContext(), Ty) ||
- isRecordWithSSEVectorType(getContext(), Ty)))
+ if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
+ isRecordWithSIMDVectorType(getContext(), Ty)))
return 16;
return MinABIStackAlignInBytes;
@@ -1661,7 +1743,7 @@ void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) c
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (State.FreeSSERegs >= NumElts) {
State.FreeSSERegs -= NumElts;
- Args[I].info = ABIArgInfo::getDirect();
+ Args[I].info = ABIArgInfo::getDirectInReg();
State.IsPreassigned.set(I);
}
}
@@ -1676,6 +1758,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
Ty = useFirstFieldIfTransparentUnion(Ty);
+ TypeInfo TI = getContext().getTypeInfo(Ty);
// Check with the C++ ABI first.
const RecordType *RT = Ty->getAs<RecordType>();
@@ -1725,7 +1808,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
bool NeedsPadding = false;
bool InReg;
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
- unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
+ unsigned SizeInRegs = (TI.Width + 31) / 32;
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
if (InReg)
@@ -1735,14 +1818,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
+ // Pass over-aligned aggregates on Windows indirectly. This behavior was
+ // added in MSVC 2015.
+ if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+
// Expand small (<= 128-bit) record types when we know that the stack layout
// of those arguments will match the struct. This is important because the
// LLVM backend isn't smart enough to remove byval, which inhibits many
// optimizations.
// Don't do this for the MCU if there are still free integer registers
// (see X86_64 ABI for full explanation).
- if (getContext().getTypeSize(Ty) <= 4 * 32 &&
- (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
+ if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
+ canExpandIndirectArgument(Ty))
return ABIArgInfo::getExpandWithPadding(
IsFastCall || IsVectorCall || IsRegCall, PaddingType);
@@ -1750,14 +1838,24 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // On Windows, vectors are passed directly if registers are available, or
+ // indirectly if not. This avoids the need to align argument memory. Pass
+ // user-defined vector types larger than 512 bits indirectly for simplicity.
+ if (IsWin32StructABI) {
+ if (TI.Width <= 512 && State.FreeSSERegs > 0) {
+ --State.FreeSSERegs;
+ return ABIArgInfo::getDirectInReg();
+ }
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+
// On Darwin, some vectors are passed in memory, we handle this by passing
// it as an i8/i16/i32/i64.
if (IsDarwinVectorABI) {
- uint64_t Size = getContext().getTypeSize(Ty);
- if ((Size == 8 || Size == 16 || Size == 32) ||
- (Size == 64 && VT->getNumElements() == 1))
- return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
- Size));
+ if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
+ (TI.Width == 64 && VT->getNumElements() == 1))
+ return ABIArgInfo::getDirect(
+ llvm::IntegerType::get(getVMContext(), TI.Width));
}
if (IsX86_MMXType(CGT.ConvertType(Ty)))
@@ -1772,12 +1870,21 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
bool InReg = shouldPrimitiveUseInReg(Ty, State);
- if (Ty->isPromotableIntegerType()) {
+ if (isPromotableIntegerTypeForABI(Ty)) {
if (InReg)
return ABIArgInfo::getExtendInReg(Ty);
return ABIArgInfo::getExtend(Ty);
}
+ if (const auto *EIT = Ty->getAs<ExtIntType>()) {
+ if (EIT->getNumBits() <= 64) {
+ if (InReg)
+ return ABIArgInfo::getDirectInReg();
+ return ABIArgInfo::getDirect();
+ }
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
@@ -1787,9 +1894,10 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI);
if (IsMCUABI)
State.FreeRegs = 3;
- else if (State.CC == llvm::CallingConv::X86_FastCall)
+ else if (State.CC == llvm::CallingConv::X86_FastCall) {
State.FreeRegs = 2;
- else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ State.FreeSSERegs = 3;
+ } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
State.FreeRegs = 2;
State.FreeSSERegs = 6;
} else if (FI.getHasRegParm())
@@ -1797,6 +1905,11 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
else if (State.CC == llvm::CallingConv::X86_RegCall) {
State.FreeRegs = 5;
State.FreeSSERegs = 8;
+ } else if (IsWin32StructABI) {
+ // Since MSVC 2015, the first three SSE vectors have been passed in
+ // registers. The rest are passed indirectly.
+ State.FreeRegs = DefaultNumRegisterParameters;
+ State.FreeSSERegs = 3;
} else
State.FreeRegs = DefaultNumRegisterParameters;
@@ -1843,16 +1956,25 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const {
// Arguments are always 4-byte-aligned.
- CharUnits FieldAlign = CharUnits::fromQuantity(4);
+ CharUnits WordSize = CharUnits::fromQuantity(4);
+ assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
- assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
- Info = ABIArgInfo::getInAlloca(FrameFields.size());
- FrameFields.push_back(CGT.ConvertTypeForMem(Type));
- StackOffset += getContext().getTypeSizeInChars(Type);
+ // sret pointers and indirect things will require an extra pointer
+ // indirection, unless they are byval. Most things are byval, and will not
+ // require this indirection.
+ bool IsIndirect = false;
+ if (Info.isIndirect() && !Info.getIndirectByVal())
+ IsIndirect = true;
+ Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
+ llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
+ if (IsIndirect)
+ LLTy = LLTy->getPointerTo(0);
+ FrameFields.push_back(LLTy);
+ StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
// Insert padding bytes to respect alignment.
CharUnits FieldEnd = StackOffset;
- StackOffset = FieldEnd.alignTo(FieldAlign);
+ StackOffset = FieldEnd.alignTo(WordSize);
if (StackOffset != FieldEnd) {
CharUnits NumBytes = StackOffset - FieldEnd;
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
@@ -1866,16 +1988,12 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
switch (Info.getKind()) {
case ABIArgInfo::InAlloca:
return true;
- case ABIArgInfo::Indirect:
- assert(Info.getIndirectByVal());
- return true;
case ABIArgInfo::Ignore:
return false;
+ case ABIArgInfo::Indirect:
case ABIArgInfo::Direct:
case ABIArgInfo::Extend:
- if (Info.getInReg())
- return false;
- return true;
+ return !Info.getInReg();
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
// These are aggregate types which are never passed in registers when
@@ -1909,8 +2027,7 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
// Put the sret parameter into the inalloca struct if it's in memory.
if (Ret.isIndirect() && !Ret.getInReg()) {
- CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
- addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
+ addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
// On Windows, the hidden sret parameter is always returned in eax.
Ret.setInAllocaSRet(IsWin32StructABI);
}
@@ -2207,7 +2324,7 @@ public:
if (info.isDirect()) {
llvm::Type *ty = info.getCoerceToType();
if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
- return (vectorTy->getBitWidth() > 128);
+ return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128;
}
return false;
}
@@ -2280,7 +2397,7 @@ private:
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
- : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {}
+ : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {}
const X86_64ABIInfo &getABIInfo() const {
return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -2361,8 +2478,110 @@ public:
}
}
}
+
+ void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+ const FunctionDecl *Caller,
+ const FunctionDecl *Callee,
+ const CallArgList &Args) const override;
};
+static void initFeatureMaps(const ASTContext &Ctx,
+ llvm::StringMap<bool> &CallerMap,
+ const FunctionDecl *Caller,
+ llvm::StringMap<bool> &CalleeMap,
+ const FunctionDecl *Callee) {
+ if (CalleeMap.empty() && CallerMap.empty()) {
+ // The caller is potentially nullptr in the case where the call isn't in a
+ // function. In this case, getFunctionFeatureMap ensures we just get
+ // the TU-level setting (since it cannot be modified by 'target').
+ Ctx.getFunctionFeatureMap(CallerMap, Caller);
+ Ctx.getFunctionFeatureMap(CalleeMap, Callee);
+ }
+}
+
+static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
+ SourceLocation CallLoc,
+ const llvm::StringMap<bool> &CallerMap,
+ const llvm::StringMap<bool> &CalleeMap,
+ QualType Ty, StringRef Feature,
+ bool IsArgument) {
+ bool CallerHasFeat = CallerMap.lookup(Feature);
+ bool CalleeHasFeat = CalleeMap.lookup(Feature);
+ if (!CallerHasFeat && !CalleeHasFeat)
+ return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
+ << IsArgument << Ty << Feature;
+
+ // Mixing calling conventions here is very clearly an error.
+ if (!CallerHasFeat || !CalleeHasFeat)
+ return Diag.Report(CallLoc, diag::err_avx_calling_convention)
+ << IsArgument << Ty << Feature;
+
+ // Else, both caller and callee have the required feature, so there is no need
+ // to diagnose.
+ return false;
+}
+
+static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
+ SourceLocation CallLoc,
+ const llvm::StringMap<bool> &CallerMap,
+ const llvm::StringMap<bool> &CalleeMap, QualType Ty,
+ bool IsArgument) {
+ uint64_t Size = Ctx.getTypeSize(Ty);
+ if (Size > 256)
+ return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
+ "avx512f", IsArgument);
+
+ if (Size > 128)
+ return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
+ IsArgument);
+
+ return false;
+}
+
+void X86_64TargetCodeGenInfo::checkFunctionCallABI(
+ CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+ const FunctionDecl *Callee, const CallArgList &Args) const {
+ llvm::StringMap<bool> CallerMap;
+ llvm::StringMap<bool> CalleeMap;
+ unsigned ArgIndex = 0;
+
+ // We need to loop through the actual call arguments rather than the
+ // function's parameters, in case this call is variadic.
+ for (const CallArg &Arg : Args) {
+ // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
+ // additionally changes how vectors >256 in size are passed. Like GCC, we
+ // warn when a function is called with an argument where this will change.
+ // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
+ // the caller and callee features are mismatched.
+ // Unfortunately, we cannot do this diagnostic in Sema, since the callee can
+ // change its ABI with attribute-target after this call.
+ if (Arg.getType()->isVectorType() &&
+ CGM.getContext().getTypeSize(Arg.getType()) > 128) {
+ initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
+ QualType Ty = Arg.getType();
+ // The CallArg seems to have desugared the type already, so for clearer
+ // diagnostics, replace it with the type in the FunctionDecl if possible.
+ if (ArgIndex < Callee->getNumParams())
+ Ty = Callee->getParamDecl(ArgIndex)->getType();
+
+ if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
+ CalleeMap, Ty, /*IsArgument*/ true))
+ return;
+ }
+ ++ArgIndex;
+ }
+
+ // Check return always, as we don't have a good way of knowing in codegen
+ // whether this value is used, tail-called, etc.
+ if (Callee->getReturnType()->isVectorType() &&
+ CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
+ initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
+ checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
+ CalleeMap, Callee->getReturnType(),
+ /*IsArgument*/ false);
+ }
+}
+
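A hedged example of the situation these checks diagnose (function names are illustrative):

    typedef double v4df __attribute__((vector_size(32)));  // 256-bit vector
    __attribute__((target("avx"))) v4df make_v4df(void);
    v4df caller(void) { return make_v4df(); }
    // caller lacks "avx" while make_v4df has it: hard error (ABI mismatch).
    // If neither side had "avx", this would only warn, matching GCC.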
static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
// If the argument does not end in .lib, automatically add the suffix.
// If the argument contains a space, enclose it in quotes.
@@ -2424,7 +2643,7 @@ class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
X86AVXABILevel AVXLevel)
- : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT, AVXLevel)) {}
+ : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
@@ -2731,6 +2950,15 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
return;
}
+ if (const auto *EITy = Ty->getAs<ExtIntType>()) {
+ if (EITy->getNumBits() <= 64)
+ Current = Integer;
+ else if (EITy->getNumBits() <= 128)
+ Lo = Hi = Integer;
+ // Larger values need to get passed in memory.
+ return;
+ }
+
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
// Arrays are treated like structures.
@@ -2905,8 +3133,11 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ if (Ty->isExtIntType())
+ return getNaturalAlignIndirect(Ty);
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
return getNaturalAlignIndirect(Ty);
@@ -2938,13 +3169,14 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
// the argument in the free register. This does not seem to happen currently,
// but this code would be much safer if we could mark the argument with
// 'onstack'. See PR12193.
- if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) {
+ if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
+ !Ty->isExtIntType()) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -3001,11 +3233,11 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
// Don't pass vXi128 vectors in their native type, the backend can't
// legalize them.
if (passInt128VectorsInMem() &&
- IRType->getVectorElementType()->isIntegerTy(128)) {
+ cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
// Use a vXi64 vector.
uint64_t Size = getContext().getTypeSize(Ty);
- return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
- Size / 64);
+ return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
+ Size / 64);
}
return IRType;
@@ -3020,8 +3252,8 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
// Return a LLVM IR vector type based on the size of 'Ty'.
- return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
- Size / 64);
+ return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
+ Size / 64);
}
/// BitsContainNoUserData - Return true if the specified [start,end) bit range
@@ -3155,7 +3387,8 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
// case.
if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) &&
ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout()))
- return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2);
+ return llvm::FixedVectorType::get(llvm::Type::getFloatTy(getVMContext()),
+ 2);
return llvm::Type::getDoubleTy(getVMContext());
}
@@ -3326,7 +3559,7 @@ classifyReturnType(QualType RetTy) const {
RetTy = EnumTy->getDecl()->getIntegerType();
if (RetTy->isIntegralOrEnumerationType() &&
- RetTy->isPromotableIntegerType())
+ isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
}
break;
@@ -3471,7 +3704,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
Ty = EnumTy->getDecl()->getIntegerType();
if (Ty->isIntegralOrEnumerationType() &&
- Ty->isPromotableIntegerType())
+ isPromotableIntegerTypeForABI(Ty))
return ABIArgInfo::getExtend(Ty);
}
@@ -3627,14 +3860,15 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
} else {
FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
}
- } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) {
+ } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
+ getContext().getCanonicalType(FI.getReturnType()
+ ->getAs<ComplexType>()
+ ->getElementType()) ==
+ getContext().LongDoubleTy)
// Complex Long Double Type is passed in Memory when Regcall
// calling convention is used.
- const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>();
- if (getContext().getCanonicalType(CT->getElementType()) ==
- getContext().LongDoubleTy)
- FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
- } else
+ FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
+ else
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
}
@@ -4021,14 +4255,25 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
// Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
// Clang matches them for compatibility.
- return ABIArgInfo::getDirect(
- llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2));
+ return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+ llvm::Type::getInt64Ty(getVMContext()), 2));
default:
break;
}
}
+ if (Ty->isExtIntType()) {
+ // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+ // not 1, 2, 4, or 8 bytes, must be passed by reference."
+ // However, non-power-of-two _ExtInts will be passed as 1,2,4 or 8 bytes
+ // anyway as long is it fits in them, so we don't have to check the power of
+ // 2.
+ if (Width <= 64)
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ }
+
return ABIArgInfo::getDirect();
}
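Illustrating the rule above on the MS x64 ABI (a sketch):

    void g(_ExtInt(17) a,    // fits in 8 bytes: passed directly
           _ExtInt(128) b);  // wider than 64 bits: passed by reference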
@@ -4118,17 +4363,247 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
/*allowHigherAlign*/ false);
}
+static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address, bool Is64Bit,
+ bool IsAIX) {
+ // This is calculated from the LLVM and GCC tables and verified
+ // against gcc output. AFAIK all PPC ABIs use the same encoding.
+
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+
+ llvm::IntegerType *i8 = CGF.Int8Ty;
+ llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
+ llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
+ llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);
+
+ // 0-31: r0-31, the 4-byte or 8-byte general-purpose registers
+ AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31);
+
+ // 32-63: fp0-31, the 8-byte floating-point registers
+ AssignToArrayRange(Builder, Address, Eight8, 32, 63);
+
+ // 64-67 are various 4-byte or 8-byte special-purpose registers:
+ // 64: mq
+ // 65: lr
+ // 66: ctr
+ // 67: ap
+ AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67);
+
+ // 68-76 are various 4-byte special-purpose registers:
+ // 68-75 cr0-7
+ // 76: xer
+ AssignToArrayRange(Builder, Address, Four8, 68, 76);
+
+ // 77-108: v0-31, the 16-byte vector registers
+ AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);
+
+ // 109: vrsave
+ // 110: vscr
+ AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110);
+
+ // AIX does not utilize the rest of the registers.
+ if (IsAIX)
+ return false;
+
+ // 111: spe_acc
+ // 112: spefscr
+ // 113: sfp
+ AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113);
+
+ if (!Is64Bit)
+ return false;
+
+ // TODO: Need to verify whether these registers are used on 64-bit AIX with
+ // Power8 or later CPUs.
+ // 64-bit only registers:
+ // 114: tfhar
+ // 115: tfiar
+ // 116: texasr
+ AssignToArrayRange(Builder, Address, Eight8, 114, 116);
+
+ return false;
+}
+
+// AIX
+namespace {
+/// AIXABIInfo - The AIX XCOFF ABI information.
+class AIXABIInfo : public ABIInfo {
+ const bool Is64Bit;
+ const unsigned PtrByteSize;
+ CharUnits getParamTypeAlignment(QualType Ty) const;
+
+public:
+ AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
+ : ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 8 : 4) {}
+
+ bool isPromotableTypeForABI(QualType Ty) const;
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyArgumentType(QualType Ty) const;
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
+ }
+
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+};
+
+class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
+ const bool Is64Bit;
+
+public:
+ AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
+ : TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)),
+ Is64Bit(Is64Bit) {}
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
+ return 1; // r1 is the dedicated stack pointer
+ }
+
+ bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const override;
+};
+} // namespace
+
+// Return true if the ABI requires Ty to be passed sign- or zero-
+// extended to 32/64 bits.
+bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ // Promotable integer types are required to be promoted by the ABI.
+ if (Ty->isPromotableIntegerType())
+ return true;
+
+ if (!Is64Bit)
+ return false;
+
+ // For 64-bit mode, in addition to the usual promotable integer types, we also
+ // need to extend all 32-bit types, since the ABI requires promotion to 64
+ // bits.
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
+ switch (BT->getKind()) {
+ case BuiltinType::Int:
+ case BuiltinType::UInt:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
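In effect, on 64-bit AIX a 32-bit int argument or return value is widened to a full register (a sketch of the consequence, not of clang code):

    int add1(int x); // the ABI sign-extends both x and the result to 64 bits,
                     // matching the Extend classification requested above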
+ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isAnyComplexType())
+ llvm::report_fatal_error("complex type is not supported on AIX yet");
+
+ if (RetTy->isVectorType())
+ llvm::report_fatal_error("vector type is not supported on AIX yet");
+
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ // TODO: Evaluate whether the AIX power alignment rule has an impact on the
+ // alignment here.
+ if (isAggregateTypeForABI(RetTy))
+ return getNaturalAlignIndirect(RetTy);
+
+ return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
+}
+
+ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ if (Ty->isAnyComplexType())
+ llvm::report_fatal_error("complex type is not supported on AIX yet");
+
+ if (Ty->isVectorType())
+ llvm::report_fatal_error("vector type is not supported on AIX yet");
+
+ // TODO: Evaluate whether the AIX power alignment rule has an impact on the
+ // alignment here.
+ if (isAggregateTypeForABI(Ty)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // passed by value.
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+ CharUnits CCAlign = getParamTypeAlignment(Ty);
+ CharUnits TyAlign = getContext().getTypeAlignInChars(Ty);
+
+ return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true,
+ /*Realign*/ TyAlign > CCAlign);
+ }
+
+ return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
+}
+
+CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const {
+ if (Ty->isAnyComplexType())
+ llvm::report_fatal_error("complex type is not supported on AIX yet");
+
+ if (Ty->isVectorType())
+ llvm::report_fatal_error("vector type is not supported on AIX yet");
+
+ // If the structure contains a vector type, the alignment is 16.
+ if (isRecordWithSIMDVectorType(getContext(), Ty))
+ return CharUnits::fromQuantity(16);
+
+ return CharUnits::fromQuantity(PtrByteSize);
+}
+
+Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ if (Ty->isAnyComplexType())
+ llvm::report_fatal_error("complex type is not supported on AIX yet");
+
+ if (Ty->isVectorType())
+ llvm::report_fatal_error("vector type is not supported on AIX yet");
+
+ auto TypeInfo = getContext().getTypeInfoInChars(Ty);
+ TypeInfo.second = getParamTypeAlignment(Ty);
+
+ CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize);
+
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
+ SlotSize, /*AllowHigher*/ true);
+}
+
+bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable(
+ CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
+ return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true);
+}
+
// PowerPC-32
namespace {
/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
bool IsSoftFloatABI;
+ bool IsRetSmallStructInRegABI;
CharUnits getParamTypeAlignment(QualType Ty) const;
public:
- PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI)
- : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {}
+ PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI,
+ bool RetSmallStructInRegABI)
+ : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI),
+ IsRetSmallStructInRegABI(RetSmallStructInRegABI) {}
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
+ }
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
@@ -4136,8 +4611,13 @@ public:
class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
- PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI)
- : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {}
+ PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI,
+ bool RetSmallStructInRegABI)
+ : TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>(
+ CGT, SoftFloatABI, RetSmallStructInRegABI)) {}
+
+ static bool isStructReturnInRegABI(const llvm::Triple &Triple,
+ const CodeGenOptions &Opts);
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
@@ -4150,7 +4630,7 @@ public:
}
CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
- // Complex types are passed just like their elements
+ // Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
@@ -4173,6 +4653,34 @@ CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
return CharUnits::fromQuantity(4);
}
+ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
+ uint64_t Size;
+
+ // -msvr4-struct-return puts small aggregates in GPR3 and GPR4.
+ if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI &&
+ (Size = getContext().getTypeSize(RetTy)) <= 64) {
+ // System V ABI (1995), page 3-22, specified:
+ // > A structure or union whose size is less than or equal to 8 bytes
+ // > shall be returned in r3 and r4, as if it were first stored in the
+ // > 8-byte aligned memory area and then the low addressed word were
+ // > loaded into r3 and the high-addressed word into r4. Bits beyond
+ // > the last member of the structure or union are not defined.
+ //
+ // GCC for big-endian PPC32 inserts the pad before the first member,
+ // not "beyond the last member" of the struct. To stay compatible
+ // with GCC, we coerce the struct to an integer of the same size.
+ // LLVM will extend it and return i32 in r3, or i64 in r3:r4.
+ if (Size == 0)
+ return ABIArgInfo::getIgnore();
+ else {
+ llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size);
+ return ABIArgInfo::getDirect(CoerceTy);
+ }
+ }
+
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
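Concretely, under -msvr4-struct-return the rule above coerces small aggregates like this (a sketch):

    struct P { short a, b; }; // 4 bytes: returned as i32 in r3
    struct Q { int a, b; };   // 8 bytes: returned as i64 in r3:r4
    struct Z {};              // size 0 in C (GNU extension): ignored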
// TODO: this implementation is now likely redundant with
// DefaultABIInfo::EmitVAArg.
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
@@ -4328,47 +4836,32 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
return Result;
}
-bool
-PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const {
- // This is calculated from the LLVM and GCC tables and verified
- // against gcc output. AFAIK all ABIs use the same encoding.
-
- CodeGen::CGBuilderTy &Builder = CGF.Builder;
-
- llvm::IntegerType *i8 = CGF.Int8Ty;
- llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
- llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
- llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);
-
- // 0-31: r0-31, the 4-byte general-purpose registers
- AssignToArrayRange(Builder, Address, Four8, 0, 31);
-
- // 32-63: fp0-31, the 8-byte floating-point registers
- AssignToArrayRange(Builder, Address, Eight8, 32, 63);
-
- // 64-76 are various 4-byte special-purpose registers:
- // 64: mq
- // 65: lr
- // 66: ctr
- // 67: ap
- // 68-75 cr0-7
- // 76: xer
- AssignToArrayRange(Builder, Address, Four8, 64, 76);
+bool PPC32TargetCodeGenInfo::isStructReturnInRegABI(
+ const llvm::Triple &Triple, const CodeGenOptions &Opts) {
+ assert(Triple.getArch() == llvm::Triple::ppc);
- // 77-108: v0-31, the 16-byte vector registers
- AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);
+ switch (Opts.getStructReturnConvention()) {
+ case CodeGenOptions::SRCK_Default:
+ break;
+ case CodeGenOptions::SRCK_OnStack: // -maix-struct-return
+ return false;
+ case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return
+ return true;
+ }
- // 109: vrsave
- // 110: vscr
- // 111: spe_acc
- // 112: spefscr
- // 113: sfp
- AssignToArrayRange(Builder, Address, Four8, 109, 113);
+ if (Triple.isOSBinFormatELF() && !Triple.isOSLinux())
+ return true;
return false;
}
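A hedged usage note on the two driver flags named in the switch above (the target triples are illustrative):

    // clang --target=powerpc-unknown-freebsd -msvr4-struct-return ...
    //   returns small aggregates in r3/r4
    // clang --target=powerpc-unknown-linux-gnu -maix-struct-return ...
    //   returns small aggregates on the stack via an sret pointer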
+bool
+PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const {
+ return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false,
+ /*IsAIX*/ false);
+}
+
// PowerPC-64
namespace {
@@ -4477,8 +4970,8 @@ public:
PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX,
bool SoftFloatABI)
- : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX,
- SoftFloatABI)) {}
+ : TargetCodeGenInfo(std::make_unique<PPC64_SVR4_ABIInfo>(
+ CGT, Kind, HasQPX, SoftFloatABI)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
@@ -4513,7 +5006,7 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
- if (Ty->isPromotableIntegerType())
+ if (isPromotableIntegerTypeForABI(Ty))
return true;
// In addition to the usual promotable integer types, we also need to
@@ -4527,6 +5020,10 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
break;
}
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() < 64)
+ return true;
+
return false;
}
@@ -4744,6 +5241,10 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
}
}
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 128)
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
+
if (isAggregateTypeForABI(Ty)) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
@@ -4816,6 +5317,10 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
}
}
+ if (const auto *EIT = RetTy->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 128)
+ return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
+
if (isAggregateTypeForABI(RetTy)) {
// ELFv2 homogeneous aggregates are returned as array types.
const Type *Base = nullptr;
@@ -4901,66 +5406,19 @@ Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
TypeInfo, SlotSize, /*AllowHigher*/ true);
}
-static bool
-PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) {
- // This is calculated from the LLVM and GCC tables and verified
- // against gcc output. AFAIK all ABIs use the same encoding.
-
- CodeGen::CGBuilderTy &Builder = CGF.Builder;
-
- llvm::IntegerType *i8 = CGF.Int8Ty;
- llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
- llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
- llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);
-
- // 0-31: r0-31, the 8-byte general-purpose registers
- AssignToArrayRange(Builder, Address, Eight8, 0, 31);
-
- // 32-63: fp0-31, the 8-byte floating-point registers
- AssignToArrayRange(Builder, Address, Eight8, 32, 63);
-
- // 64-67 are various 8-byte special-purpose registers:
- // 64: mq
- // 65: lr
- // 66: ctr
- // 67: ap
- AssignToArrayRange(Builder, Address, Eight8, 64, 67);
-
- // 68-76 are various 4-byte special-purpose registers:
- // 68-75 cr0-7
- // 76: xer
- AssignToArrayRange(Builder, Address, Four8, 68, 76);
-
- // 77-108: v0-31, the 16-byte vector registers
- AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);
-
- // 109: vrsave
- // 110: vscr
- // 111: spe_acc
- // 112: spefscr
- // 113: sfp
- // 114: tfhar
- // 115: tfiar
- // 116: texasr
- AssignToArrayRange(Builder, Address, Eight8, 109, 116);
-
- return false;
-}
-
bool
PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
-
- return PPC64_initDwarfEHRegSizeTable(CGF, Address);
+ return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
+ /*IsAIX*/ false);
}
bool
PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
-
- return PPC64_initDwarfEHRegSizeTable(CGF, Address);
+ return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
+ /*IsAIX*/ false);
}
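Both 64-bit entry points now defer to the shared PPC_initDwarfEHRegSizeTable; as a sketch, the register sizes it must encode, reconstructed from the removed per-target tables above (the helper is hypothetical; Is64Bit selects the GPR width):

// Byte size recorded for a DWARF register number in the shared PPC table.
static unsigned ppcDwarfRegSize(unsigned RegNo, bool Is64Bit) {
  if (RegNo <= 31)                 // r0-r31: GPR width differs by mode
    return Is64Bit ? 8 : 4;
  if (RegNo <= 63)                 // fp0-fp31: always 8 bytes
    return 8;
  if (RegNo >= 68 && RegNo <= 76)  // cr0-cr7 and xer: always 4 bytes
    return 4;
  if (RegNo >= 77 && RegNo <= 108) // v0-v31: 16-byte vector registers
    return 16;
  // mq/lr/ctr/ap and the vrsave/vscr/... specials follow the GPR width.
  return Is64Bit ? 8 : 4;
}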
//===----------------------------------------------------------------------===//
@@ -5031,12 +5489,16 @@ private:
bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
unsigned elts) const override;
+
+ bool allowBFloatArgsAndRet() const override {
+ return getTarget().hasBFloat16Type();
+ }
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
- : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
+ : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
@@ -5054,9 +5516,11 @@ public:
if (!FD)
return;
- CodeGenOptions::SignReturnAddressScope Scope = CGM.getCodeGenOpts().getSignReturnAddress();
- CodeGenOptions::SignReturnAddressKeyValue Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
- bool BranchTargetEnforcement = CGM.getCodeGenOpts().BranchTargetEnforcement;
+ LangOptions::SignReturnAddressScopeKind Scope =
+ CGM.getLangOpts().getSignReturnAddressScope();
+ LangOptions::SignReturnAddressKeyKind Key =
+ CGM.getLangOpts().getSignReturnAddressKey();
+ bool BranchTargetEnforcement = CGM.getLangOpts().BranchTargetEnforcement;
if (const auto *TA = FD->getAttr<TargetAttr>()) {
ParsedTargetAttr Attr = TA->parse();
if (!Attr.BranchProtection.empty()) {
@@ -5072,14 +5536,14 @@ public:
}
auto *Fn = cast<llvm::Function>(GV);
- if (Scope != CodeGenOptions::SignReturnAddressScope::None) {
+ if (Scope != LangOptions::SignReturnAddressScopeKind::None) {
Fn->addFnAttr("sign-return-address",
- Scope == CodeGenOptions::SignReturnAddressScope::All
+ Scope == LangOptions::SignReturnAddressScopeKind::All
? "all"
: "non-leaf");
Fn->addFnAttr("sign-return-address-key",
- Key == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ Key == LangOptions::SignReturnAddressKeyKind::AKey
? "a_key"
: "b_key");
}
@@ -5133,13 +5597,13 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
- llvm::Type *ResType =
- llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
+ auto *ResType =
+ llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
- llvm::Type *ResType =
- llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
+ auto *ResType =
+ llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
return ABIArgInfo::getDirect(ResType);
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
@@ -5150,7 +5614,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() && isDarwinPCS()
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 128)
+ return getNaturalAlignIndirect(Ty);
+
+ return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
@@ -5227,7 +5695,11 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() && isDarwinPCS()
+ if (const auto *EIT = RetTy->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 128)
+ return getNaturalAlignIndirect(RetTy);
+
+ return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
@@ -5626,11 +6098,14 @@ public:
private:
ABIKind Kind;
+ bool IsFloatABISoftFP;
public:
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind)
: SwiftABIInfo(CGT), Kind(_Kind) {
setCCs();
+ IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" ||
+ CGT.getCodeGenOpts().FloatABI == ""; // default
}
bool isEABI() const {
@@ -5661,6 +6136,10 @@ public:
ABIKind getABIKind() const { return Kind; }
+ bool allowBFloatArgsAndRet() const override {
+ return !IsFloatABISoftFP && getTarget().hasBFloat16Type();
+ }
+
private:
ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const;
@@ -5701,7 +6180,7 @@ private:
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
- :TargetCodeGenInfo(new ARMABIInfo(CGT, K)) {}
+ : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {}
const ARMABIInfo &getABIInfo() const {
return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -5856,7 +6335,7 @@ ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const {
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64 || Size == 128) {
- llvm::Type *ResType = llvm::VectorType::get(
+ auto *ResType = llvm::FixedVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), Size / 32);
return ABIArgInfo::getDirect(ResType);
}
@@ -5872,7 +6351,7 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
// FP16 vectors should be converted to integer vectors
if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) {
uint64_t Size = getContext().getTypeSize(VT);
- llvm::Type *NewVecTy = llvm::VectorType::get(
+ auto *NewVecTy = llvm::FixedVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), Size / 32);
llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members);
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
@@ -5900,25 +6379,18 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty);
- // _Float16 and __fp16 get passed as if it were an int or float, but with
- // the top 16 bits unspecified. This is not done for OpenCL as it handles the
- // half type natively, and does not need to interwork with AAPCS code.
- if ((Ty->isFloat16Type() || Ty->isHalfType()) &&
- !getContext().getLangOpts().NativeHalfArgsAndReturns) {
- llvm::Type *ResType = IsAAPCS_VFP ?
- llvm::Type::getFloatTy(getVMContext()) :
- llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(ResType);
- }
-
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
Ty = EnumTy->getDecl()->getIntegerType();
}
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 64)
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
@@ -6100,31 +6572,27 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
// Large vector types should be returned via memory.
if (getContext().getTypeSize(RetTy) > 128)
return getNaturalAlignIndirect(RetTy);
- // FP16 vectors should be converted to integer vectors
- if (!getTarget().hasLegalHalfType() &&
+ // TODO: FP16/BF16 vectors should be converted to integer vectors.
+ // This check is similar to isIllegalVectorType; refactor?
+ if ((!getTarget().hasLegalHalfType() &&
(VT->getElementType()->isFloat16Type() ||
- VT->getElementType()->isHalfType()))
+ VT->getElementType()->isHalfType())) ||
+ (IsFloatABISoftFP &&
+ VT->getElementType()->isBFloat16Type()))
return coerceIllegalVector(RetTy);
}
- // _Float16 and __fp16 get returned as if it were an int or float, but with
- // the top 16 bits unspecified. This is not done for OpenCL as it handles the
- // half type natively, and does not need to interwork with AAPCS code.
- if ((RetTy->isFloat16Type() || RetTy->isHalfType()) &&
- !getContext().getLangOpts().NativeHalfArgsAndReturns) {
- llvm::Type *ResType = IsAAPCS_VFP ?
- llvm::Type::getFloatTy(getVMContext()) :
- llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(ResType);
- }
-
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect();
+ if (const auto *EIT = RetTy->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 64)
+ return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
+
+ return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect();
}
// Are we following APCS?
@@ -6200,12 +6668,17 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
/// isIllegalVector - check whether Ty is an illegal vector type.
bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
if (const VectorType *VT = Ty->getAs<VectorType> ()) {
- // On targets that don't support FP16, FP16 is expanded into float, and we
- // don't want the ABI to depend on whether or not FP16 is supported in
- // hardware. Thus return false to coerce FP16 vectors into integer vectors.
- if (!getTarget().hasLegalHalfType() &&
+ // On targets that don't support half, fp16 or bfloat, these types are
+ // expanded into float, and we don't want the ABI to depend on whether or
+ // not they are supported in hardware. Thus return true to coerce vectors
+ // of these types into integer vectors.
+ // We do not depend on hasLegalHalfType for bfloat as it is a
+ // separate IR type.
+ if ((!getTarget().hasLegalHalfType() &&
(VT->getElementType()->isFloat16Type() ||
- VT->getElementType()->isHalfType()))
+ VT->getElementType()->isHalfType())) ||
+ (IsFloatABISoftFP &&
+ VT->getElementType()->isBFloat16Type()))
return true;
if (isAndroid()) {
// Android shipped using Clang 3.1, which supported a slightly different
@@ -6257,6 +6730,7 @@ bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const {
} else {
if (const VectorType *VT = Ty->getAs<VectorType>())
return (VT->getElementType()->isFloat16Type() ||
+ VT->getElementType()->isBFloat16Type() ||
VT->getElementType()->isHalfType());
return false;
}
@@ -6362,9 +6836,14 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
namespace {
+class NVPTXTargetCodeGenInfo;
+
class NVPTXABIInfo : public ABIInfo {
+ NVPTXTargetCodeGenInfo &CGInfo;
+
public:
- NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+ NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
+ : ABIInfo(CGT), CGInfo(Info) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -6372,36 +6851,87 @@ public:
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
+ bool isUnsupportedType(QualType T) const;
+ ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
bool shouldEmitStaticExternCAliases() const override;
+ llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
+ // On the device side, a surface reference is represented as an object
+ // handle in a 64-bit integer.
+ return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
+ }
+
+ llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
+ // On the device side, a texture reference is represented as an object
+ // handle in a 64-bit integer.
+ return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
+ }
+
+ bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+ LValue Src) const override {
+ emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
+ return true;
+ }
+
+ bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+ LValue Src) const override {
+ emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
+ return true;
+ }
+
private:
- // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
+ // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
- static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand);
+ static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
+ int Operand);
+
+ static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+ LValue Src) {
+ llvm::Value *Handle = nullptr;
+ llvm::Constant *C =
+ llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
+ // Look through an `addrspacecast` on the constant pointer, if any.
+ if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
+ C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
+ if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
+ // Load the handle from the specific global variable using
+ // `nvvm.texsurf.handle.internal` intrinsic.
+ Handle = CGF.EmitRuntimeCall(
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
+ {GV->getType()}),
+ {GV}, "texsurf_handle");
+ } else
+ Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
+ CGF.EmitStoreOfScalar(Handle, Dst);
+ }
};
/// Checks if the type is unsupported directly by the current target.
-static bool isUnsupportedType(ASTContext &Context, QualType T) {
+bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
+ ASTContext &Context = getContext();
if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
return true;
if (!Context.getTargetInfo().hasFloat128Type() &&
(T->isFloat128Type() ||
(T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
return true;
+ if (const auto *EIT = T->getAs<ExtIntType>())
+ return EIT->getNumBits() >
+ (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
- Context.getTypeSize(T) > 64)
+ Context.getTypeSize(T) > 64U)
return true;
if (const auto *AT = T->getAsArrayTypeUnsafe())
- return isUnsupportedType(Context, AT->getElementType());
+ return isUnsupportedType(AT->getElementType());
const auto *RT = T->getAs<RecordType>();
if (!RT)
return false;
@@ -6410,24 +6940,23 @@ static bool isUnsupportedType(ASTContext &Context, QualType T) {
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const CXXBaseSpecifier &I : CXXRD->bases())
- if (isUnsupportedType(Context, I.getType()))
+ if (isUnsupportedType(I.getType()))
return true;
for (const FieldDecl *I : RD->fields())
- if (isUnsupportedType(Context, I->getType()))
+ if (isUnsupportedType(I->getType()))
return true;
return false;
}
/// Coerce the given type into an array with maximum allowed size of elements.
-static ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, ASTContext &Context,
- llvm::LLVMContext &LLVMContext,
- unsigned MaxSize) {
+ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
+ unsigned MaxSize) const {
// Alignment and Size are measured in bits.
- const uint64_t Size = Context.getTypeSize(Ty);
- const uint64_t Alignment = Context.getTypeAlign(Ty);
+ const uint64_t Size = getContext().getTypeSize(Ty);
+ const uint64_t Alignment = getContext().getTypeAlign(Ty);
const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
- llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Div);
+ llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
const uint64_t NumElements = (Size + Div - 1) / Div;
return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}
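A worked example of the Div/NumElements arithmetic above, with hypothetical inputs: a 128-bit type aligned to 64 bits with MaxSize = 64 gives Div = min(64, 64) = 64 and NumElements = (128 + 63) / 64 = 2, i.e. the type is passed as [2 x i64]. A minimal sketch:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Mirrors the shape computation in coerceToIntArrayWithLimit().
static void coerceShape(uint64_t SizeBits, uint64_t AlignBits,
                        unsigned MaxSize) {
  const unsigned Div = std::min<unsigned>(MaxSize, AlignBits);
  const uint64_t NumElements = (SizeBits + Div - 1) / Div;
  printf("[%llu x i%u]\n", (unsigned long long)NumElements, Div);
}

int main() {
  coerceShape(128, 64, 64); // [2 x i64]
  coerceShape(96, 32, 64);  // [3 x i32]
  return 0;
}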
@@ -6437,9 +6966,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
return ABIArgInfo::getIgnore();
if (getContext().getLangOpts().OpenMP &&
- getContext().getLangOpts().OpenMPIsDevice &&
- isUnsupportedType(getContext(), RetTy))
- return coerceToIntArrayWithLimit(RetTy, getContext(), getVMContext(), 64);
+ getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy))
+ return coerceToIntArrayWithLimit(RetTy, 64);
// Note: this is different from the default ABI.
if (!RetTy->isScalarType())
@@ -6449,8 +6977,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect());
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
@@ -6459,11 +6987,29 @@ ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
Ty = EnumTy->getDecl()->getIntegerType();
// Return aggregate types indirectly by value.
- if (isAggregateTypeForABI(Ty))
+ if (isAggregateTypeForABI(Ty)) {
+ // Under CUDA device compilation, tex/surf builtin types are replaced with
+ // object types and passed directly.
+ if (getContext().getLangOpts().CUDAIsDevice) {
+ if (Ty->isCUDADeviceBuiltinSurfaceType())
+ return ABIArgInfo::getDirect(
+ CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
+ if (Ty->isCUDADeviceBuiltinTextureType())
+ return ABIArgInfo::getDirect(
+ CGInfo.getCUDADeviceBuiltinTextureDeviceType());
+ }
return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ if (const auto *EIT = Ty->getAs<ExtIntType>()) {
+ if ((EIT->getNumBits() > 128) ||
+ (!getContext().getTargetInfo().hasInt128Type() &&
+ EIT->getNumBits() > 64))
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
}
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
@@ -6488,6 +7034,17 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
+ const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
+ if (VD) {
+ if (M.getLangOpts().CUDA) {
+ if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
+ addNVVMMetadata(GV, "surface", 1);
+ else if (VD->getType()->isCUDADeviceBuiltinTextureType())
+ addNVVMMetadata(GV, "texture", 1);
+ return;
+ }
+ }
+
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -6536,16 +7093,16 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name,
- int Operand) {
- llvm::Module *M = F->getParent();
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+ StringRef Name, int Operand) {
+ llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
llvm::Metadata *MDVals[] = {
- llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name),
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Append metadata to nvvm.annotations
@@ -6565,12 +7122,13 @@ namespace {
class SystemZABIInfo : public SwiftABIInfo {
bool HasVector;
+ bool IsSoftFloatABI;
public:
- SystemZABIInfo(CodeGenTypes &CGT, bool HV)
- : SwiftABIInfo(CGT), HasVector(HV) {}
+ SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF)
+ : SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
- bool isPromotableIntegerType(QualType Ty) const;
+ bool isPromotableIntegerTypeForABI(QualType Ty) const;
bool isCompoundType(QualType Ty) const;
bool isVectorArgumentType(QualType Ty) const;
bool isFPArgumentType(QualType Ty) const;
@@ -6600,21 +7158,26 @@ public:
class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
public:
- SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector)
- : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector)) {}
+ SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
+ : TargetCodeGenInfo(
+ std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {}
};
}
-bool SystemZABIInfo::isPromotableIntegerType(QualType Ty) const {
+bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
- if (Ty->isPromotableIntegerType())
+ if (ABIInfo::isPromotableIntegerTypeForABI(Ty))
return true;
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() < 64)
+ return true;
+
// 32-bit values must also be promoted.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
@@ -6640,6 +7203,9 @@ bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
}
bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
+ if (IsSoftFloatABI)
+ return false;
+
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Float:
@@ -6653,7 +7219,9 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
}
QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
- if (const RecordType *RT = Ty->getAsStructureType()) {
+ const RecordType *RT = Ty->getAs<RecordType>();
+
+ if (RT && RT->isStructureOrClassType()) {
const RecordDecl *RD = RT->getDecl();
QualType Found;
@@ -6679,6 +7247,10 @@ QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
if (getContext().getLangOpts().CPlusPlus &&
FD->isZeroLengthBitField(getContext()))
continue;
+ // Like isSingleElementStruct(), ignore C++20 empty data members.
+ if (FD->hasAttr<NoUniqueAddressAttr>() &&
+ isEmptyRecord(getContext(), FD->getType(), true))
+ continue;
// Unlike isSingleElementStruct(), arrays do not count.
// Nested structures still do though.
@@ -6725,7 +7297,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
} else {
if (AI.getCoerceToType())
ArgTy = AI.getCoerceToType();
- InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy();
+ InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
IsVector = ArgTy->isVectorTy();
UnpaddedSize = TyInfo.first;
DirectAlign = TyInfo.second;
@@ -6858,8 +7430,8 @@ ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const {
return ABIArgInfo::getDirect();
if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
return getNaturalAlignIndirect(RetTy);
- return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect());
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
}
ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
@@ -6868,7 +7440,7 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Integers and enums are extended to full register width.
- if (isPromotableIntegerType(Ty))
+ if (isPromotableIntegerTypeForABI(Ty))
return ABIArgInfo::getExtend(Ty);
// Handle vector types and vector-like structure types. Note that
@@ -6918,10 +7490,49 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
namespace {
+class MSP430ABIInfo : public DefaultABIInfo {
+ static ABIArgInfo complexArgInfo() {
+ ABIArgInfo Info = ABIArgInfo::getDirect();
+ Info.setCanBeFlattened(false);
+ return Info;
+ }
+
+public:
+ MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const {
+ if (RetTy->isAnyComplexType())
+ return complexArgInfo();
+
+ return DefaultABIInfo::classifyReturnType(RetTy);
+ }
+
+ ABIArgInfo classifyArgumentType(QualType RetTy) const {
+ if (RetTy->isAnyComplexType())
+ return complexArgInfo();
+
+ return DefaultABIInfo::classifyArgumentType(RetTy);
+ }
+
+ // Just copy the original implementations, because
+ // DefaultABIInfo::classify{Return,Argument}Type() are not virtual.
+ void computeInfo(CGFunctionInfo &FI) const override {
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
+ }
+
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override {
+ return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
+ }
+};
+
class MSP430TargetCodeGenInfo : public TargetCodeGenInfo {
public:
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
@@ -6980,8 +7591,8 @@ class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
unsigned SizeOfUnwindException;
public:
MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32)
- : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)),
- SizeOfUnwindException(IsO32 ? 24 : 32) {}
+ : TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)),
+ SizeOfUnwindException(IsO32 ? 24 : 32) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 29;
@@ -7163,6 +7774,13 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
+ // Make sure we pass things that are too large indirectly.
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 128 ||
+ (EIT->getNumBits() > 64 &&
+ !getContext().getTargetInfo().hasInt128Type()))
+ return getNaturalAlignIndirect(Ty);
+
// All integral types are promoted to the GPR width.
if (Ty->isIntegralOrEnumerationType())
return extendType(Ty);
@@ -7247,7 +7865,14 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const {
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- if (RetTy->isPromotableIntegerType())
+ // Make sure we return things that are too large indirectly.
+ if (const auto *EIT = RetTy->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 128 ||
+ (EIT->getNumBits() > 64 &&
+ !getContext().getTargetInfo().hasInt128Type()))
+ return getNaturalAlignIndirect(RetTy);
+
+ if (isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
if ((RetTy->isUnsignedIntegerOrEnumerationType() ||
@@ -7366,7 +7991,7 @@ namespace {
class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AVRTargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
+ : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
@@ -7455,50 +8080,97 @@ void TCETargetCodeGenInfo::setTargetAttributes(
namespace {
-class HexagonABIInfo : public ABIInfo {
-
-
+class HexagonABIInfo : public DefaultABIInfo {
public:
- HexagonABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+ HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
-
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
+ Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr,
+ QualType Ty) const;
+ Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr,
+ QualType Ty) const;
+ Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr,
+ QualType Ty) const;
};
class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
public:
HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
- :TargetCodeGenInfo(new HexagonABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 29;
}
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &GCM) const override {
+ if (GV->isDeclaration())
+ return;
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD)
+ return;
+ }
};
-}
+} // namespace
void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ unsigned RegsLeft = 6;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
- I.info = classifyArgumentType(I.type);
+ I.info = classifyArgumentType(I.type, &RegsLeft);
}
-ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
+static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
+ assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits"
+ " through registers");
+
+ if (*RegsLeft == 0)
+ return false;
+
+ if (Size <= 32) {
+ (*RegsLeft)--;
+ return true;
+ }
+
+ if (2 <= (*RegsLeft & (~1U))) {
+ *RegsLeft = (*RegsLeft & (~1U)) - 2;
+ return true;
+ }
+
+ // The next available register was r5, but the candidate was wider than 32
+ // bits, so it has to go on the stack. However, we still consume r5.
+ if (*RegsLeft == 1)
+ *RegsLeft = 0;
+
+ return false;
+}
+
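A hypothetical trace of the consumption rule above across the six Hexagon argument registers r0-r5 (a standalone replica, not the real helper): 32-bit values take one register, 64-bit values take an even-aligned pair, and a 64-bit value that no longer fits still burns a leftover odd register.

#include <cstdint>
#include <cstdio>

static bool adjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
  if (*RegsLeft == 0)
    return false;
  if (Size <= 32) { // 32-bit value: one register
    --*RegsLeft;
    return true;
  }
  if (2 <= (*RegsLeft & ~1U)) { // 64-bit value: even-aligned register pair
    *RegsLeft = (*RegsLeft & ~1U) - 2;
    return true;
  }
  if (*RegsLeft == 1) // only r5 left: argument spills, r5 is still consumed
    *RegsLeft = 0;
  return false;
}

int main() {
  unsigned RegsLeft = 6;
  const uint64_t Sizes[] = {32, 32, 32, 64, 32}; // argument widths in bits
  for (uint64_t Size : Sizes) {
    bool InRegs = adjustRegsLeft(Size, &RegsLeft);
    printf("size=%2u inRegs=%d left=%u\n", (unsigned)Size, InRegs, RegsLeft);
  }
  return 0;
}

After the three 32-bit arguments, three registers remain (r3-r5); the 64-bit argument rounds down to the even pair r4:r5, so r3 is skipped, and the final 32-bit argument lands on the stack.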
+ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty,
+ unsigned *RegsLeft) const {
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
- : ABIArgInfo::getDirect());
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64)
+ HexagonAdjustRegsLeft(Size, RegsLeft);
+
+ if (Size > 64 && Ty->isExtIntType())
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
+
+ return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect();
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -7509,63 +8181,304 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
+ unsigned Align = getContext().getTypeAlign(Ty);
+
if (Size > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
+
+ if (HexagonAdjustRegsLeft(Size, RegsLeft))
+ Align = Size <= 32 ? 32 : 64;
+ if (Size <= Align) {
// Pass in the smallest viable integer type.
- else if (Size > 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext()));
- else if (Size > 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
- else if (Size > 8)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
- else
- return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
+ if (!llvm::isPowerOf2_64(Size))
+ Size = llvm::NextPowerOf2(Size);
+ return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
+ }
+ return DefaultABIInfo::classifyArgumentType(Ty);
}
ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
- // Large vector types should be returned via memory.
- if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 64)
- return getNaturalAlignIndirect(RetTy);
+ const TargetInfo &T = CGT.getTarget();
+ uint64_t Size = getContext().getTypeSize(RetTy);
+
+ if (RetTy->getAs<VectorType>()) {
+ // HVX vectors are returned in vector registers or register pairs.
+ if (T.hasFeature("hvx")) {
+ assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b"));
+ uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8;
+ if (Size == VecSize || Size == 2*VecSize)
+ return ABIArgInfo::getDirectInReg();
+ }
+ // Large vector types should be returned via memory.
+ if (Size > 64)
+ return getNaturalAlignIndirect(RetTy);
+ }
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
- return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
- : ABIArgInfo::getDirect());
+ if (Size > 64 && RetTy->isExtIntType())
+ return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
+
+ return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect();
}
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
- // Aggregates <= 8 bytes are returned in r0; other aggregates
+ // Aggregates <= 8 bytes are returned in registers; other aggregates
// are returned indirectly.
- uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 64) {
// Return in the smallest viable integer type.
- if (Size <= 8)
- return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
- if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
- return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext()));
+ if (!llvm::isPowerOf2_64(Size))
+ Size = llvm::NextPowerOf2(Size);
+ return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
}
-
return getNaturalAlignIndirect(RetTy, /*ByVal=*/true);
}
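Both the argument and return paths above now round odd sizes up to a power of two before picking the integer type; a quick sketch of the rounding (a plain stand-in for the llvm::isPowerOf2_64 / llvm::NextPowerOf2 pair used in the code):

#include <cstdint>
#include <cstdio>

// Width of the iN used for a small (<= 64-bit) value, per the code above.
static uint64_t hexagonIntWidth(uint64_t SizeBits) {
  if (SizeBits && (SizeBits & (SizeBits - 1)) == 0)
    return SizeBits; // already a power of two
  uint64_t P = 1;
  while (P < SizeBits)
    P <<= 1; // next power of two at or above SizeBits
  return P;
}

int main() {
  printf("i%llu\n", (unsigned long long)hexagonIntWidth(24)); // i32
  printf("i%llu\n", (unsigned long long)hexagonIntWidth(48)); // i64
  return 0;
}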
+Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF,
+ Address VAListAddr,
+ QualType Ty) const {
+ // Load the overflow area pointer.
+ Address __overflow_area_pointer_p =
+ CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
+ llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
+ __overflow_area_pointer_p, "__overflow_area_pointer");
+
+ uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
+ if (Align > 4) {
+ // Alignment should be a power of 2.
+ assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!");
+
+ // overflow_arg_area = (overflow_arg_area + align - 1) & -align;
+ llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1);
+
+ // Add offset to the current pointer to access the argument.
+ __overflow_area_pointer =
+ CGF.Builder.CreateGEP(__overflow_area_pointer, Offset);
+ llvm::Value *AsInt =
+ CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
+
+ // Create a mask which should be "AND"ed
+ // with (overflow_arg_area + align - 1)
+ llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align);
+ __overflow_area_pointer = CGF.Builder.CreateIntToPtr(
+ CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(),
+ "__overflow_area_pointer.align");
+ }
+
+ // Get the type of the argument from memory and bitcast
+ // overflow area pointer to the argument type.
+ llvm::Type *PTy = CGF.ConvertTypeForMem(Ty);
+ Address AddrTyped = CGF.Builder.CreateBitCast(
+ Address(__overflow_area_pointer, CharUnits::fromQuantity(Align)),
+ llvm::PointerType::getUnqual(PTy));
+
+ // Round up to the minimum stack alignment for varargs, which is 4 bytes.
+ uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
+
+ __overflow_area_pointer = CGF.Builder.CreateGEP(
+ __overflow_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
+ "__overflow_area_pointer.next");
+ CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p);
+
+ return AddrTyped;
+}
+
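The ptrtoint/add/and sequence above is the standard (p + align - 1) & -align round-up; a minimal arithmetic sketch of the same computation on a plain integer address:

#include <cstdint>
#include <cstdio>

// Round Addr up to the next multiple of Align (Align must be a power of
// two), mirroring the mask construction in EmitVAArgFromMemory().
static uint32_t alignUp(uint32_t Addr, uint32_t Align) {
  return (Addr + Align - 1) & -Align;
}

int main() {
  printf("%#x\n", alignUp(0x1004, 8)); // 0x1008: next 8-byte slot
  printf("%#x\n", alignUp(0x1008, 8)); // 0x1008: already aligned
  return 0;
}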
+Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF,
+ Address VAListAddr,
+ QualType Ty) const {
+ // FIXME: Need to handle alignment
+ llvm::Type *BP = CGF.Int8PtrTy;
+ llvm::Type *BPP = CGF.Int8PtrPtrTy;
+ CGBuilderTy &Builder = CGF.Builder;
+ Address VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
+ llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
+ // Handle address alignment for type alignment > 32 bits
+ uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+ if (TyAlign > 4) {
+ assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!");
+ llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
+ AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
+ AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
+ Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+ }
+ llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+ Address AddrTyped = Builder.CreateBitCast(
+ Address(Addr, CharUnits::fromQuantity(TyAlign)), PTy);
+
+ uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
+ llvm::Value *NextAddr = Builder.CreateGEP(
+ Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
+ Builder.CreateStore(NextAddr, VAListAddrAsBPP);
+
+ return AddrTyped;
+}
+
+Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF,
+ Address VAListAddr,
+ QualType Ty) const {
+ int ArgSize = CGF.getContext().getTypeSize(Ty) / 8;
+
+ if (ArgSize > 8)
+ return EmitVAArgFromMemory(CGF, VAListAddr, Ty);
+
+ // Here we have to check whether the argument is in the register area or
+ // in the overflow area.
+ // If the saved register area pointer + argsize rounded up to alignment >
+ // saved register area end pointer, the argument is in the overflow area.
+ unsigned RegsLeft = 6;
+ Ty = CGF.getContext().getCanonicalType(Ty);
+ (void)classifyArgumentType(Ty, &RegsLeft);
+
+ llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
+ llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+ llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
+ llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+
+ // Get the rounded size of the argument. GCC does not allow varargs of
+ // size < 4 bytes. We follow the same logic here.
+ ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
+ int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
+
+ // Argument may be in saved register area
+ CGF.EmitBlock(MaybeRegBlock);
+
+ // Load the current saved register area pointer.
+ Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP(
+ VAListAddr, 0, "__current_saved_reg_area_pointer_p");
+ llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad(
+ __current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer");
+
+ // Load the saved register area end pointer.
+ Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP(
+ VAListAddr, 1, "__saved_reg_area_end_pointer_p");
+ llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad(
+ __saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer");
+
+ // If the size of the argument is > 4 bytes, check whether the stack
+ // location is aligned to 8 bytes.
+ if (ArgAlign > 4) {
+
+ llvm::Value *__current_saved_reg_area_pointer_int =
+ CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer,
+ CGF.Int32Ty);
+
+ __current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd(
+ __current_saved_reg_area_pointer_int,
+ llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)),
+ "align_current_saved_reg_area_pointer");
+
+ __current_saved_reg_area_pointer_int =
+ CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int,
+ llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
+ "align_current_saved_reg_area_pointer");
+
+ __current_saved_reg_area_pointer =
+ CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int,
+ __current_saved_reg_area_pointer->getType(),
+ "align_current_saved_reg_area_pointer");
+ }
+
+ llvm::Value *__new_saved_reg_area_pointer =
+ CGF.Builder.CreateGEP(__current_saved_reg_area_pointer,
+ llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
+ "__new_saved_reg_area_pointer");
+
+ llvm::Value *UsingStack = CGF.Builder.CreateICmpSGT(
+     __new_saved_reg_area_pointer, __saved_reg_area_end_pointer);
+
+ CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock);
+
+ // Argument in saved register area
+ // Implement the block where argument is in register saved area
+ CGF.EmitBlock(InRegBlock);
+
+ llvm::Type *PTy = CGF.ConvertType(Ty);
+ llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast(
+ __current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy));
+
+ CGF.Builder.CreateStore(__new_saved_reg_area_pointer,
+ __current_saved_reg_area_pointer_p);
+
+ CGF.EmitBranch(ContBlock);
+
+ // Argument in overflow area
+ // Implement the block where the argument is in overflow area.
+ CGF.EmitBlock(OnStackBlock);
+
+ // Load the overflow area pointer
+ Address __overflow_area_pointer_p =
+ CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
+ llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
+ __overflow_area_pointer_p, "__overflow_area_pointer");
+
+ // Align the overflow area pointer according to the alignment of the argument
+ if (ArgAlign > 4) {
+ llvm::Value *__overflow_area_pointer_int =
+ CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
+
+ __overflow_area_pointer_int =
+ CGF.Builder.CreateAdd(__overflow_area_pointer_int,
+ llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1),
+ "align_overflow_area_pointer");
+
+ __overflow_area_pointer_int =
+ CGF.Builder.CreateAnd(__overflow_area_pointer_int,
+ llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
+ "align_overflow_area_pointer");
+
+ __overflow_area_pointer = CGF.Builder.CreateIntToPtr(
+ __overflow_area_pointer_int, __overflow_area_pointer->getType(),
+ "align_overflow_area_pointer");
+ }
+
+ // Get the pointer to the next argument in the overflow area and store it
+ // to the overflow area pointer.
+ llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP(
+ __overflow_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
+ "__overflow_area_pointer.next");
+
+ CGF.Builder.CreateStore(__new_overflow_area_pointer,
+ __overflow_area_pointer_p);
+
+ CGF.Builder.CreateStore(__new_overflow_area_pointer,
+ __current_saved_reg_area_pointer_p);
+
+ // Bitcast the overflow area pointer to the type of argument.
+ llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty);
+ llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast(
+ __overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy));
+
+ CGF.EmitBranch(ContBlock);
+
+ // Get the correct pointer to load the variable argument
+ // Implement the ContBlock
+ CGF.EmitBlock(ContBlock);
+
+ llvm::Type *MemPTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
+ llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr");
+ ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock);
+ ArgAddr->addIncoming(__overflow_area_p, OnStackBlock);
+
+ return Address(ArgAddr, CharUnits::fromQuantity(ArgAlign));
+}
+
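For reference, the CreateStructGEP indices 0, 1 and 2 used above address three consecutive pointer fields; a sketch of the corresponding C layout (field names follow the value names in the code; the authoritative definition is the target's __builtin_va_list):

// Conceptual layout matched by CreateStructGEP(VAListAddr, 0/1/2) above.
struct HexagonLinuxVAList {
  void *__current_saved_reg_area_pointer; // next unread r0-r5 spill slot
  void *__saved_reg_area_end_pointer;     // one past the register spill area
  void *__overflow_area_pointer;          // next stack (overflow) argument
};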
Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- // FIXME: Someone needs to audit that this handle alignment correctly.
- return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
- getContext().getTypeInfoInChars(Ty),
- CharUnits::fromQuantity(4),
- /*AllowHigherAlign*/ true);
+
+ if (getTarget().getTriple().isMusl())
+ return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty);
+
+ return EmitVAArgForHexagon(CGF, VAListAddr, Ty);
}
//===----------------------------------------------------------------------===//
@@ -7676,7 +8589,13 @@ ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
Ty = EnumTy->getDecl()->getIntegerType();
bool InReg = shouldUseInReg(Ty, State);
- if (Ty->isPromotableIntegerType()) {
+
+ // Don't pass >64 bit integers in registers.
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 64)
+ return getIndirectResult(Ty, /*ByVal=*/true, State);
+
+ if (isPromotableIntegerTypeForABI(Ty)) {
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getExtend(Ty);
@@ -7690,7 +8609,7 @@ namespace {
class LanaiTargetCodeGenInfo : public TargetCodeGenInfo {
public:
LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {}
};
}
@@ -7730,7 +8649,7 @@ private:
EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked());
}
- // Arrary types.
+ // Array types.
if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) {
auto T = ATy->getElementType();
auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
@@ -7958,7 +8877,7 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
@@ -7994,23 +8913,13 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
(isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
(isa<VarDecl>(D) &&
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
- D->hasAttr<HIPPinnedShadowAttr>()));
-}
-
-static bool requiresAMDGPUDefaultVisibility(const Decl *D,
- llvm::GlobalValue *GV) {
- if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
- return false;
-
- return isa<VarDecl>(D) && D->hasAttr<HIPPinnedShadowAttr>();
+ cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
- if (requiresAMDGPUDefaultVisibility(D, GV)) {
- GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
- GV->setDSOLocal(false);
- } else if (requiresAMDGPUProtectedVisibility(D, GV)) {
+ if (requiresAMDGPUProtectedVisibility(D, GV)) {
GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
GV->setDSOLocal(true);
}
@@ -8035,6 +8944,10 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
+ if (IsHIPKernel)
+ F->addFnAttr("uniform-work-group-size", "true");
+
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
if (ReqdWGS || FlatWGS) {
unsigned Min = 0;
@@ -8059,9 +8972,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to a value specified by
- // --gpu-max-threads-per-block=n or its default value.
+ // --gpu-max-threads-per-block=n or its default value for HIP.
+ const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
+ const unsigned DefaultMaxWorkGroupSize =
+ IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
+ : M.getLangOpts().GPUMaxThreadsPerBlock;
std::string AttrVal =
- std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
+ std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}
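A small sketch of the default attribute value computed above (the 256 OpenCL default is taken from the code; for HIP the bound comes from --gpu-max-threads-per-block via LangOpts):

#include <string>

// Mirrors the AttrVal construction for kernels without an explicit
// work-group-size attribute; the result becomes
// "amdgpu-flat-work-group-size"="1,<max>".
static std::string defaultFlatWorkGroupSize(bool IsOpenCLKernel,
                                            unsigned GPUMaxThreadsPerBlock) {
  const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
  const unsigned Max = IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                                      : GPUMaxThreadsPerBlock;
  return "1," + std::to_string(Max);
}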
@@ -8223,7 +9140,7 @@ namespace {
class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo {
public:
SparcV8TargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {}
};
} // end anonymous namespace
@@ -8392,6 +9309,10 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() < 64)
+ return ABIArgInfo::getExtend(Ty);
+
// Other non-aggregates go in registers.
if (!isAggregateTypeForABI(Ty))
return ABIArgInfo::getDirect();
@@ -8485,7 +9406,7 @@ namespace {
class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo {
public:
SparcV9TargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 14;
@@ -8578,7 +9499,7 @@ private:
class ARCTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARCTargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new ARCABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {}
};
@@ -8641,11 +9562,15 @@ ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
ABIArgInfo::getDirect(Result, 0, nullptr, false);
}
- return Ty->isPromotableIntegerType() ?
- (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) :
- ABIArgInfo::getExtend(Ty)) :
- (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() :
- ABIArgInfo::getDirect());
+ if (const auto *EIT = Ty->getAs<ExtIntType>())
+ if (EIT->getNumBits() > 64)
+ return getIndirectByValue(Ty);
+
+ return isPromotableIntegerTypeForABI(Ty)
+ ? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty)
+ : ABIArgInfo::getExtend(Ty))
+ : (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg()
+ : ABIArgInfo::getDirect());
}
ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
@@ -8769,11 +9694,15 @@ public:
class XCoreTargetCodeGenInfo : public TargetCodeGenInfo {
mutable TypeStringCache TSC;
+ void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
+ const CodeGen::CodeGenModule &M) const;
+
public:
XCoreTargetCodeGenInfo(CodeGenTypes &CGT)
- :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {}
- void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ : TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {}
+ void emitTargetMetadata(CodeGen::CodeGenModule &CGM,
+ const llvm::MapVector<GlobalDecl, StringRef>
+ &MangledDeclNames) const override;
};
} // End anonymous namespace.
@@ -8934,11 +9863,13 @@ StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
/// The output is tested by test/CodeGen/xcore-stringtype.c.
///
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
- CodeGen::CodeGenModule &CGM, TypeStringCache &TSC);
+ const CodeGen::CodeGenModule &CGM,
+ TypeStringCache &TSC);
/// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
-void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
+void XCoreTargetCodeGenInfo::emitTargetMD(
+ const Decl *D, llvm::GlobalValue *GV,
+ const CodeGen::CodeGenModule &CGM) const {
SmallStringEnc Enc;
if (getTypeString(Enc, D, CGM, TSC)) {
llvm::LLVMContext &Ctx = CGM.getModule().getContext();
@@ -8950,6 +9881,21 @@ void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
}
}
+void XCoreTargetCodeGenInfo::emitTargetMetadata(
+ CodeGen::CodeGenModule &CGM,
+ const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {
+ // Warning, new MangledDeclNames may be appended within this loop.
+ // We rely on MapVector insertions adding new elements to the end
+ // of the container.
+ for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
+ auto Val = *(MangledDeclNames.begin() + I);
+ llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second);
+ if (GV) {
+ const Decl *D = Val.first.getDecl()->getMostRecentDecl();
+ emitTargetMD(D, GV, CGM);
+ }
+ }
+}
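The index-based loop above is deliberate: emitting metadata for one symbol may cause new mangled names to be appended, and MapVector adds them at the end, so re-reading size() each iteration picks them up. A sketch of the pattern, with a plain std::vector standing in for the MapVector:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Work = {1, 2, 3};
  // Iterators would be invalidated by push_back; indexing re-reads size(),
  // so elements appended during the walk are still visited.
  for (unsigned I = 0; I != Work.size(); ++I) {
    if (Work[I] == 2)
      Work.push_back(4); // simulates a new mangled name appearing mid-loop
    printf("visit %d\n", Work[I]);
  }
  return 0;
}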
//===----------------------------------------------------------------------===//
// SPIR ABI Implementation
//===----------------------------------------------------------------------===//
@@ -8958,7 +9904,7 @@ namespace {
class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+ : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
unsigned getOpenCLKernelCallingConv() const override;
};
@@ -9283,7 +10229,8 @@ static bool appendType(SmallStringEnc &Enc, QualType QType,
}
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
- CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) {
+ const CodeGen::CodeGenModule &CGM,
+ TypeStringCache &TSC) {
if (!D)
return false;
@@ -9613,7 +10560,8 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
uint64_t Size = getContext().getTypeSize(Ty);
// Pass floating point values via FPRs if possible.
- if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) {
+ if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
+ FLen >= Size && ArgFPRsLeft) {
ArgFPRsLeft--;
return ABIArgInfo::getDirect();
}
@@ -9676,6 +10624,15 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
return extendType(Ty);
}
+ if (const auto *EIT = Ty->getAs<ExtIntType>()) {
+ if (EIT->getNumBits() < XLen && !MustUseStack)
+ return extendType(Ty);
+ if (EIT->getNumBits() > 128 ||
+ (!getContext().getTargetInfo().hasInt128Type() &&
+ EIT->getNumBits() > 64))
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+ }
+
return ABIArgInfo::getDirect();
}
@@ -9747,7 +10704,7 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
public:
RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
unsigned FLen)
- : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {}
+ : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
@@ -9773,6 +10730,56 @@ public:
} // namespace
//===----------------------------------------------------------------------===//
+// VE ABI Implementation.
+//
+namespace {
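+// VE deviates from the default ABI only for complex values, which it passes
+// and returns directly in registers instead of indirectly.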
+class VEABIInfo : public DefaultABIInfo {
+public:
+ VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+private:
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyArgumentType(QualType Ty) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+};
+} // end anonymous namespace
+
+ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
+ if (Ty->isAnyComplexType()) {
+ return ABIArgInfo::getDirect();
+ }
+ return DefaultABIInfo::classifyReturnType(Ty);
+}
+
+ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
+ if (Ty->isAnyComplexType()) {
+ return ABIArgInfo::getDirect();
+ }
+ return DefaultABIInfo::classifyArgumentType(Ty);
+}
+
+void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (auto &Arg : FI.arguments())
+ Arg.info = classifyArgumentType(Arg.type);
+}
+
+namespace {
+class VETargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ VETargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {}
+ // The VE ABI requires that the arguments of variadic and prototype-less
+ // functions be passed in both registers and memory.
+ bool isNoProtoCallVariadic(const CallArgList &args,
+ const FunctionNoProtoType *fnType) const override {
+ return true;
+ }
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//
@@ -9824,8 +10831,12 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
}
case llvm::Triple::wasm32:
- case llvm::Triple::wasm64:
- return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types));
+ case llvm::Triple::wasm64: {
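+ // The "experimental-mv" ABI string opts in to WebAssembly's multi-value
+ // proposal; otherwise the MVP (single-value) ABI is used.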
+ WebAssemblyABIInfo::ABIKind Kind = WebAssemblyABIInfo::MVP;
+ if (getTarget().getABI() == "experimental-mv")
+ Kind = WebAssemblyABIInfo::ExperimentalMV;
+ return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind));
+ }
case llvm::Triple::arm:
case llvm::Triple::armeb:
@@ -9852,11 +10863,21 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind));
}
- case llvm::Triple::ppc:
+ case llvm::Triple::ppc: {
+ if (Triple.isOSAIX())
+ return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit=*/false));
+
+ bool IsSoftFloat =
+ CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe");
+ bool RetSmallStructInRegABI =
+ PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
return SetCGInfo(
- new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft" ||
- getTarget().hasFeature("spe")));
+ new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
+ }
case llvm::Triple::ppc64:
+ if (Triple.isOSAIX())
+ return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit=*/true));
+
if (Triple.isOSBinFormatELF()) {
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
if (getTarget().getABI() == "elfv2")
@@ -9866,8 +10887,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX,
IsSoftFloat));
- } else
- return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
+ }
+ return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
case llvm::Triple::ppc64le: {
assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
@@ -9900,8 +10921,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
}
case llvm::Triple::systemz: {
- bool HasVector = getTarget().getABI() == "vector";
- return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector));
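+ // The vector ABI only applies when hardware FP/vector registers are in
+ // use, so soft-float disables it even if the ABI string is "vector".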
+ bool SoftFloat = CodeGenOpts.FloatABI == "soft";
+ bool HasVector = !SoftFloat && getTarget().getABI() == "vector";
+ return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat));
}
case llvm::Triple::tce:
@@ -9959,6 +10981,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::spir:
case llvm::Triple::spir64:
return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
+ case llvm::Triple::ve:
+ return SetCGInfo(new VETargetCodeGenInfo(Types));
}
}
@@ -10042,9 +11066,9 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
auto IP = CGF.Builder.saveIP();
auto *BB = llvm::BasicBlock::Create(C, "entry", F);
Builder.SetInsertPoint(BB);
- unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
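+ // getPrefTypeAlign returns an llvm::Align directly, so no MaybeAlign
+ // wrapping is needed when setting the alloca's alignment below.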
+ const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
- BlockPtr->setAlignment(llvm::MaybeAlign(BlockAlign));
+ BlockPtr->setAlignment(BlockAlign);
Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
llvm::SmallVector<llvm::Value *, 2> Args;
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index e1e90e73cb58..1152cabce4a0 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -43,11 +43,10 @@ class CGFunctionInfo;
/// code generation issues, like target-specific attributes, builtins and so
/// on.
class TargetCodeGenInfo {
- ABIInfo *Info;
+ std::unique_ptr<ABIInfo> Info;
public:
- // WARNING: Acquires the ownership of ABIInfo.
- TargetCodeGenInfo(ABIInfo *info = nullptr) : Info(info) {}
+ TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info) : Info(std::move(Info)) {}
virtual ~TargetCodeGenInfo();
/// getABIInfo() - Returns ABI info helper for the target.
@@ -58,10 +57,18 @@ public:
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const {}
- /// emitTargetMD - Provides a convenient hook to handle extra
- /// target-specific metadata for the given global.
- virtual void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const {}
+ /// emitTargetMetadata - Provides a convenient hook to handle extra
+ /// target-specific metadata for the given globals.
+ virtual void emitTargetMetadata(
+ CodeGen::CodeGenModule &CGM,
+ const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {}
+
+ /// Performs any further codegen-related checks that need to be done on a
+ /// function call, in a target-specific manner.
+ virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+ const FunctionDecl *Caller,
+ const FunctionDecl *Callee,
+ const CallArgList &Args) const {}
/// Determines the size of struct _Unwind_Exception on this platform,
/// in 8-bit units. The Itanium ABI defines this as:
@@ -315,6 +322,32 @@ public:
virtual bool shouldEmitStaticExternCAliases() const { return true; }
virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
+
+ /// Return the device-side type for the CUDA device builtin surface type.
+ virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
+ // By default, no change from the original one.
+ return nullptr;
+ }
+ /// Return the device-side type for the CUDA device builtin texture type.
+ virtual llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const {
+ // By default, no change from the original one.
+ return nullptr;
+ }
+
+ /// Emit the device-side copy of the builtin surface type.
+ virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
+ LValue Dst,
+ LValue Src) const {
+ // Do nothing by default.
+ return false;
+ }
+ /// Emit the device-side copy of the builtin texture type.
+ virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF,
+ LValue Dst,
+ LValue Src) const {
+ // Do nothing by default.
+ return false;
+ }
};
} // namespace CodeGen