about summary refs log tree commit diff
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/ABIInfo.h46
-rw-r--r--clang/lib/CodeGen/BackendUtil.cpp249
-rw-r--r--clang/lib/CodeGen/CGAtomic.cpp40
-rw-r--r--clang/lib/CodeGen/CGBlocks.cpp22
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp1787
-rw-r--r--clang/lib/CodeGen/CGCUDANV.cpp71
-rw-r--r--clang/lib/CodeGen/CGCUDARuntime.h2
-rw-r--r--clang/lib/CodeGen/CGCXXABI.h12
-rw-r--r--clang/lib/CodeGen/CGCall.cpp251
-rw-r--r--clang/lib/CodeGen/CGClass.cpp94
-rw-r--r--clang/lib/CodeGen/CGCleanup.cpp10
-rw-r--r--clang/lib/CodeGen/CGCoroutine.cpp20
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.cpp381
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.h29
-rw-r--r--clang/lib/CodeGen/CGDecl.cpp14
-rw-r--r--clang/lib/CodeGen/CGDeclCXX.cpp97
-rw-r--r--clang/lib/CodeGen/CGException.cpp17
-rw-r--r--clang/lib/CodeGen/CGExpr.cpp195
-rw-r--r--clang/lib/CodeGen/CGExprAgg.cpp108
-rw-r--r--clang/lib/CodeGen/CGExprCXX.cpp2
-rw-r--r--clang/lib/CodeGen/CGExprComplex.cpp257
-rw-r--r--clang/lib/CodeGen/CGExprConstant.cpp60
-rw-r--r--clang/lib/CodeGen/CGExprScalar.cpp316
-rw-r--r--clang/lib/CodeGen/CGGPUBuiltin.cpp2
-rw-r--r--clang/lib/CodeGen/CGHLSLRuntime.cpp413
-rw-r--r--clang/lib/CodeGen/CGHLSLRuntime.h67
-rw-r--r--clang/lib/CodeGen/CGLoopInfo.cpp17
-rw-r--r--clang/lib/CodeGen/CGObjC.cpp172
-rw-r--r--clang/lib/CodeGen/CGObjCGNU.cpp17
-rw-r--r--clang/lib/CodeGen/CGObjCMac.cpp56
-rw-r--r--clang/lib/CodeGen/CGObjCRuntime.cpp28
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp1693
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h319
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp431
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.h31
-rw-r--r--clang/lib/CodeGen/CGRecordLayoutBuilder.cpp2
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp87
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp257
-rw-r--r--clang/lib/CodeGen/CGVTT.cpp2
-rw-r--r--clang/lib/CodeGen/CGVTables.cpp58
-rw-r--r--clang/lib/CodeGen/CGVTables.h7
-rw-r--r--clang/lib/CodeGen/CodeGenAction.cpp41
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.cpp185
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h71
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp508
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h54
-rw-r--r--clang/lib/CodeGen/CodeGenPGO.cpp13
-rw-r--r--clang/lib/CodeGen/CodeGenPGO.h7
-rw-r--r--clang/lib/CodeGen/CodeGenTBAA.cpp2
-rw-r--r--clang/lib/CodeGen/CodeGenTypeCache.h3
-rw-r--r--clang/lib/CodeGen/CodeGenTypes.cpp28
-rw-r--r--clang/lib/CodeGen/CodeGenTypes.h3
-rw-r--r--clang/lib/CodeGen/ConstantEmitter.h3
-rw-r--r--clang/lib/CodeGen/ConstantInitBuilder.cpp11
-rw-r--r--clang/lib/CodeGen/CoverageMappingGen.cpp112
-rw-r--r--clang/lib/CodeGen/ItaniumCXXABI.cpp197
-rw-r--r--clang/lib/CodeGen/MacroPPCallbacks.cpp4
-rw-r--r--clang/lib/CodeGen/MacroPPCallbacks.h2
-rw-r--r--clang/lib/CodeGen/MicrosoftCXXABI.cpp89
-rw-r--r--clang/lib/CodeGen/ModuleBuilder.cpp1
-rw-r--r--clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp4
-rw-r--r--clang/lib/CodeGen/PatternInit.cpp4
-rw-r--r--clang/lib/CodeGen/SanitizerMetadata.cpp2
-rw-r--r--clang/lib/CodeGen/SwiftCallingConv.cpp36
-rw-r--r--clang/lib/CodeGen/TargetInfo.cpp1070
-rw-r--r--clang/lib/CodeGen/TargetInfo.h13
-rw-r--r--clang/lib/CodeGen/VarBypassDetector.cpp2
67 files changed, 6009 insertions, 4195 deletions
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index c1eb8a975796..755d2aaa7beb 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -33,7 +33,6 @@ namespace CodeGen {
class CGFunctionInfo;
class CodeGenFunction;
class CodeGenTypes;
- class SwiftABIInfo;
// FIXME: All of this stuff should be part of the target interface
// somehow. It is currently here because it is not clear how to factor
@@ -44,9 +43,8 @@ namespace CodeGen {
/// ABIInfo - Target specific hooks for defining how a type should be
/// passed or returned from functions.
class ABIInfo {
- public:
- CodeGen::CodeGenTypes &CGT;
protected:
+ CodeGen::CodeGenTypes &CGT;
llvm::CallingConv::ID RuntimeCC;
public:
ABIInfo(CodeGen::CodeGenTypes &cgt)
@@ -54,8 +52,6 @@ namespace CodeGen {
virtual ~ABIInfo();
- virtual bool supportsSwift() const { return false; }
-
virtual bool allowBFloatArgsAndRet() const { return false; }
CodeGen::CGCXXABI &getCXXABI() const;
@@ -114,33 +110,33 @@ namespace CodeGen {
CodeGen::ABIArgInfo
getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const;
-
-
};
- /// A refining implementation of ABIInfo for targets that support swiftcall.
- ///
- /// If we find ourselves wanting multiple such refinements, they'll probably
- /// be independent refinements, and we should probably find another way
- /// to do it than simple inheritance.
- class SwiftABIInfo : public ABIInfo {
- public:
- SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {}
+ /// Target specific hooks for defining how a type should be passed or returned
+ /// from functions with one of the Swift calling conventions.
+ class SwiftABIInfo {
+ protected:
+ CodeGenTypes &CGT;
+ bool SwiftErrorInRegister;
- bool supportsSwift() const final { return true; }
+ public:
+ SwiftABIInfo(CodeGen::CodeGenTypes &CGT, bool SwiftErrorInRegister)
+ : CGT(CGT), SwiftErrorInRegister(SwiftErrorInRegister) {}
- virtual bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> types,
- bool asReturnValue) const = 0;
+ virtual ~SwiftABIInfo();
- virtual bool isLegalVectorTypeForSwift(CharUnits totalSize,
- llvm::Type *eltTy,
- unsigned elts) const;
+ /// Returns true if an aggregate which expands to the given type sequence
+ /// should be passed / returned indirectly.
+ virtual bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+ bool AsReturnValue) const;
- virtual bool isSwiftErrorInRegister() const = 0;
+ /// Returns true if the given vector type is legal from Swift's calling
+ /// convention perspective.
+ virtual bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+ unsigned NumElts) const;
- static bool classof(const ABIInfo *info) {
- return info->supportsSwift();
- }
+ /// Returns true if swifterror is lowered to a register by the target ABI.
+ bool isSwiftErrorInRegister() const { return SwiftErrorInRegister; };
};
} // end namespace CodeGen
} // end namespace clang
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 7c4e35634e5d..10d6bff25e6d 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -29,12 +30,13 @@
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/IRPrinter/IRPrintingPasses.h"
#include "llvm/LTO/LTOBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -70,14 +72,17 @@
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/KCFI.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
@@ -87,6 +92,7 @@
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
+#include <optional>
using namespace clang;
using namespace llvm;
@@ -96,6 +102,11 @@ using namespace llvm;
namespace llvm {
extern cl::opt<bool> DebugInfoCorrelate;
+
+// Experiment to move sanitizers earlier.
+static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP(
+ "sanitizer-early-opt-ep", cl::Optional,
+ cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false));
}
namespace {
@@ -215,6 +226,16 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
Opts.TraceLoads = CGOpts.SanitizeCoverageTraceLoads;
Opts.TraceStores = CGOpts.SanitizeCoverageTraceStores;
+ Opts.CollectControlFlow = CGOpts.SanitizeCoverageControlFlow;
+ return Opts;
+}
+
+static SanitizerBinaryMetadataOptions
+getSanitizerBinaryMetadataOptions(const CodeGenOptions &CGOpts) {
+ SanitizerBinaryMetadataOptions Opts;
+ Opts.Covered = CGOpts.SanitizeBinaryMetadataCovered;
+ Opts.Atomics = CGOpts.SanitizeBinaryMetadataAtomics;
+ Opts.UAR = CGOpts.SanitizeBinaryMetadataUAR;
return Opts;
}
@@ -250,27 +271,28 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
switch (CodeGenOpts.getVecLib()) {
case CodeGenOptions::Accelerate:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate,
+ TargetTriple);
break;
case CodeGenOptions::LIBMVEC:
- switch(TargetTriple.getArch()) {
- default:
- break;
- case llvm::Triple::x86_64:
- TLII->addVectorizableFunctionsFromVecLib
- (TargetLibraryInfoImpl::LIBMVEC_X86);
- break;
- }
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86,
+ TargetTriple);
break;
case CodeGenOptions::MASSV:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV,
+ TargetTriple);
break;
case CodeGenOptions::SVML:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML,
+ TargetTriple);
+ break;
+ case CodeGenOptions::SLEEF:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,
+ TargetTriple);
break;
case CodeGenOptions::Darwin_libsystem_m:
TLII->addVectorizableFunctionsFromVecLib(
- TargetLibraryInfoImpl::DarwinLibSystemM);
+ TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
break;
default:
break;
@@ -278,22 +300,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
return TLII;
}
-static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) {
- switch (CodeGenOpts.OptimizationLevel) {
- default:
- llvm_unreachable("Invalid optimization level!");
- case 0:
- return CodeGenOpt::None;
- case 1:
- return CodeGenOpt::Less;
- case 2:
- return CodeGenOpt::Default; // O2/Os/Oz
- case 3:
- return CodeGenOpt::Aggressive;
- }
-}
-
-static Optional<llvm::CodeModel::Model>
+static std::optional<llvm::CodeModel::Model>
getCodeModel(const CodeGenOptions &CodeGenOpts) {
unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
.Case("tiny", llvm::CodeModel::Tiny)
@@ -305,7 +312,7 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) {
.Default(~0u);
assert(CodeModel != ~0u && "invalid code model!");
if (CodeModel == ~1u)
- return None;
+ return std::nullopt;
return static_cast<llvm::CodeModel::Model>(CodeModel);
}
@@ -391,7 +398,12 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
Options.NoInfsFPMath = LangOpts.NoHonorInfs;
Options.NoNaNsFPMath = LangOpts.NoHonorNaNs;
Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
- Options.UnsafeFPMath = LangOpts.UnsafeFPMath;
+ Options.UnsafeFPMath = LangOpts.AllowFPReassoc && LangOpts.AllowRecip &&
+ LangOpts.NoSignedZero && LangOpts.ApproxFunc &&
+ (LangOpts.getDefaultFPContractMode() ==
+ LangOptions::FPModeKind::FPM_Fast ||
+ LangOpts.getDefaultFPContractMode() ==
+ LangOptions::FPModeKind::FPM_FastHonorPragmas);
Options.ApproxFuncFPMath = LangOpts.ApproxFunc;
Options.BBSections =
@@ -422,7 +434,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
CodeGenOpts.UniqueBasicBlockSectionNames;
Options.TLSSize = CodeGenOpts.TLSSize;
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
- Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS;
+ Options.ExplicitEmulatedTLS = true;
Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
Options.StackUsageOutput = CodeGenOpts.StackUsageOutput;
@@ -478,15 +490,16 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
Options.MCOptions.Argv0 = CodeGenOpts.Argv0;
Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs;
+ Options.MCOptions.AsSecureLogFile = CodeGenOpts.AsSecureLogFile;
Options.MisExpect = CodeGenOpts.MisExpect;
return true;
}
-static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts,
- const LangOptions &LangOpts) {
+static std::optional<GCOVOptions>
+getGCOVOptions(const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts) {
if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes)
- return None;
+ return std::nullopt;
// Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
// LLVM's -default-gcov-version flag is set to something invalid.
GCOVOptions Options;
@@ -500,11 +513,11 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts,
return Options;
}
-static Optional<InstrProfOptions>
+static std::optional<InstrProfOptions>
getInstrProfOptions(const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts) {
if (!CodeGenOpts.hasProfileClangInstr())
- return None;
+ return std::nullopt;
InstrProfOptions Options;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput;
@@ -547,11 +560,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
return;
}
- Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
+ std::optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
- CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
+ std::optional<CodeGenOpt::Level> OptLevelOrNone =
+ CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel);
+ assert(OptLevelOrNone && "Invalid optimization level!");
+ CodeGenOpt::Level OptLevel = *OptLevelOrNone;
llvm::TargetOptions Options;
if (!initTargetOptions(Diags, Options, CodeGenOpts, TargetOpts, LangOpts,
@@ -620,18 +636,48 @@ static OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
}
}
+static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
+ PassBuilder &PB) {
+ // If the back-end supports KCFI operand bundle lowering, skip KCFIPass.
+ if (TargetTriple.getArch() == llvm::Triple::x86_64 ||
+ TargetTriple.isAArch64(64))
+ return;
+
+ // Ensure we lower KCFI operand bundles with -O0.
+ PB.registerOptimizerLastEPCallback(
+ [&](ModulePassManager &MPM, OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0 &&
+ LangOpts.Sanitize.has(SanitizerKind::KCFI))
+ MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
+ });
+
+ // When optimizations are requested, run KCFIPass after InstCombine to
+ // avoid unnecessary checks.
+ PB.registerPeepholeEPCallback(
+ [&](FunctionPassManager &FPM, OptimizationLevel Level) {
+ if (Level != OptimizationLevel::O0 &&
+ LangOpts.Sanitize.has(SanitizerKind::KCFI))
+ FPM.addPass(KCFIPass());
+ });
+}
+
static void addSanitizers(const Triple &TargetTriple,
const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, PassBuilder &PB) {
- PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM,
- OptimizationLevel Level) {
+ auto SanitizersCallback = [&](ModulePassManager &MPM,
+ OptimizationLevel Level) {
if (CodeGenOpts.hasSanitizeCoverage()) {
auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
- MPM.addPass(ModuleSanitizerCoveragePass(
+ MPM.addPass(SanitizerCoveragePass(
SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles,
CodeGenOpts.SanitizeCoverageIgnorelistFiles));
}
+ if (CodeGenOpts.hasSanitizeBinaryMetadata()) {
+ MPM.addPass(SanitizerBinaryMetadataPass(
+ getSanitizerBinaryMetadataOptions(CodeGenOpts)));
+ }
+
auto MSanPass = [&](SanitizerMask Mask, bool CompileKernel) {
if (LangOpts.Sanitize.has(Mask)) {
int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins;
@@ -639,22 +685,21 @@ static void addSanitizers(const Triple &TargetTriple,
MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel,
CodeGenOpts.SanitizeMemoryParamRetval);
- MPM.addPass(ModuleMemorySanitizerPass(options));
- FunctionPassManager FPM;
- FPM.addPass(MemorySanitizerPass(options));
+ MPM.addPass(MemorySanitizerPass(options));
if (Level != OptimizationLevel::O0) {
- // MemorySanitizer inserts complex instrumentation that mostly
- // follows the logic of the original code, but operates on
- // "shadow" values. It can benefit from re-running some
- // general purpose optimization passes.
- FPM.addPass(EarlyCSEPass());
- // TODO: Consider add more passes like in
- // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible
- // difference on size. It's not clear if the rest is still
- // usefull. InstCombinePass breakes
- // compiler-rt/test/msan/select_origin.cpp.
+ // MemorySanitizer inserts complex instrumentation that mostly follows
+ // the logic of the original code, but operates on "shadow" values. It
+ // can benefit from re-running some general purpose optimization
+ // passes.
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ FunctionPassManager FPM;
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(GVNPass());
+ FPM.addPass(InstCombinePass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
};
MSanPass(SanitizerKind::Memory, false);
@@ -676,8 +721,8 @@ static void addSanitizers(const Triple &TargetTriple,
Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
- MPM.addPass(ModuleAddressSanitizerPass(
- Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
+ MPM.addPass(AddressSanitizerPass(Opts, UseGlobalGC, UseOdrIndicator,
+ DestructorKind));
}
};
ASanPass(SanitizerKind::Address, false);
@@ -697,13 +742,28 @@ static void addSanitizers(const Triple &TargetTriple,
if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) {
MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles));
}
- });
+ };
+ if (ClSanitizeOnOptimizerEarlyEP) {
+ PB.registerOptimizerEarlyEPCallback(
+ [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) {
+ ModulePassManager NewMPM;
+ SanitizersCallback(NewMPM, Level);
+ if (!NewMPM.isEmpty()) {
+ // Sanitizers can abandon <GlobalsAA>.
+ NewMPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ MPM.addPass(std::move(NewMPM));
+ }
+ });
+ } else {
+ // LastEP does not need GlobalsAA.
+ PB.registerOptimizerLastEPCallback(SanitizersCallback);
+ }
}
void EmitAssemblyHelper::RunOptimizationPipeline(
BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS,
std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS) {
- Optional<PGOOptions> PGOOpt;
+ std::optional<PGOOptions> PGOOpt;
if (CodeGenOpts.hasProfileIRInstr())
// -fprofile-generate.
@@ -782,12 +842,20 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PrintPassOptions PrintPassOpts;
PrintPassOpts.Indent = DebugPassStructure;
PrintPassOpts.SkipAnalyses = DebugPassStructure;
- StandardInstrumentations SI(CodeGenOpts.DebugPassManager ||
- DebugPassStructure,
- /*VerifyEach*/ false, PrintPassOpts);
+ StandardInstrumentations SI(
+ TheModule->getContext(),
+ (CodeGenOpts.DebugPassManager || DebugPassStructure),
+ /*VerifyEach*/ false, PrintPassOpts);
SI.registerCallbacks(PIC, &FAM);
PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC);
+ if (CodeGenOpts.EnableAssignmentTracking) {
+ PB.registerPipelineStartEPCallback(
+ [&](ModulePassManager &MPM, OptimizationLevel Level) {
+ MPM.addPass(AssignmentTrackingPass());
+ });
+ }
+
// Enable verify-debuginfo-preserve-each for new PM.
DebugifyEachInstrumentation Debugify;
DebugInfoPerPass DebugInfoBeforePass;
@@ -896,15 +964,18 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
// Don't add sanitizers if we are here from ThinLTO PostLink. That already
// done on PreLink stage.
- if (!IsThinLTOPostLink)
+ if (!IsThinLTOPostLink) {
addSanitizers(TargetTriple, CodeGenOpts, LangOpts, PB);
+ addKCFIPass(TargetTriple, LangOpts, PB);
+ }
- if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts))
+ if (std::optional<GCOVOptions> Options =
+ getGCOVOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
[Options](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(GCOVProfilerPass(*Options));
});
- if (Optional<InstrProfOptions> Options =
+ if (std::optional<InstrProfOptions> Options =
getInstrProfOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
[Options](ModulePassManager &MPM, OptimizationLevel Level) {
@@ -933,19 +1004,24 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule)
MPM.addPass(VerifierPass());
- switch (Action) {
- case Backend_EmitBC:
+ if (Action == Backend_EmitBC || Action == Backend_EmitLL) {
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
- if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
- ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
- if (!ThinLinkOS)
- return;
- }
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
CodeGenOpts.EnableSplitLTOUnit);
- MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
- : nullptr));
+ if (Action == Backend_EmitBC) {
+ if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
+ ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
+ if (!ThinLinkOS)
+ return;
+ }
+ MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
+ : nullptr));
+ } else {
+ MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
+ /*EmitLTOSummary=*/true));
+ }
+
} else {
// Emit a module summary by default for Regular LTO except for ld64
// targets
@@ -957,17 +1033,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
uint32_t(1));
}
- MPM.addPass(
- BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
+ if (Action == Backend_EmitBC)
+ MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
+ EmitLTOSummary));
+ else
+ MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
+ EmitLTOSummary));
}
- break;
-
- case Backend_EmitLL:
- MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists));
- break;
-
- default:
- break;
}
// Now that we have all of the passes ready, run them.
@@ -1059,7 +1131,7 @@ static void runThinLTOBackend(
if (!lto::initImportList(*M, *CombinedIndex, ImportList))
return;
- auto AddStream = [&](size_t Task) {
+ auto AddStream = [&](size_t Task, const Twine &ModuleName) {
return std::make_unique<CachedFileStream>(std::move(OS),
CGOpts.ObjectFilenameForDebug);
};
@@ -1077,7 +1149,10 @@ static void runThinLTOBackend(
Conf.CodeModel = getCodeModel(CGOpts);
Conf.MAttrs = TOpts.Features;
Conf.RelocModel = CGOpts.RelocationModel;
- Conf.CGOptLevel = getCGOptLevel(CGOpts);
+ std::optional<CodeGenOpt::Level> OptLevelOrNone =
+ CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
+ assert(OptLevelOrNone && "Invalid optimization level!");
+ Conf.CGOptLevel = *OptLevelOrNone;
Conf.OptLevel = CGOpts.OptimizationLevel;
initTargetOptions(Diags, Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index dee0cb64be97..8ef95bb80846 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -609,7 +609,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_add_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
: llvm::Instruction::Add;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
@@ -621,7 +621,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_sub_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
: llvm::Instruction::Sub;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
@@ -631,7 +631,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_min_fetch:
PostOpMinMax = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
@@ -642,7 +642,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_max_fetch:
PostOpMinMax = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
@@ -653,7 +653,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_and_fetch:
PostOp = llvm::Instruction::And;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
@@ -663,7 +663,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_or_fetch:
PostOp = llvm::Instruction::Or;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
@@ -673,7 +673,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_xor_fetch:
PostOp = llvm::Instruction::Xor;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
@@ -683,7 +683,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
Op = llvm::AtomicRMWInst::Nand;
@@ -914,13 +914,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
ShouldCastToIntPtrTy = !MemTy->isFloatingType();
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
@@ -1045,7 +1045,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// suitably aligned for the optimized version.
if (Misaligned)
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
@@ -1176,7 +1176,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_fetch_add_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_add_fetch:
PostOp = llvm::Instruction::Add;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
@@ -1189,7 +1189,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_fetch_and_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_and_fetch:
PostOp = llvm::Instruction::And;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
@@ -1202,7 +1202,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_fetch_or_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_or_fetch:
PostOp = llvm::Instruction::Or;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
@@ -1215,7 +1215,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_fetch_sub_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_sub_fetch:
PostOp = llvm::Instruction::Sub;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
@@ -1227,7 +1227,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_fetch_xor_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_xor_fetch:
PostOp = llvm::Instruction::Xor;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
@@ -1238,7 +1238,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
break;
case AtomicExpr::AO__atomic_min_fetch:
PostOpMinMax = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
@@ -1251,7 +1251,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
break;
case AtomicExpr::AO__atomic_max_fetch:
PostOpMinMax = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
@@ -1266,7 +1266,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_fetch_nand_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
LibCallName = "__atomic_fetch_nand";
@@ -1594,7 +1594,7 @@ llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
/// we are operating under /volatile:ms *and* the LValue itself is volatile and
/// performing such an operation can be performed without a libcall.
bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) {
- if (!CGM.getCodeGenOpts().MSVolatile) return false;
+ if (!CGM.getLangOpts().MSVolatile) return false;
AtomicInfo AI(*this, LV);
bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType());
// An atomic is inline if we don't need to use a libcall.
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index ff6ca0914e0d..6e4a0dbf2335 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -502,12 +502,10 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
if (CGM.getLangOpts().OpenCL) {
// The header is basically 'struct { int; int; generic void *;
// custom_fields; }'. Assert that struct is packed.
- auto GenericAS =
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
- auto GenPtrAlign =
- CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
- auto GenPtrSize =
- CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
+ auto GenPtrAlign = CharUnits::fromQuantity(
+ CGM.getTarget().getPointerAlign(LangAS::opencl_generic) / 8);
+ auto GenPtrSize = CharUnits::fromQuantity(
+ CGM.getTarget().getPointerWidth(LangAS::opencl_generic) / 8);
assert(CGM.getIntSize() <= GenPtrSize);
assert(CGM.getIntAlign() <= GenPtrAlign);
assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
@@ -521,10 +519,10 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
unsigned BlockAlign = GenPtrAlign.getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
- for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
+ for (auto *I : Helper->getCustomFieldTypes()) /* custom fields */ {
// TargetOpenCLBlockHelp needs to make sure the struct is packed.
// If necessary, add padding fields to the custom fields.
- unsigned Align = CGM.getDataLayout().getABITypeAlignment(I);
+ unsigned Align = CGM.getDataLayout().getABITypeAlign(I).value();
if (BlockAlign < Align)
BlockAlign = Align;
assert(Offset % Align == 0);
@@ -806,9 +804,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
auto GenVoidPtrSize = CharUnits::fromQuantity(
- CGM.getTarget().getPointerWidth(
- CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
- 8);
+ CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
CodeGenFunction BlockCGF{CGM, true};
@@ -1356,7 +1352,7 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
fields.add(buildBlockDescriptor(CGM, blockInfo));
} else if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
- for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) {
+ for (auto *I : Helper->getCustomFieldValues(CGM, blockInfo)) {
fields.add(I);
}
}
@@ -2676,7 +2672,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
size = varOffset;
// Conversely, we might have to prevent LLVM from inserting padding.
- } else if (CGM.getDataLayout().getABITypeAlignment(varTy) >
+ } else if (CGM.getDataLayout().getABITypeAlign(varTy) >
uint64_t(varAlign.getQuantity())) {
packed = true;
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 113c629bf9ed..f72e04a425d9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -51,20 +52,17 @@
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
+#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/X86TargetParser.h"
+#include <optional>
#include <sstream>
using namespace clang;
using namespace CodeGen;
using namespace llvm;
-static
-int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
- return std::min(High, std::max(Low, Value));
-}
-
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
Align AlignmentInBytes) {
ConstantInt *Byte;
@@ -110,6 +108,15 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
{Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
};
+ // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
+ // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
+ // if it is 64-bit 'long double' mode.
+ static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
+ {Builtin::BI__builtin_frexpl, "frexp"},
+ {Builtin::BI__builtin_ldexpl, "ldexp"},
+ {Builtin::BI__builtin_modfl, "modf"},
+ };
+
// If the builtin has been declared explicitly with an assembler label,
// use the mangled name. This differs from the plain label on platforms
// that prefix labels.
@@ -122,8 +129,14 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
F128Builtins.find(BuiltinID) != F128Builtins.end())
Name = F128Builtins[BuiltinID];
+ else if (getTriple().isOSAIX() &&
+ &getTarget().getLongDoubleFormat() ==
+ &llvm::APFloat::IEEEdouble() &&
+ AIXLongDouble64Builtins.find(BuiltinID) !=
+ AIXLongDouble64Builtins.end())
+ Name = AIXLongDouble64Builtins[BuiltinID];
else
- Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
+ Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
}
llvm::FunctionType *Ty =
@@ -675,6 +688,8 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF,
Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
llvm::Instruction *Call = CGF.Builder.CreateCall(F);
Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
+ Call->setMetadata(llvm::LLVMContext::MD_noundef,
+ llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
return Call;
}
@@ -1164,12 +1179,12 @@ enum class CodeGenFunction::MSVCIntrin {
__fastfail,
};
-static Optional<CodeGenFunction::MSVCIntrin>
+static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
using MSVCIntrin = CodeGenFunction::MSVCIntrin;
switch (BuiltinID) {
default:
- return None;
+ return std::nullopt;
case clang::ARM::BI_BitScanForward:
case clang::ARM::BI_BitScanForward64:
return MSVCIntrin::_BitScanForward;
@@ -1310,12 +1325,12 @@ translateArmToMsvcIntrin(unsigned BuiltinID) {
llvm_unreachable("must return from switch");
}
-static Optional<CodeGenFunction::MSVCIntrin>
+static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
using MSVCIntrin = CodeGenFunction::MSVCIntrin;
switch (BuiltinID) {
default:
- return None;
+ return std::nullopt;
case clang::AArch64::BI_BitScanForward:
case clang::AArch64::BI_BitScanForward64:
return MSVCIntrin::_BitScanForward;
@@ -1464,12 +1479,12 @@ translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
llvm_unreachable("must return from switch");
}
-static Optional<CodeGenFunction::MSVCIntrin>
+static std::optional<CodeGenFunction::MSVCIntrin>
translateX86ToMsvcIntrin(unsigned BuiltinID) {
using MSVCIntrin = CodeGenFunction::MSVCIntrin;
switch (BuiltinID) {
default:
- return None;
+ return std::nullopt;
case clang::X86::BI_BitScanForward:
case clang::X86::BI_BitScanForward64:
return MSVCIntrin::_BitScanForward;
@@ -1704,7 +1719,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
SanitizerHandler::InvalidBuiltin,
{EmitCheckSourceLocation(E->getExprLoc()),
llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
- None);
+ std::nullopt);
return ArgValue;
}
@@ -2204,7 +2219,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// might. Also, math builtins have the same semantics as their math library
// twins. Thus, we can transform math library and builtin calls to their
// LLVM counterparts if the call is marked 'const' (known to never set errno).
- if (FD->hasAttr<ConstAttr>()) {
+ // In case FP exceptions are enabled, the experimental versions of the
+ // intrinsics model those.
+ bool ConstWithoutErrnoAndExceptions =
+ getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
+ bool ConstWithoutExceptions =
+ getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
+ if (FD->hasAttr<ConstAttr>() ||
+ ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
+ (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIceil:
case Builtin::BIceilf:
@@ -2515,11 +2538,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_va_start:
case Builtin::BI__va_start:
case Builtin::BI__builtin_va_end:
- return RValue::get(
- EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
- ? EmitScalarExpr(E->getArg(0))
- : EmitVAListRef(E->getArg(0)).getPointer(),
- BuiltinID != Builtin::BI__builtin_va_end));
+ EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
+ ? EmitScalarExpr(E->getArg(0))
+ : EmitVAListRef(E->getArg(0)).getPointer(),
+ BuiltinID != Builtin::BI__builtin_va_end);
+ return RValue::get(nullptr);
case Builtin::BI__builtin_va_copy: {
Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
@@ -2528,8 +2551,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
DstPtr = Builder.CreateBitCast(DstPtr, Type);
SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
- return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
- {DstPtr, SrcPtr}));
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
+ return RValue::get(nullptr);
}
case Builtin::BI__builtin_abs:
case Builtin::BI__builtin_labs:
@@ -2778,6 +2801,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_assume_aligned: {
const Expr *Ptr = E->getArg(0);
Value *PtrValue = EmitScalarExpr(Ptr);
+ if (PtrValue->getType() != VoidPtrTy)
+ PtrValue = EmitCastToVoidPtr(PtrValue);
Value *OffsetValue =
(E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
@@ -2799,7 +2824,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *ArgValue = EmitScalarExpr(E->getArg(0));
Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
- return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
+ Builder.CreateCall(FnAssume, ArgValue);
+ return RValue::get(nullptr);
}
case Builtin::BI__arithmetic_fence: {
// Create the builtin call if FastMath is selected, and the target
@@ -2920,7 +2946,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::ConstantInt::get(Int32Ty, 3);
Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
+ Builder.CreateCall(F, {Address, RW, Locality, Data});
+ return RValue::get(nullptr);
}
case Builtin::BI__builtin_readcyclecounter: {
Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
@@ -2933,9 +2960,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Builder.CreateCall(F, {Begin, End}));
}
case Builtin::BI__builtin_trap:
- return RValue::get(EmitTrapCall(Intrinsic::trap));
+ EmitTrapCall(Intrinsic::trap);
+ return RValue::get(nullptr);
case Builtin::BI__debugbreak:
- return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
+ EmitTrapCall(Intrinsic::debugtrap);
+ return RValue::get(nullptr);
case Builtin::BI__builtin_unreachable: {
EmitUnreachable(E->getExprLoc());
@@ -2971,7 +3000,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// Ordered comparisons: we know the arguments to these are matching scalar
// floating point values.
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *LHS = EmitScalarExpr(E->getArg(0));
Value *RHS = EmitScalarExpr(E->getArg(1));
@@ -3051,16 +3079,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_ceil:
return RValue::get(
emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
+ case Builtin::BI__builtin_elementwise_cos:
+ return RValue::get(
+ emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
case Builtin::BI__builtin_elementwise_floor:
return RValue::get(
emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
case Builtin::BI__builtin_elementwise_roundeven:
return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
"elt.roundeven"));
+ case Builtin::BI__builtin_elementwise_sin:
+ return RValue::get(
+ emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
+
case Builtin::BI__builtin_elementwise_trunc:
return RValue::get(
emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
-
+ case Builtin::BI__builtin_elementwise_canonicalize:
+ return RValue::get(
+ emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
+ case Builtin::BI__builtin_elementwise_copysign:
+ return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
@@ -3294,7 +3333,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_flt_rounds: {
- Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds);
+ Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
llvm::Type *ResultType = ConvertType(E->getType());
Value *Result = Builder.CreateCall(F);
@@ -3717,7 +3756,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_unwind_init: {
Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
- return RValue::get(Builder.CreateCall(F));
+ Builder.CreateCall(F);
+ return RValue::get(nullptr);
}
case Builtin::BI__builtin_extend_pointer: {
// Extends a pointer to the size of an _Unwind_Word, which is
@@ -4210,7 +4250,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
StringRef WideBytes = Str->getBytes();
std::string StrUtf8;
if (!convertUTF16ToUTF8String(
- makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
+ ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
continue;
}
@@ -4226,8 +4266,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_annotation: {
llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
- AnnVal->getType());
+ llvm::Function *F =
+ CGM.getIntrinsic(llvm::Intrinsic::annotation,
+ {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
// Get the annotation string, go through casts. Sema requires this to be a
// non-wide string literal, potentially casted, so the cast<> is safe.
@@ -4478,8 +4519,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return EmitBuiltinNewDeleteCall(
E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
case Builtin::BI__builtin_operator_delete:
- return EmitBuiltinNewDeleteCall(
+ EmitBuiltinNewDeleteCall(
E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
+ return RValue::get(nullptr);
case Builtin::BI__builtin_is_aligned:
return EmitBuiltinIsAligned(E);
@@ -4644,20 +4686,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__fastfail:
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
- case Builtin::BI__builtin_coro_size: {
- auto & Context = getContext();
- auto SizeTy = Context.getSizeType();
- auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
- Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
- return RValue::get(Builder.CreateCall(F));
- }
-
case Builtin::BI__builtin_coro_id:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
case Builtin::BI__builtin_coro_promise:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
case Builtin::BI__builtin_coro_resume:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
+ EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
+ return RValue::get(nullptr);
case Builtin::BI__builtin_coro_frame:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
case Builtin::BI__builtin_coro_noop:
@@ -4665,7 +4700,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_coro_free:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
case Builtin::BI__builtin_coro_destroy:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
+ EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
+ return RValue::get(nullptr);
case Builtin::BI__builtin_coro_done:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
case Builtin::BI__builtin_coro_alloc:
@@ -4676,6 +4712,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
case Builtin::BI__builtin_coro_suspend:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
+ case Builtin::BI__builtin_coro_size:
+ return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
+ case Builtin::BI__builtin_coro_align:
+ return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
// OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
case Builtin::BIread_pipe:
@@ -5034,7 +5074,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
EmitLifetimeEnd(TmpSize, TmpPtr);
return Call;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
// OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
// parameter.
@@ -5094,7 +5134,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *Val = EmitScalarExpr(E->getArg(0));
Address Address = EmitPointerWithAlignment(E->getArg(1));
Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
- return RValue::get(Builder.CreateStore(HalfVal, Address));
+ Builder.CreateStore(HalfVal, Address);
+ return RValue::get(nullptr);
}
case Builtin::BI__builtin_load_half: {
Address Address = EmitPointerWithAlignment(E->getArg(0));
@@ -5265,7 +5306,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
// See if we have a target specific intrinsic.
- const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
+ StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
StringRef Prefix =
llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
@@ -5359,6 +5400,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
V = Builder.CreateBitCast(V, RetTy);
}
+ if (RetTy->isVoidTy())
+ return RValue::get(nullptr);
+
return RValue::get(V);
}
@@ -5376,6 +5420,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
switch (EvalKind) {
case TEK_Scalar:
+ if (V->getType()->isVoidTy())
+ return RValue::get(nullptr);
return RValue::get(V);
case TEK_Aggregate:
return RValue::getAggregate(ReturnValue.getValue(),
@@ -5433,6 +5479,9 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
+ case llvm::Triple::loongarch32:
+ case llvm::Triple::loongarch64:
+ return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E);
default:
return nullptr;
}
@@ -5630,7 +5679,7 @@ struct ARMVectorIntrinsicInfo {
TypeModifier }
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
- NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0),
+ NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
NEONMAP0(splat_lane_v),
NEONMAP0(splat_laneq_v),
NEONMAP0(splatq_lane_v),
@@ -5642,21 +5691,27 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vadd_v),
NEONMAP0(vaddhn_v),
NEONMAP0(vaddq_v),
- NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
- NEONMAP1(vaeseq_v, arm_neon_aese, 0),
- NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
- NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
- NEONMAP1(vbfdot_v, arm_neon_bfdot, 0),
- NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0),
- NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0),
- NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0),
- NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0),
+ NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
+ NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
+ NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
+ NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
+ NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
- NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, arm_neon_vacge, 0),
NEONMAP1(vcageq_v, arm_neon_vacge, 0),
NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -5682,90 +5737,96 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
- NEONMAP0(vcvt_f16_v),
+ NEONMAP0(vcvt_f16_s16),
+ NEONMAP0(vcvt_f16_u16),
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
- NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvt_s16_v),
+ NEONMAP0(vcvt_s16_f16),
NEONMAP0(vcvt_s32_v),
NEONMAP0(vcvt_s64_v),
- NEONMAP0(vcvt_u16_v),
+ NEONMAP0(vcvt_u16_f16),
NEONMAP0(vcvt_u32_v),
NEONMAP0(vcvt_u64_v),
- NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
- NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
- NEONMAP0(vcvtq_f16_v),
+ NEONMAP0(vcvtq_f16_s16),
+ NEONMAP0(vcvtq_f16_u16),
NEONMAP0(vcvtq_f32_v),
- NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvtq_s16_v),
+ NEONMAP0(vcvtq_s16_f16),
NEONMAP0(vcvtq_s32_v),
NEONMAP0(vcvtq_s64_v),
- NEONMAP0(vcvtq_u16_v),
+ NEONMAP0(vcvtq_u16_f16),
NEONMAP0(vcvtq_u32_v),
NEONMAP0(vcvtq_u64_v),
- NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
- NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
+ NEONMAP1(vdot_s32, arm_neon_sdot, 0),
+ NEONMAP1(vdot_u32, arm_neon_udot, 0),
+ NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
+ NEONMAP1(vdotq_u32, arm_neon_udot, 0),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
@@ -5810,7 +5871,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
- NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0),
+ NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
+ NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
@@ -5837,10 +5899,14 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
- NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
@@ -5883,12 +5949,12 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
- NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
- NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
- NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
- NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
- NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
- NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
+ NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
+ NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
+ NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
+ NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
+ NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
+ NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
NEONMAP0(vshl_n_v),
NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
NEONMAP0(vshll_n_v),
@@ -5922,9 +5988,9 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vtrnq_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
- NEONMAP1(vusdot_v, arm_neon_usdot, 0),
- NEONMAP1(vusdotq_v, arm_neon_usdot, 0),
- NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0),
+ NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
+ NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
NEONMAP0(vuzp_v),
NEONMAP0(vuzpq_v),
NEONMAP0(vzip_v),
@@ -5932,7 +5998,7 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
};
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
- NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0),
+ NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
NEONMAP0(splat_lane_v),
NEONMAP0(splat_laneq_v),
NEONMAP0(splatq_lane_v),
@@ -5943,20 +6009,33 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vaddhn_v),
NEONMAP0(vaddq_p128),
NEONMAP0(vaddq_v),
- NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
- NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
- NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
- NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
- NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
- NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
- NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
- NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
- NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0),
- NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0),
- NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
+ NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
+ NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
+ NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
+ NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
+ NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
@@ -5979,57 +6058,82 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
- NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
- NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
- NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
- NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
- NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
- NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
- NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
- NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
- NEONMAP0(vcvt_f16_v),
+ NEONMAP0(vcvt_f16_s16),
+ NEONMAP0(vcvt_f16_u16),
NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
- NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvtq_f16_v),
+ NEONMAP0(vcvtq_f16_s16),
+ NEONMAP0(vcvtq_f16_u16),
NEONMAP0(vcvtq_f32_v),
- NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0),
- NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
+ NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
- NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
- NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
- NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
+ NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
+ NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
+ NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
+ NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
NEONMAP0(vfmaq_v),
- NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
- NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
- NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
- NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
- NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
- NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
- NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
- NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
@@ -6040,7 +6144,8 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
- NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0),
+ NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
+ NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
@@ -6066,10 +6171,14 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
- NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
@@ -6087,21 +6196,21 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
- NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
+ NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
- NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
- NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
- NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
- NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
- NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
- NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
- NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
- NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -6113,16 +6222,16 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
- NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
- NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
- NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
- NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
- NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
- NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
- NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
- NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
- NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
- NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
+ NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
+ NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
+ NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
+ NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
+ NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
+ NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
+ NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
+ NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
+ NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
+ NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
NEONMAP0(vshl_n_v),
NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
NEONMAP0(vshll_n_v),
@@ -6131,15 +6240,15 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vshr_n_v),
NEONMAP0(vshrn_n_v),
NEONMAP0(vshrq_n_v),
- NEONMAP1(vsm3partw1q_v, aarch64_crypto_sm3partw1, 0),
- NEONMAP1(vsm3partw2q_v, aarch64_crypto_sm3partw2, 0),
- NEONMAP1(vsm3ss1q_v, aarch64_crypto_sm3ss1, 0),
- NEONMAP1(vsm3tt1aq_v, aarch64_crypto_sm3tt1a, 0),
- NEONMAP1(vsm3tt1bq_v, aarch64_crypto_sm3tt1b, 0),
- NEONMAP1(vsm3tt2aq_v, aarch64_crypto_sm3tt2a, 0),
- NEONMAP1(vsm3tt2bq_v, aarch64_crypto_sm3tt2b, 0),
- NEONMAP1(vsm4ekeyq_v, aarch64_crypto_sm4ekey, 0),
- NEONMAP1(vsm4eq_v, aarch64_crypto_sm4e, 0),
+ NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
+ NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
+ NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
+ NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
+ NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
+ NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
+ NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
+ NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
+ NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
@@ -6149,10 +6258,10 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vsubhn_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
- NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
- NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
- NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
- NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
+ NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
+ NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
+ NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
@@ -6394,6 +6503,148 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
+// Some intrinsics are equivalent for codegen.
+static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
+ { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
+ { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
+ { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
+ { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
+ { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
+ { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
+ { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
+ { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
+ { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
+ { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
+ { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
+ { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
+ { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
+ { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
+ { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
+ { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
+ { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
+ { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
+ { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
+ { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
+ { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
+ { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
+ { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
+ { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
+ { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
+ { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
+ { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
+ { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
+ { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
+ { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
+ { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
+ { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
+ { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
+ { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
+ { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
+ { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
+ { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
+ { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
+ { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
+ { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
+ { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
+ { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
+ { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
+ { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
+ { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
+ { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
+ { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
+ { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
+ { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
+ { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
+ { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
+ { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
+ { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
+ { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
+ { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
+ { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
+ { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
+ { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
+ { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
+ { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
+ { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
+ { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
+ { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
+ { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
+ { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
+ { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
+ { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
+ { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
+ { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
+ { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
+ { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
+ { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
+ { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
+ { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
+ { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
+ { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
+ { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
+ { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
+ { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
+ { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
+ { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
+ { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
+ { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
+ { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
+ { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
+ { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
+ { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
+ { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
+ { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
+ { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
+ { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
+ { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
+ { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
+ { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
+ { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
+ { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
+ { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
+ { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
+ { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
+ { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
+ { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
+ { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
+ { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
+ { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
+ { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
+ { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
+ { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
+ { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
+ { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
+ { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
+ { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
+ { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
+ { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
+ { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
+ { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
+ { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
+ { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
+ { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
+ { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
+ { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
+ { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
+ { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
+ { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
+ { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
+ { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
+ { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
+ { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
+ { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
+ { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
+ { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
+ { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
+ { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
+ { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
+ { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
+ { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
+ { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
+ { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
+ { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
+};
+
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2
@@ -6531,13 +6782,13 @@ static Value *EmitCommonNeonSISDBuiltinExpr(
Ops[j] = CGF.Builder.CreateTruncOrBitCast(
Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
Ops[j] =
- CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
+ CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
}
Value *Result = CGF.EmitNeonCall(F, Ops, s);
llvm::Type *ResultType = CGF.ConvertType(E->getType());
- if (ResultType->getPrimitiveSizeInBits().getFixedSize() <
- Result->getType()->getPrimitiveSizeInBits().getFixedSize())
+ if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
+ Result->getType()->getPrimitiveSizeInBits().getFixedValue())
return CGF.Builder.CreateExtractElement(Result, C0);
return CGF.Builder.CreateBitCast(Result, ResultType, s);
@@ -6550,7 +6801,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
llvm::Triple::ArchType Arch) {
// Get the last argument, which specifies the vector type.
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- Optional<llvm::APSInt> NeonTypeConst =
+ std::optional<llvm::APSInt> NeonTypeConst =
Arg->getIntegerConstantExpr(getContext());
if (!NeonTypeConst)
return nullptr;
@@ -6634,7 +6885,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcalt_v:
case NEON::BI__builtin_neon_vcaltq_v:
std::swap(Ops[0], Ops[1]);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vcage_v:
case NEON::BI__builtin_neon_vcageq_v:
case NEON::BI__builtin_neon_vcagt_v:
@@ -6690,17 +6941,25 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- case NEON::BI__builtin_neon_vcvt_f16_v:
- case NEON::BI__builtin_neon_vcvtq_f16_v:
+ case NEON::BI__builtin_neon_vcvt_f16_s16:
+ case NEON::BI__builtin_neon_vcvt_f16_u16:
+ case NEON::BI__builtin_neon_vcvtq_f16_s16:
+ case NEON::BI__builtin_neon_vcvtq_f16_u16:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- case NEON::BI__builtin_neon_vcvt_n_f16_v:
+ case NEON::BI__builtin_neon_vcvt_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvt_n_f16_u16:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
+ llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvt_n");
+ }
case NEON::BI__builtin_neon_vcvt_n_f32_v:
case NEON::BI__builtin_neon_vcvt_n_f64_v:
- case NEON::BI__builtin_neon_vcvtq_n_f16_v:
case NEON::BI__builtin_neon_vcvtq_n_f32_v:
case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
@@ -6708,15 +6967,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
- case NEON::BI__builtin_neon_vcvt_n_s16_v:
+ case NEON::BI__builtin_neon_vcvt_n_s16_f16:
case NEON::BI__builtin_neon_vcvt_n_s32_v:
- case NEON::BI__builtin_neon_vcvt_n_u16_v:
+ case NEON::BI__builtin_neon_vcvt_n_u16_f16:
case NEON::BI__builtin_neon_vcvt_n_u32_v:
case NEON::BI__builtin_neon_vcvt_n_s64_v:
case NEON::BI__builtin_neon_vcvt_n_u64_v:
- case NEON::BI__builtin_neon_vcvtq_n_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
case NEON::BI__builtin_neon_vcvtq_n_s32_v:
- case NEON::BI__builtin_neon_vcvtq_n_u16_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
case NEON::BI__builtin_neon_vcvtq_n_u32_v:
case NEON::BI__builtin_neon_vcvtq_n_s64_v:
case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
@@ -6728,64 +6987,64 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
- case NEON::BI__builtin_neon_vcvt_s16_v:
- case NEON::BI__builtin_neon_vcvt_u16_v:
+ case NEON::BI__builtin_neon_vcvt_s16_f16:
+ case NEON::BI__builtin_neon_vcvt_u16_f16:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
case NEON::BI__builtin_neon_vcvtq_u64_v:
- case NEON::BI__builtin_neon_vcvtq_s16_v:
- case NEON::BI__builtin_neon_vcvtq_u16_v: {
+ case NEON::BI__builtin_neon_vcvtq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtq_u16_f16: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
}
- case NEON::BI__builtin_neon_vcvta_s16_v:
+ case NEON::BI__builtin_neon_vcvta_s16_f16:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
- case NEON::BI__builtin_neon_vcvta_u16_v:
+ case NEON::BI__builtin_neon_vcvta_u16_f16:
case NEON::BI__builtin_neon_vcvta_u32_v:
case NEON::BI__builtin_neon_vcvta_u64_v:
- case NEON::BI__builtin_neon_vcvtaq_s16_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_f16:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
- case NEON::BI__builtin_neon_vcvtaq_u16_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_f16:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
case NEON::BI__builtin_neon_vcvtaq_u64_v:
- case NEON::BI__builtin_neon_vcvtn_s16_v:
+ case NEON::BI__builtin_neon_vcvtn_s16_f16:
case NEON::BI__builtin_neon_vcvtn_s32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
- case NEON::BI__builtin_neon_vcvtn_u16_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_f16:
case NEON::BI__builtin_neon_vcvtn_u32_v:
case NEON::BI__builtin_neon_vcvtn_u64_v:
- case NEON::BI__builtin_neon_vcvtnq_s16_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_f16:
case NEON::BI__builtin_neon_vcvtnq_s32_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
- case NEON::BI__builtin_neon_vcvtnq_u16_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_f16:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
case NEON::BI__builtin_neon_vcvtnq_u64_v:
- case NEON::BI__builtin_neon_vcvtp_s16_v:
+ case NEON::BI__builtin_neon_vcvtp_s16_f16:
case NEON::BI__builtin_neon_vcvtp_s32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
- case NEON::BI__builtin_neon_vcvtp_u16_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_f16:
case NEON::BI__builtin_neon_vcvtp_u32_v:
case NEON::BI__builtin_neon_vcvtp_u64_v:
- case NEON::BI__builtin_neon_vcvtpq_s16_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_f16:
case NEON::BI__builtin_neon_vcvtpq_s32_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
- case NEON::BI__builtin_neon_vcvtpq_u16_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_f16:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
case NEON::BI__builtin_neon_vcvtpq_u64_v:
- case NEON::BI__builtin_neon_vcvtm_s16_v:
+ case NEON::BI__builtin_neon_vcvtm_s16_f16:
case NEON::BI__builtin_neon_vcvtm_s32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
- case NEON::BI__builtin_neon_vcvtm_u16_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_f16:
case NEON::BI__builtin_neon_vcvtm_u32_v:
case NEON::BI__builtin_neon_vcvtm_u64_v:
- case NEON::BI__builtin_neon_vcvtmq_s16_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_f16:
case NEON::BI__builtin_neon_vcvtmq_s32_v:
case NEON::BI__builtin_neon_vcvtmq_s64_v:
- case NEON::BI__builtin_neon_vcvtmq_u16_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_f16:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtmq_u64_v: {
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
@@ -6861,7 +7120,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vld1_dup_v:
case NEON::BI__builtin_neon_vld1q_dup_v: {
- Value *V = UndefValue::get(Ty);
+ Value *V = PoisonValue::get(Ty);
PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
LoadInst *Ld = Builder.CreateLoad(PtrOp0);
llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
@@ -6879,7 +7138,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
for (unsigned I = 2; I < Ops.size() - 1; ++I)
Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
Ops.push_back(getAlignmentValue32(PtrOp1));
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
+ Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
@@ -6983,10 +7242,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vrshrq_n_v:
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
1, true);
- case NEON::BI__builtin_neon_vsha512hq_v:
- case NEON::BI__builtin_neon_vsha512h2q_v:
- case NEON::BI__builtin_neon_vsha512su0q_v:
- case NEON::BI__builtin_neon_vsha512su1q_v: {
+ case NEON::BI__builtin_neon_vsha512hq_u64:
+ case NEON::BI__builtin_neon_vsha512h2q_u64:
+ case NEON::BI__builtin_neon_vsha512su0q_u64:
+ case NEON::BI__builtin_neon_vsha512su1q_u64: {
Function *F = CGM.getIntrinsic(Int);
return EmitNeonCall(F, Ops, "");
}
@@ -7038,18 +7297,18 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops.push_back(getAlignmentValue32(PtrOp0));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
}
- case NEON::BI__builtin_neon_vsm3partw1q_v:
- case NEON::BI__builtin_neon_vsm3partw2q_v:
- case NEON::BI__builtin_neon_vsm3ss1q_v:
- case NEON::BI__builtin_neon_vsm4ekeyq_v:
- case NEON::BI__builtin_neon_vsm4eq_v: {
+ case NEON::BI__builtin_neon_vsm3partw1q_u32:
+ case NEON::BI__builtin_neon_vsm3partw2q_u32:
+ case NEON::BI__builtin_neon_vsm3ss1q_u32:
+ case NEON::BI__builtin_neon_vsm4ekeyq_u32:
+ case NEON::BI__builtin_neon_vsm4eq_u32: {
Function *F = CGM.getIntrinsic(Int);
return EmitNeonCall(F, Ops, "");
}
- case NEON::BI__builtin_neon_vsm3tt1aq_v:
- case NEON::BI__builtin_neon_vsm3tt1bq_v:
- case NEON::BI__builtin_neon_vsm3tt2aq_v:
- case NEON::BI__builtin_neon_vsm3tt2bq_v: {
+ case NEON::BI__builtin_neon_vsm3tt1aq_u32:
+ case NEON::BI__builtin_neon_vsm3tt1bq_u32:
+ case NEON::BI__builtin_neon_vsm3tt2aq_u32:
+ case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
Function *F = CGM.getIntrinsic(Int);
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
return EmitNeonCall(F, Ops, "");
@@ -7135,7 +7394,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
return SV;
}
- case NEON::BI__builtin_neon_vxarq_v: {
+ case NEON::BI__builtin_neon_vxarq_u64: {
Function *F = CGM.getIntrinsic(Int);
Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
return EmitNeonCall(F, Ops, "");
@@ -7159,70 +7418,71 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
return SV;
}
- case NEON::BI__builtin_neon_vdot_v:
- case NEON::BI__builtin_neon_vdotq_v: {
+ case NEON::BI__builtin_neon_vdot_s32:
+ case NEON::BI__builtin_neon_vdot_u32:
+ case NEON::BI__builtin_neon_vdotq_s32:
+ case NEON::BI__builtin_neon_vdotq_u32: {
auto *InputTy =
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
llvm::Type *Tys[2] = { Ty, InputTy };
- Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
}
- case NEON::BI__builtin_neon_vfmlal_low_v:
- case NEON::BI__builtin_neon_vfmlalq_low_v: {
+ case NEON::BI__builtin_neon_vfmlal_low_f16:
+ case NEON::BI__builtin_neon_vfmlalq_low_f16: {
auto *InputTy =
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
}
- case NEON::BI__builtin_neon_vfmlsl_low_v:
- case NEON::BI__builtin_neon_vfmlslq_low_v: {
+ case NEON::BI__builtin_neon_vfmlsl_low_f16:
+ case NEON::BI__builtin_neon_vfmlslq_low_f16: {
auto *InputTy =
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
}
- case NEON::BI__builtin_neon_vfmlal_high_v:
- case NEON::BI__builtin_neon_vfmlalq_high_v: {
+ case NEON::BI__builtin_neon_vfmlal_high_f16:
+ case NEON::BI__builtin_neon_vfmlalq_high_f16: {
auto *InputTy =
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
}
- case NEON::BI__builtin_neon_vfmlsl_high_v:
- case NEON::BI__builtin_neon_vfmlslq_high_v: {
+ case NEON::BI__builtin_neon_vfmlsl_high_f16:
+ case NEON::BI__builtin_neon_vfmlslq_high_f16: {
auto *InputTy =
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
}
- case NEON::BI__builtin_neon_vmmlaq_v: {
+ case NEON::BI__builtin_neon_vmmlaq_s32:
+ case NEON::BI__builtin_neon_vmmlaq_u32: {
auto *InputTy =
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
llvm::Type *Tys[2] = { Ty, InputTy };
- Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla");
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
}
- case NEON::BI__builtin_neon_vusmmlaq_v: {
+ case NEON::BI__builtin_neon_vusmmlaq_s32: {
auto *InputTy =
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
}
- case NEON::BI__builtin_neon_vusdot_v:
- case NEON::BI__builtin_neon_vusdotq_v: {
+ case NEON::BI__builtin_neon_vusdot_s32:
+ case NEON::BI__builtin_neon_vusdotq_s32: {
auto *InputTy =
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
}
- case NEON::BI__builtin_neon_vbfdot_v:
- case NEON::BI__builtin_neon_vbfdotq_v: {
+ case NEON::BI__builtin_neon_vbfdot_f32:
+ case NEON::BI__builtin_neon_vbfdotq_f32: {
llvm::Type *InputTy =
llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
}
- case NEON::BI__builtin_neon___a32_vcvt_bf16_v: {
+ case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
llvm::Type *Tys[1] = { Ty };
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvtfp2bf");
@@ -7354,9 +7614,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
llvm::Type *ValueType,
SpecialRegisterAccessKind AccessKind,
StringRef SysReg = "") {
- // write and register intrinsics only support 32 and 64 bit operations.
- assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
- && "Unsupported size for register.");
+ // write and register intrinsics only support 32, 64 and 128 bit operations.
+ assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
+ RegisterType->isIntegerTy(128)) &&
+ "Unsupported size for register.");
CodeGen::CGBuilderTy &Builder = CGF.Builder;
CodeGen::CodeGenModule &CGM = CGF.CGM;
@@ -7741,7 +8002,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Value *Arg0 = EmitScalarExpr(E->getArg(0));
Value *Arg1 = EmitScalarExpr(E->getArg(1));
- // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w
+ // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
// intrinsics, hence we need different codegen for these cases.
if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
@@ -7802,7 +8063,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
- if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
+ if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
return EmitMSVCBuiltinExpr(*MsvcIntId, E);
// Deal with MVE builtins
@@ -7812,6 +8073,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
return Result;
+ // Some intrinsics are equivalent - if they are use the base intrinsic ID.
+ auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
+ return P.first == BuiltinID;
+ });
+ if (It != end(NEONEquivalentIntrinsicMap))
+ BuiltinID = It->second;
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -7971,7 +8239,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// Get the last argument, which specifies the vector type.
assert(HasExtraArg);
const Expr *Arg = E->getArg(E->getNumArgs()-1);
- Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext());
+ std::optional<llvm::APSInt> Result =
+ Arg->getIntegerConstantExpr(getContext());
if (!Result)
return nullptr;
@@ -8007,7 +8276,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// Many NEON builtins have identical semantics and uses in ARM and
// AArch64. Emit these in a single function.
- auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
+ auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
if (Builtin)
@@ -8037,7 +8306,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
int Indices[] = {1 - Lane, Lane};
return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vld1_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
@@ -8077,7 +8346,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vsri_n_v:
case NEON::BI__builtin_neon_vsriq_n_v:
rightShift = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vsli_n_v:
case NEON::BI__builtin_neon_vsliq_n_v:
Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
@@ -8100,7 +8369,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
Tys), Ops);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vst1_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
@@ -8301,9 +8570,9 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
Ops.push_back(EmitScalarExpr(Addr));
Tys.push_back(ConvertType(Addr->getType()));
- Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+ Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
Value *LoadResult = Builder.CreateCall(F, Ops);
- Value *MvecOut = UndefValue::get(MvecLType);
+ Value *MvecOut = PoisonValue::get(MvecLType);
for (unsigned i = 0; i < NumVectors; ++i) {
Value *Vec = Builder.CreateExtractValue(LoadResult, i);
MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
@@ -8343,7 +8612,7 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
for (unsigned i = 0; i < NumVectors; i++)
Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
- Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+ Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
Value *ToReturn = nullptr;
for (unsigned i = 0; i < NumVectors; i++) {
Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
@@ -8409,7 +8678,8 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
// Get the last argument, which specifies the vector type.
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext());
+ std::optional<llvm::APSInt> Result =
+ Arg->getIntegerConstantExpr(CGF.getContext());
if (!Result)
return nullptr;
@@ -8425,29 +8695,25 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
// argument that specifies the vector type, need to handle each case.
switch (BuiltinID) {
case NEON::BI__builtin_neon_vtbl1_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
- Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
- "vtbl1");
+ return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
+ Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
}
case NEON::BI__builtin_neon_vtbl2_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
- Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
- "vtbl1");
+ return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
+ Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
}
case NEON::BI__builtin_neon_vtbl3_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
- Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
- "vtbl2");
+ return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
+ Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
}
case NEON::BI__builtin_neon_vtbl4_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
- Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
- "vtbl2");
+ return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
+ Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
}
case NEON::BI__builtin_neon_vtbx1_v: {
Value *TblRes =
- packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
- Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
+ packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
+ Intrinsic::aarch64_neon_tbl1, "vtbl1");
llvm::Constant *EightV = ConstantInt::get(Ty, 8);
Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
@@ -8458,14 +8724,13 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
}
case NEON::BI__builtin_neon_vtbx2_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
- Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
- "vtbx1");
+ return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
+ Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
}
case NEON::BI__builtin_neon_vtbx3_v: {
Value *TblRes =
- packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
- Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
+ packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
+ Intrinsic::aarch64_neon_tbl2, "vtbl2");
llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
@@ -8477,9 +8742,8 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
}
case NEON::BI__builtin_neon_vtbx4_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
- Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
- "vtbx2");
+ return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
+ Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
}
case NEON::BI__builtin_neon_vqtbl1_v:
case NEON::BI__builtin_neon_vqtbl1q_v:
@@ -8518,7 +8782,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
Op = Builder.CreateBitCast(Op, Int16Ty);
- Value *V = UndefValue::get(VTy);
+ Value *V = PoisonValue::get(VTy);
llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
Op = Builder.CreateInsertElement(V, Op, CI);
return Op;
@@ -8732,8 +8996,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
unsigned BytesPerElt =
OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
- Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
- Ops[2] = Builder.CreateMul(Ops[2], Scale);
+ Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
}
Value *Call = Builder.CreateCall(F, Ops);
@@ -8792,8 +9055,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
unsigned BytesPerElt =
OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
- Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
- Ops[3] = Builder.CreateMul(Ops[3], Scale);
+ Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
}
return Builder.CreateCall(F, Ops);
@@ -8823,8 +9085,8 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
// Index needs to be passed as scaled offset.
llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
- Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
- Ops[2] = Builder.CreateMul(Ops[2], Scale);
+ if (BytesPerElt > 1)
+ Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
}
}
@@ -8841,13 +9103,13 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
unsigned N;
switch (IntID) {
- case Intrinsic::aarch64_sve_ld2:
+ case Intrinsic::aarch64_sve_ld2_sret:
N = 2;
break;
- case Intrinsic::aarch64_sve_ld3:
+ case Intrinsic::aarch64_sve_ld3_sret:
N = 3;
break;
- case Intrinsic::aarch64_sve_ld4:
+ case Intrinsic::aarch64_sve_ld4_sret:
N = 4;
break;
default:
@@ -8858,12 +9120,22 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy);
- Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
- BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
- BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
- Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
- return Builder.CreateCall(F, { Predicate, BasePtr });
+ // Does the load have an offset?
+ if (Ops.size() > 2)
+ BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
+
+ BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
+ Function *F = CGM.getIntrinsic(IntID, {VTy});
+ Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
+ unsigned MinElts = VTy->getMinNumElements();
+ Value *Ret = llvm::PoisonValue::get(RetTy);
+ for (unsigned I = 0; I < N; I++) {
+ Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
+ Value *SRet = Builder.CreateExtractValue(Call, I);
+ Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
+ }
+ return Ret;
}
Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
@@ -8887,23 +9159,25 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
default:
llvm_unreachable("unknown intrinsic!");
}
- auto TupleTy =
- llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
- Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
- Value *Val = Ops.back();
- BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
+
+ // Does the store have an offset?
+ if (Ops.size() > 3)
+ BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
+
BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
+ Value *Val = Ops.back();
// The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
// need to break up the tuple vector.
SmallVector<llvm::Value*, 5> Operands;
- Function *FExtr =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
- for (unsigned I = 0; I < N; ++I)
- Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
+ unsigned MinElts = VTy->getElementCount().getKnownMinValue();
+ for (unsigned I = 0; I < N; ++I) {
+ Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
+ Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
+ }
Operands.append({Predicate, BasePtr});
Function *F = CGM.getIntrinsic(IntID, { VTy });
@@ -8978,8 +9252,10 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
- Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
- BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+ // Does the load have an offset?
+ if (Ops.size() > 2)
+ BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
@@ -9006,8 +9282,10 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
- Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
- BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+ // Does the store have an offset?
+ if (Ops.size() == 4)
+ BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
// Last value is always the data
llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
@@ -9024,8 +9302,8 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
- auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty);
- return Builder.CreateCall(F, Scalar);
+ return Builder.CreateVectorSplat(
+ cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
}
Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
@@ -9069,16 +9347,46 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
if (TypeFlags.isOverloadWhileRW())
return {getSVEPredType(TypeFlags), Ops[0]->getType()};
- if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet())
+ if (TypeFlags.isOverloadCvt())
return {Ops[0]->getType(), Ops.back()->getType()};
- if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet())
- return {ResultType, Ops[0]->getType()};
-
assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
return {DefaultType};
}
+Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
+ llvm::Type *Ty,
+ ArrayRef<Value *> Ops) {
+ assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
+ "Expects TypleFlag isTupleSet or TypeFlags.isTupleSet()");
+
+ unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
+ auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
+ TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
+ Value *Idx = ConstantInt::get(CGM.Int64Ty,
+ I * SingleVecTy->getMinNumElements());
+
+ if (TypeFlags.isTupleSet())
+ return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
+ return Builder.CreateExtractVector(Ty, Ops[0], Idx);
+}
+
+Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
+ llvm::Type *Ty,
+ ArrayRef<Value *> Ops) {
+ assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
+
+ auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
+ unsigned MinElts = SrcTy->getMinNumElements();
+ Value *Call = llvm::PoisonValue::get(Ty);
+ for (unsigned I = 0; I < Ops.size(); I++) {
+ Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
+ Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
+ }
+
+ return Call;
+}
+
Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
// Find out if any arguments are required to be integer constant expressions.
@@ -9101,7 +9409,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
else {
// If this is required to be a constant, constant fold it so that we know
// that the generated intrinsic gets a ConstantInt.
- Optional<llvm::APSInt> Result =
+ std::optional<llvm::APSInt> Result =
E->getArg(i)->getIntegerConstantExpr(getContext());
assert(Result && "Expected argument to be a constant");
@@ -9133,6 +9441,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isStructStore())
return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
+ return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
+ else if (TypeFlags.isTupleCreate())
+ return EmitSVETupleCreate(TypeFlags, Ty, Ops);
else if (TypeFlags.isUndef())
return UndefValue::get(Ty);
else if (Builtin->LLVMIntrinsic != 0) {
@@ -9171,8 +9483,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
llvm::Type *OpndTy = Ops[1]->getType();
auto *SplatZero = Constant::getNullValue(OpndTy);
- Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy);
- Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero});
+ Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
}
Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
@@ -9285,12 +9596,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
Value *Vec = BuildVector(VecOps);
- SVETypeFlags TypeFlags(Builtin->TypeModifier);
- Value *Pred = EmitSVEAllTruePred(TypeFlags);
-
llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
Value *InsertSubVec = Builder.CreateInsertVector(
- OverloadedTy, UndefValue::get(OverloadedTy), Vec, Builder.getInt64(0));
+ OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
@@ -9300,6 +9608,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
if (!IsBoolTy)
return DupQLane;
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ Value *Pred = EmitSVEAllTruePred(TypeFlags);
+
// For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
: Intrinsic::aarch64_sve_cmpne_wide,
@@ -9346,12 +9657,12 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svtbl2_f32:
case SVE::BI__builtin_sve_svtbl2_f64: {
SVETypeFlags TF(Builtin->TypeModifier);
- auto VTy = cast<llvm::VectorType>(getSVEType(TF));
- auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy);
- Function *FExtr =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
- Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
- Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)});
+ auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
+ Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
+ ConstantInt::get(CGM.Int64Ty, 0));
+ unsigned MinElts = VTy->getMinNumElements();
+ Value *V1 = Builder.CreateExtractVector(
+ VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
return Builder.CreateCall(F, {V0, V1, Ops[1]});
}
@@ -9398,7 +9709,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svdup_neonq_f32:
case SVE::BI__builtin_sve_svdup_neonq_f64:
case SVE::BI__builtin_sve_svdup_neonq_bf16: {
- Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0],
+ Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
Builder.getInt64(0));
return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
{Insert, Builder.getInt64(0)});
@@ -9449,29 +9760,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
}
- if (BuiltinID == clang::AArch64::BI__builtin_arm_prefetch) {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *RW = EmitScalarExpr(E->getArg(1));
- Value *CacheLevel = EmitScalarExpr(E->getArg(2));
- Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
- Value *IsData = EmitScalarExpr(E->getArg(4));
-
- Value *Locality = nullptr;
- if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
- // Temporal fetch, needs to convert cache level to locality.
- Locality = llvm::ConstantInt::get(Int32Ty,
- -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
- } else {
- // Streaming fetch.
- Locality = llvm::ConstantInt::get(Int32Ty, 0);
- }
-
- // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
- // PLDL3STRM or PLDL2STRM.
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return Builder.CreateCall(F, {Address, RW, Locality, IsData});
- }
-
if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
assert((getContext().getTypeSize(E->getType()) == 32) &&
"rbit of unusual size!");
@@ -9498,32 +9786,32 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
"cls");
}
- if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32zf ||
- BuiltinID == clang::AArch64::BI__builtin_arm_frint32z) {
+ if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = Arg->getType();
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
Arg, "frint32z");
}
- if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64zf ||
- BuiltinID == clang::AArch64::BI__builtin_arm_frint64z) {
+ if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = Arg->getType();
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
Arg, "frint64z");
}
- if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32xf ||
- BuiltinID == clang::AArch64::BI__builtin_arm_frint32x) {
+ if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = Arg->getType();
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
Arg, "frint32x");
}
- if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64xf ||
- BuiltinID == clang::AArch64::BI__builtin_arm_frint64x) {
+ if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = Arg->getType();
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
@@ -9875,32 +10163,43 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
SpecialRegisterAccessKind AccessKind = Write;
if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
AccessKind = VolatileRead;
bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
- bool Is64Bit = BuiltinID != clang::AArch64::BI__builtin_arm_rsr &&
- BuiltinID != clang::AArch64::BI__builtin_arm_wsr;
+ bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
+
+ bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
+ BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
llvm::Type *ValueType;
llvm::Type *RegisterType = Int64Ty;
- if (IsPointerBuiltin) {
+ if (Is32Bit) {
+ ValueType = Int32Ty;
+ } else if (Is128Bit) {
+ llvm::Type *Int128Ty =
+ llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
+ ValueType = Int128Ty;
+ RegisterType = Int128Ty;
+ } else if (IsPointerBuiltin) {
ValueType = VoidPtrTy;
- } else if (Is64Bit) {
- ValueType = Int64Ty;
} else {
- ValueType = Int32Ty;
- }
+ ValueType = Int64Ty;
+ };
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
AccessKind);
@@ -10026,9 +10325,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
- if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
+ if (std::optional<MSVCIntrin> MsvcIntId =
+ translateAarch64ToMsvcIntrin(BuiltinID))
return EmitMSVCBuiltinExpr(*MsvcIntId, E);
+ // Some intrinsics are equivalent - if they are use the base intrinsic ID.
+ auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
+ return P.first == BuiltinID;
+ });
+ if (It != end(NEONEquivalentIntrinsicMap))
+ BuiltinID = It->second;
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -10069,7 +10376,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
}
- auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
+ auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
@@ -10082,7 +10389,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
const Expr *Arg = E->getArg(E->getNumArgs()-1);
NeonTypeFlags Type(0);
- if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()))
+ if (std::optional<llvm::APSInt> Result =
+ Arg->getIntegerConstantExpr(getContext()))
// Determine the type of this overloaded NEON intrinsic.
Type = NeonTypeFlags(Result->getZExtValue());
@@ -10119,7 +10427,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvts_f32_u32:
case NEON::BI__builtin_neon_vcvtd_f64_u64:
usgn = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vcvts_f32_s32:
case NEON::BI__builtin_neon_vcvtd_f64_s64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -10135,7 +10443,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_f16_u32:
case NEON::BI__builtin_neon_vcvth_f16_u64:
usgn = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vcvth_f16_s16:
case NEON::BI__builtin_neon_vcvth_f16_s32:
case NEON::BI__builtin_neon_vcvth_f16_s64: {
@@ -11068,26 +11376,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
: Intrinsic::trunc;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
}
- case NEON::BI__builtin_neon_vrnd32x_v:
- case NEON::BI__builtin_neon_vrnd32xq_v: {
+ case NEON::BI__builtin_neon_vrnd32x_f32:
+ case NEON::BI__builtin_neon_vrnd32xq_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint32x;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
}
- case NEON::BI__builtin_neon_vrnd32z_v:
- case NEON::BI__builtin_neon_vrnd32zq_v: {
+ case NEON::BI__builtin_neon_vrnd32z_f32:
+ case NEON::BI__builtin_neon_vrnd32zq_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint32z;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
}
- case NEON::BI__builtin_neon_vrnd64x_v:
- case NEON::BI__builtin_neon_vrnd64xq_v: {
+ case NEON::BI__builtin_neon_vrnd64x_f32:
+ case NEON::BI__builtin_neon_vrnd64xq_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint64x;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
}
- case NEON::BI__builtin_neon_vrnd64z_v:
- case NEON::BI__builtin_neon_vrnd64zq_v: {
+ case NEON::BI__builtin_neon_vrnd64z_f32:
+ case NEON::BI__builtin_neon_vrnd64zq_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint64z;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
@@ -11125,26 +11433,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
- case NEON::BI__builtin_neon_vcvt_s16_v:
- case NEON::BI__builtin_neon_vcvt_u16_v:
+ case NEON::BI__builtin_neon_vcvt_s16_f16:
+ case NEON::BI__builtin_neon_vcvt_u16_f16:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
case NEON::BI__builtin_neon_vcvtq_u64_v:
- case NEON::BI__builtin_neon_vcvtq_s16_v:
- case NEON::BI__builtin_neon_vcvtq_u16_v: {
+ case NEON::BI__builtin_neon_vcvtq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtq_u16_f16: {
Int =
usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
}
- case NEON::BI__builtin_neon_vcvta_s16_v:
- case NEON::BI__builtin_neon_vcvta_u16_v:
+ case NEON::BI__builtin_neon_vcvta_s16_f16:
+ case NEON::BI__builtin_neon_vcvta_u16_f16:
case NEON::BI__builtin_neon_vcvta_s32_v:
- case NEON::BI__builtin_neon_vcvtaq_s16_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_f16:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
- case NEON::BI__builtin_neon_vcvtaq_u16_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_f16:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
@@ -11154,13 +11462,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
}
- case NEON::BI__builtin_neon_vcvtm_s16_v:
+ case NEON::BI__builtin_neon_vcvtm_s16_f16:
case NEON::BI__builtin_neon_vcvtm_s32_v:
- case NEON::BI__builtin_neon_vcvtmq_s16_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_f16:
case NEON::BI__builtin_neon_vcvtmq_s32_v:
- case NEON::BI__builtin_neon_vcvtm_u16_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_f16:
case NEON::BI__builtin_neon_vcvtm_u32_v:
- case NEON::BI__builtin_neon_vcvtmq_u16_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_f16:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
case NEON::BI__builtin_neon_vcvtmq_s64_v:
@@ -11170,13 +11478,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
}
- case NEON::BI__builtin_neon_vcvtn_s16_v:
+ case NEON::BI__builtin_neon_vcvtn_s16_f16:
case NEON::BI__builtin_neon_vcvtn_s32_v:
- case NEON::BI__builtin_neon_vcvtnq_s16_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_f16:
case NEON::BI__builtin_neon_vcvtnq_s32_v:
- case NEON::BI__builtin_neon_vcvtn_u16_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_f16:
case NEON::BI__builtin_neon_vcvtn_u32_v:
- case NEON::BI__builtin_neon_vcvtnq_u16_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_f16:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
@@ -11186,13 +11494,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
}
- case NEON::BI__builtin_neon_vcvtp_s16_v:
+ case NEON::BI__builtin_neon_vcvtp_s16_f16:
case NEON::BI__builtin_neon_vcvtp_s32_v:
- case NEON::BI__builtin_neon_vcvtpq_s16_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_f16:
case NEON::BI__builtin_neon_vcvtpq_s32_v:
- case NEON::BI__builtin_neon_vcvtp_u16_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_f16:
case NEON::BI__builtin_neon_vcvtp_u32_v:
- case NEON::BI__builtin_neon_vcvtpq_u16_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_f16:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
@@ -11268,7 +11576,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddv_u8:
// FIXME: These are handled by the AArch64 scalar code.
usgn = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vaddv_s8: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -11280,7 +11588,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vaddv_u16:
usgn = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vaddv_s16: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -11292,7 +11600,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vaddvq_u8:
usgn = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vaddvq_s8: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -11304,7 +11612,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vaddvq_u16:
usgn = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NEON::BI__builtin_neon_vaddvq_s16: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -11652,7 +11960,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vld1_dup_v:
case NEON::BI__builtin_neon_vld1q_dup_v: {
- Value *V = UndefValue::get(Ty);
+ Value *V = PoisonValue::get(Ty);
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
@@ -11745,7 +12053,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
+ Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
@@ -11759,7 +12067,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
+ Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
@@ -11774,7 +12082,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
+ Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
@@ -11934,7 +12242,7 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
"unexpected BPF builtin");
// A sequence number, injected into IR builtin functions, to
- // prevent CSE given the only difference of the funciton
+ // prevent CSE given the only difference of the function
// may just be the debuginfo metadata.
static uint32_t BuiltinSeqNum;
@@ -12050,11 +12358,11 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
}
// Otherwise, insertelement the values to build the vector.
- Value *Result = llvm::UndefValue::get(
+ Value *Result = llvm::PoisonValue::get(
llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
+ Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
return Result;
}
@@ -12074,9 +12382,8 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
- MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
- makeArrayRef(Indices, NumElts),
- "extract");
+ MaskVec = CGF.Builder.CreateShuffleVector(
+ MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
}
return MaskVec;
}
@@ -12347,7 +12654,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
default: break;
case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
Subtract = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
@@ -12355,7 +12662,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
break;
case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
Subtract = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
@@ -12363,21 +12670,21 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
break;
case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
Subtract = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
Subtract = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
Subtract = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12385,7 +12692,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
break;
case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
Subtract = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
@@ -12628,18 +12935,6 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
return Res;
}
-// Convert a BF16 to a float.
-static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
- const CallExpr *E,
- ArrayRef<Value *> Ops) {
- llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
- Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty);
- Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16);
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
- Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType);
- return BitCast;
-}
-
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
llvm::Type *Int32Ty = Builder.getInt32Ty();
@@ -12668,9 +12963,11 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
.Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE(ENUM, STR) \
.Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
+ .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE(ENUM, STR) \
.Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
-#include "llvm/Support/X86TargetParser.def"
+#include "llvm/TargetParser/X86TargetParser.def"
.Default({0, 0});
assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
@@ -12749,6 +13046,16 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
return Result;
}
+Value *CodeGenFunction::EmitAArch64CpuInit() {
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
+ llvm::FunctionCallee Func =
+ CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver");
+ cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
+ cast<llvm::GlobalValue>(Func.getCallee())
+ ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ return Builder.CreateCall(Func);
+}
+
Value *CodeGenFunction::EmitX86CpuInit() {
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
/*Variadic*/ false);
@@ -12760,6 +13067,32 @@ Value *CodeGenFunction::EmitX86CpuInit() {
return Builder.CreateCall(Func);
}
+llvm::Value *
+CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
+ uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
+ Value *Result = Builder.getTrue();
+ if (FeaturesMask != 0) {
+ // Get features from structure in runtime library
+ // struct {
+ // unsigned long long features;
+ // } __aarch64_cpu_features;
+ llvm::Type *STy = llvm::StructType::get(Int64Ty);
+ llvm::Constant *AArch64CPUFeatures =
+ CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
+ cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
+ llvm::Value *CpuFeatures = Builder.CreateGEP(
+ STy, AArch64CPUFeatures,
+ {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
+ Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
+ CharUnits::fromQuantity(8));
+ Value *Mask = Builder.getInt64(FeaturesMask);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
+ return Result;
+}
+
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (BuiltinID == X86::BI__builtin_cpu_is)
@@ -12771,7 +13104,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
- if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
+ if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
return EmitMSVCBuiltinExpr(*MsvcIntId, E);
SmallVector<Value*, 4> Ops;
@@ -13512,8 +13845,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + Index;
- Value *Res = Builder.CreateShuffleVector(Ops[0],
- makeArrayRef(Indices, NumElts),
+ Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
"extract");
if (Ops.size() == 4)
@@ -13551,9 +13883,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
for (unsigned i = 0; i != DstNumElts; ++i)
Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
- Value *Op1 = Builder.CreateShuffleVector(Ops[1],
- makeArrayRef(Indices, DstNumElts),
- "widen");
+ Value *Op1 = Builder.CreateShuffleVector(
+ Ops[1], ArrayRef(Indices, DstNumElts), "widen");
for (unsigned i = 0; i != DstNumElts; ++i) {
if (i >= Index && i < (Index + SrcNumElts))
@@ -13563,8 +13894,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateShuffleVector(Ops[0], Op1,
- makeArrayRef(Indices, DstNumElts),
- "insert");
+ ArrayRef(Indices, DstNumElts), "insert");
}
case X86::BI__builtin_ia32_pmovqd512_mask:
case X86::BI__builtin_ia32_pmovwb512_mask: {
@@ -13614,8 +13944,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
return Builder.CreateShuffleVector(Ops[0], Ops[1],
- makeArrayRef(Indices, NumElts),
- "blend");
+ ArrayRef(Indices, NumElts), "blend");
}
case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
@@ -13637,7 +13966,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[l + i] = l + i;
}
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
"pshuflw");
}
case X86::BI__builtin_ia32_pshufhw:
@@ -13660,7 +13989,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
"pshufhw");
}
case X86::BI__builtin_ia32_pshufd:
@@ -13689,7 +14018,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
"permil");
}
case X86::BI__builtin_ia32_shufpd:
@@ -13719,8 +14048,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateShuffleVector(Ops[0], Ops[1],
- makeArrayRef(Indices, NumElts),
- "shufp");
+ ArrayRef(Indices, NumElts), "shufp");
}
case X86::BI__builtin_ia32_permdi256:
case X86::BI__builtin_ia32_permdf256:
@@ -13736,7 +14064,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
"perm");
}
case X86::BI__builtin_ia32_palignr128:
@@ -13773,8 +14101,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateShuffleVector(Ops[1], Ops[0],
- makeArrayRef(Indices, NumElts),
- "palignr");
+ ArrayRef(Indices, NumElts), "palignr");
}
case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
@@ -13794,8 +14121,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[i] = i + ShiftVal;
return Builder.CreateShuffleVector(Ops[1], Ops[0],
- makeArrayRef(Indices, NumElts),
- "valign");
+ ArrayRef(Indices, NumElts), "valign");
}
case X86::BI__builtin_ia32_shuf_f32x4_256:
case X86::BI__builtin_ia32_shuf_f64x2_256:
@@ -13823,8 +14149,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateShuffleVector(Ops[0], Ops[1],
- makeArrayRef(Indices, NumElts),
- "shuf");
+ ArrayRef(Indices, NumElts), "shuf");
}
case X86::BI__builtin_ia32_vperm2f128_pd256:
@@ -13863,8 +14188,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
- makeArrayRef(Indices, NumElts),
- "vperm");
+ ArrayRef(Indices, NumElts), "vperm");
}
case X86::BI__builtin_ia32_pslldqi128_byteshift:
@@ -13892,9 +14216,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
Value *Zero = llvm::Constant::getNullValue(VecTy);
- Value *SV = Builder.CreateShuffleVector(Zero, Cast,
- makeArrayRef(Indices, NumElts),
- "pslldq");
+ Value *SV = Builder.CreateShuffleVector(
+ Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
}
case X86::BI__builtin_ia32_psrldqi128_byteshift:
@@ -13922,9 +14245,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
Value *Zero = llvm::Constant::getNullValue(VecTy);
- Value *SV = Builder.CreateShuffleVector(Cast, Zero,
- makeArrayRef(Indices, NumElts),
- "psrldq");
+ Value *SV = Builder.CreateShuffleVector(
+ Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
return Builder.CreateBitCast(SV, ResultType, "cast");
}
case X86::BI__builtin_ia32_kshiftliqi:
@@ -13944,9 +14266,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[i] = NumElts + i - ShiftVal;
Value *Zero = llvm::Constant::getNullValue(In->getType());
- Value *SV = Builder.CreateShuffleVector(Zero, In,
- makeArrayRef(Indices, NumElts),
- "kshiftl");
+ Value *SV = Builder.CreateShuffleVector(
+ Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
return Builder.CreateBitCast(SV, Ops[0]->getType());
}
case X86::BI__builtin_ia32_kshiftriqi:
@@ -13966,9 +14287,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[i] = i + ShiftVal;
Value *Zero = llvm::Constant::getNullValue(In->getType());
- Value *SV = Builder.CreateShuffleVector(In, Zero,
- makeArrayRef(Indices, NumElts),
- "kshiftr");
+ Value *SV = Builder.CreateShuffleVector(
+ In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
return Builder.CreateBitCast(SV, Ops[0]->getType());
}
case X86::BI__builtin_ia32_movnti:
@@ -14046,6 +14366,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectph_128:
case X86::BI__builtin_ia32_selectph_256:
case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
case X86::BI__builtin_ia32_selectps_128:
case X86::BI__builtin_ia32_selectps_256:
case X86::BI__builtin_ia32_selectps_512:
@@ -14054,6 +14377,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectpd_512:
return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
case X86::BI__builtin_ia32_selectsh_128:
+ case X86::BI__builtin_ia32_selectsbf_128:
case X86::BI__builtin_ia32_selectss_128:
case X86::BI__builtin_ia32_selectsd_128: {
Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
@@ -14251,14 +14575,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// First extract half of each vector. This gives better codegen than
// doing it in a single shuffle.
- LHS = Builder.CreateShuffleVector(LHS, LHS,
- makeArrayRef(Indices, NumElts / 2));
- RHS = Builder.CreateShuffleVector(RHS, RHS,
- makeArrayRef(Indices, NumElts / 2));
+ LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
+ RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
// Concat the vectors.
// NOTE: Operands are swapped to match the intrinsic definition.
- Value *Res = Builder.CreateShuffleVector(RHS, LHS,
- makeArrayRef(Indices, NumElts));
+ Value *Res =
+ Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
return Builder.CreateBitCast(Res, Ops[0]->getType());
}
@@ -14448,6 +14770,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_reduce_fadd_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.getFastMathFlags().setAllowReassoc();
return Builder.CreateCall(F, {Ops[0], Ops[1]});
}
@@ -14458,6 +14781,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_reduce_fmul_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.getFastMathFlags().setAllowReassoc();
return Builder.CreateCall(F, {Ops[0], Ops[1]});
}
@@ -14468,6 +14792,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_reduce_fmax_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.getFastMathFlags().setNoNaNs();
return Builder.CreateCall(F, {Ops[0]});
}
@@ -14478,6 +14803,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_reduce_fmin_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.getFastMathFlags().setNoNaNs();
return Builder.CreateCall(F, {Ops[0]});
}
@@ -14725,7 +15051,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmppd256_mask:
case X86::BI__builtin_ia32_cmppd512_mask:
IsMaskFCmp = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd:
@@ -14890,7 +15216,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
}
-// AVX512 bf16 intrinsics
+ // AVX512 bf16 intrinsics
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
Ops[2] = getMaskVecValue(
*this, Ops[2],
@@ -14899,7 +15225,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
case X86::BI__builtin_ia32_cvtsbf162ss_32:
- return EmitX86CvtBF16ToFloatExpr(*this, E, Ops);
+ return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
@@ -15214,7 +15540,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
case X86::BI__builtin_ia32_vfcmaddcph512_mask:
IsConjFMA = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case X86::BI__builtin_ia32_vfmaddcph512_mask: {
Intrinsic::ID IID = IsConjFMA
? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
@@ -15224,7 +15550,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
IsConjFMA = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
: Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
@@ -15234,7 +15560,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
IsConjFMA = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
: Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
@@ -15242,6 +15568,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
static constexpr int Mask[] = {0, 5, 6, 7};
return Builder.CreateShuffleVector(Call, Ops[2], Mask);
}
+ case X86::BI__builtin_ia32_prefetchi:
+ return Builder.CreateCall(
+ CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
+ {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
+ llvm::ConstantInt::get(Int32Ty, 0)});
}
}
@@ -15976,7 +16307,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
assert(ArgCI &&
"Third arg to xxinsertw intrinsic must be constant integer");
const int64_t MaxIndex = 12;
- int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
+ int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
// The builtin semantics don't exactly match the xxinsertw instructions
// semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
@@ -16018,7 +16349,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
assert(ArgCI &&
"Second Arg to xxextractuw intrinsic must be a constant integer!");
const int64_t MaxIndex = 12;
- int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
+ int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
if (getTarget().isLittleEndian()) {
// Reverse the index.
@@ -16103,10 +16434,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
bool isLittleEndian = getTarget().isLittleEndian();
- Value *UndefValue =
- llvm::UndefValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
+ Value *PoisonValue =
+ llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
Value *Res = Builder.CreateInsertElement(
- UndefValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
+ PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
Res = Builder.CreateInsertElement(Res, Op1,
(uint64_t)(isLittleEndian ? 0 : 1));
return Builder.CreateBitCast(Res, ConvertType(E->getType()));
@@ -16235,7 +16566,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
auto Pair = EmitAtomicCompareExchange(
LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
- // Unlike c11's atomic_compare_exchange, accroding to
+ // Unlike c11's atomic_compare_exchange, according to
// https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
// > In either case, the contents of the memory location specified by addr
// > are copied into the memory location specified by old_val_addr.
@@ -16353,16 +16684,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
case PPC::BI__builtin_ppc_test_data_class: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
- llvm::Type *ArgType = Op0->getType();
- unsigned IntrinsicID;
- if (ArgType->isDoubleTy())
- IntrinsicID = Intrinsic::ppc_test_data_class_d;
- else if (ArgType->isFloatTy())
- IntrinsicID = Intrinsic::ppc_test_data_class_f;
- else
- llvm_unreachable("Invalid Argument Type");
- return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), {Op0, Op1},
- "test_data_class");
+ return Builder.CreateCall(
+ CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
+ {Op0, Op1}, "test_data_class");
}
case PPC::BI__builtin_ppc_maxfe: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
@@ -16475,8 +16799,10 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
+ LD->setMetadata(llvm::LLVMContext::MD_noundef,
+ llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
- llvm::MDNode::get(CGF.getLLVMContext(), None));
+ llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
return LD;
}
@@ -16493,7 +16819,7 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
auto *LD = CGF.Builder.CreateLoad(
Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4)));
LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
- llvm::MDNode::get(CGF.getLLVMContext(), None));
+ llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
return LD;
}
} // namespace
@@ -16504,39 +16830,35 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
// specific SyncScopeID and writes it to \p SSID.
-bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
+void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID) {
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
-
- // Map C11/C++11 memory ordering to LLVM memory ordering
- assert(llvm::isValidAtomicOrderingCABI(ord));
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
- case llvm::AtomicOrderingCABI::acquire:
- case llvm::AtomicOrderingCABI::consume:
- AO = llvm::AtomicOrdering::Acquire;
- break;
- case llvm::AtomicOrderingCABI::release:
- AO = llvm::AtomicOrdering::Release;
- break;
- case llvm::AtomicOrderingCABI::acq_rel:
- AO = llvm::AtomicOrdering::AcquireRelease;
- break;
- case llvm::AtomicOrderingCABI::seq_cst:
- AO = llvm::AtomicOrdering::SequentiallyConsistent;
- break;
- case llvm::AtomicOrderingCABI::relaxed:
- AO = llvm::AtomicOrdering::Monotonic;
- break;
- }
+ int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
- StringRef scp;
- llvm::getConstantStringInfo(Scope, scp);
- SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
- return true;
+ // Map C11/C++11 memory ordering to LLVM memory ordering
+ assert(llvm::isValidAtomicOrderingCABI(ord));
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ case llvm::AtomicOrderingCABI::consume:
+ AO = llvm::AtomicOrdering::Acquire;
+ break;
+ case llvm::AtomicOrderingCABI::release:
+ AO = llvm::AtomicOrdering::Release;
+ break;
+ case llvm::AtomicOrderingCABI::acq_rel:
+ AO = llvm::AtomicOrdering::AcquireRelease;
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ case llvm::AtomicOrderingCABI::relaxed:
+ AO = llvm::AtomicOrdering::Monotonic;
+ break;
}
- return false;
+
+ StringRef scp;
+ llvm::getConstantStringInfo(Scope, scp);
+ SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
}
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
@@ -16593,7 +16915,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Args.push_back(EmitScalarExpr(E->getArg(I)));
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
- Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
+ Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
@@ -16661,6 +16983,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
case AMDGPU::BI__builtin_amdgcn_sbfe:
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
+ case AMDGPU::BI__builtin_amdgcn_ballot_w32:
+ case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
+ return Builder.CreateCall(F, { Src });
+ }
case AMDGPU::BI__builtin_amdgcn_uicmp:
case AMDGPU::BI__builtin_amdgcn_uicmpl:
case AMDGPU::BI__builtin_amdgcn_sicmp:
@@ -16862,6 +17191,21 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
RayInverseDir, TextureDescr});
}
+ case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
+ SmallVector<Value *, 4> Args;
+ for (int i = 0, e = E->getNumArgs(); i != e; ++i)
+ Args.push_back(EmitScalarExpr(E->getArg(i)));
+
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
+ Value *Call = Builder.CreateCall(F, Args);
+ Value *Rtn = Builder.CreateExtractValue(Call, 0);
+ Value *A = Builder.CreateExtractValue(Call, 1);
+ llvm::Type *RetTy = ConvertType(E->getType());
+ Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
+ (uint64_t)0);
+ return Builder.CreateInsertElement(I0, A, 1);
+ }
+
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
@@ -16966,12 +17310,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
return Builder.CreateCall(F, { Src0, Src1, Src2 });
}
-
case AMDGPU::BI__builtin_amdgcn_fence: {
- if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1)), AO, SSID))
- return Builder.CreateFence(AO, SSID);
- LLVM_FALLTHROUGH;
+ ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1)), AO, SSID);
+ return Builder.CreateFence(AO, SSID);
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
@@ -16997,22 +17339,29 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
- if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(3)), AO, SSID)) {
+ ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)), AO, SSID);
- // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expects ordering and
- // scope as unsigned values
- Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
- Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
+ // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expects ordering and
+ // scope as unsigned values
+ Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
+ Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
- QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
- bool Volatile =
- PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
- Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
+ QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
+ bool Volatile =
+ PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+ Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
- return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
- }
- LLVM_FALLTHROUGH;
+ return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
+ }
+ case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
+ case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResultType = ConvertType(E->getType());
+ // s_sendmsg_rtn is mangled using return type only.
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
+ return Builder.CreateCall(F, {Arg});
}
default:
return nullptr;
@@ -18035,7 +18384,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
Address Dst = EmitPointerWithAlignment(E->getArg(0));
Value *Src = EmitScalarExpr(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
- Optional<llvm::APSInt> isColMajorArg =
+ std::optional<llvm::APSInt> isColMajorArg =
E->getArg(3)->getIntegerConstantExpr(getContext());
if (!isColMajorArg)
return nullptr;
@@ -18082,7 +18431,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
Value *Dst = EmitScalarExpr(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
- Optional<llvm::APSInt> isColMajorArg =
+ std::optional<llvm::APSInt> isColMajorArg =
E->getArg(3)->getIntegerConstantExpr(getContext());
if (!isColMajorArg)
return nullptr;
@@ -18141,7 +18490,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
Address SrcA = EmitPointerWithAlignment(E->getArg(1));
Address SrcB = EmitPointerWithAlignment(E->getArg(2));
Address SrcC = EmitPointerWithAlignment(E->getArg(3));
- Optional<llvm::APSInt> LayoutArg =
+ std::optional<llvm::APSInt> LayoutArg =
E->getArg(4)->getIntegerConstantExpr(getContext());
if (!LayoutArg)
return nullptr;
@@ -18152,7 +18501,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
SatfArg = 0; // .b1 does not have satf argument.
- else if (Optional<llvm::APSInt> OptSatfArg =
+ else if (std::optional<llvm::APSInt> OptSatfArg =
E->getArg(5)->getIntegerConstantExpr(getContext()))
SatfArg = *OptSatfArg;
else
@@ -18595,7 +18944,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
break;
default:
- llvm_unreachable("unexptected builtin ID");
+ llvm_unreachable("unexpected builtin ID");
}
Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
@@ -18715,7 +19064,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
while (OpIdx < 18) {
- Optional<llvm::APSInt> LaneConst =
+ std::optional<llvm::APSInt> LaneConst =
E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
assert(LaneConst && "Constant arg isn't actually constant?");
Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
@@ -18723,22 +19072,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
return Builder.CreateCall(Callee, Ops);
}
- case WebAssembly::BI__builtin_wasm_fma_f32x4:
- case WebAssembly::BI__builtin_wasm_fms_f32x4:
- case WebAssembly::BI__builtin_wasm_fma_f64x2:
- case WebAssembly::BI__builtin_wasm_fms_f64x2: {
+ case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
+ case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
Value *A = EmitScalarExpr(E->getArg(0));
Value *B = EmitScalarExpr(E->getArg(1));
Value *C = EmitScalarExpr(E->getArg(2));
unsigned IntNo;
switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_fma_f32x4:
- case WebAssembly::BI__builtin_wasm_fma_f64x2:
- IntNo = Intrinsic::wasm_fma;
+ case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
+ IntNo = Intrinsic::wasm_relaxed_madd;
break;
- case WebAssembly::BI__builtin_wasm_fms_f32x4:
- case WebAssembly::BI__builtin_wasm_fms_f64x2:
- IntNo = Intrinsic::wasm_fms;
+ case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
+ IntNo = Intrinsic::wasm_relaxed_nmadd;
break;
default:
llvm_unreachable("unexpected builtin ID");
@@ -18746,15 +19095,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
return Builder.CreateCall(Callee, {A, B, C});
}
- case WebAssembly::BI__builtin_wasm_laneselect_i8x16:
- case WebAssembly::BI__builtin_wasm_laneselect_i16x8:
- case WebAssembly::BI__builtin_wasm_laneselect_i32x4:
- case WebAssembly::BI__builtin_wasm_laneselect_i64x2: {
+ case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
+ case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
+ case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
Value *A = EmitScalarExpr(E->getArg(0));
Value *B = EmitScalarExpr(E->getArg(1));
Value *C = EmitScalarExpr(E->getArg(2));
Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType());
+ CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
return Builder.CreateCall(Callee, {A, B, C});
}
case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
@@ -18816,18 +19165,27 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
return Builder.CreateCall(Callee, {LHS, RHS});
}
- case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_s_i16x8: {
+ case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
Value *LHS = EmitScalarExpr(E->getArg(0));
Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_signed);
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
return Builder.CreateCall(Callee, {LHS, RHS});
}
- case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_add_s_i32x4: {
+ case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Acc = EmitScalarExpr(E->getArg(2));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
+ return Builder.CreateCall(Callee, {LHS, RHS, Acc});
+ }
+ case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
Value *LHS = EmitScalarExpr(E->getArg(0));
Value *RHS = EmitScalarExpr(E->getArg(1));
Value *Acc = EmitScalarExpr(E->getArg(2));
Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_add_signed);
+ CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
return Builder.CreateCall(Callee, {LHS, RHS, Acc});
}
default:
@@ -18842,7 +19200,7 @@ getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
Intrinsic::ID IntrinsicID;
unsigned VecLen;
};
- Info Infos[] = {
+ static Info Infos[] = {
#define CUSTOM_BUILTIN_MAPPING(x,s) \
{ Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
@@ -18884,8 +19242,7 @@ getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
(void)SortOnce;
- const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
- Info{BuiltinID, 0, 0}, CmpInfo);
+ const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
return {Intrinsic::not_intrinsic, 0};
@@ -19004,6 +19361,25 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
PredAddr.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
+ // These are identical to the builtins above, except they don't consume
+ // input carry, only generate carry-out. Since they still produce two
+ // outputs, generate the store of the predicate, but no load.
+ case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
+ case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
+ case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
+ case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
+ // Get the type from the 0-th argument.
+ llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
+ Address PredAddr = Builder.CreateElementBitCast(
+ EmitPointerWithAlignment(E->getArg(2)), VecType);
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
+ {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
+
+ llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
+ Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
+ PredAddr.getAlignment());
+ return Builder.CreateExtractValue(Result, 0);
+ }
case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
@@ -19104,7 +19480,12 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID = Intrinsic::not_intrinsic;
unsigned NF = 1;
- constexpr unsigned TAIL_UNDISTURBED = 0;
+ // The 0th bit simulates the `vta` of RVV
+ // The 1st bit simulates the `vma` of RVV
+ constexpr unsigned RVV_VTA = 0x1;
+ constexpr unsigned RVV_VMA = 0x2;
+ int PolicyAttrs = 0;
+ bool IsMasked = false;
// Required for overloaded intrinsics.
llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
@@ -19119,38 +19500,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_clmul:
case RISCV::BI__builtin_riscv_clmulh:
case RISCV::BI__builtin_riscv_clmulr:
- case RISCV::BI__builtin_riscv_bcompress_32:
- case RISCV::BI__builtin_riscv_bcompress_64:
- case RISCV::BI__builtin_riscv_bdecompress_32:
- case RISCV::BI__builtin_riscv_bdecompress_64:
- case RISCV::BI__builtin_riscv_bfp_32:
- case RISCV::BI__builtin_riscv_bfp_64:
- case RISCV::BI__builtin_riscv_grev_32:
- case RISCV::BI__builtin_riscv_grev_64:
- case RISCV::BI__builtin_riscv_gorc_32:
- case RISCV::BI__builtin_riscv_gorc_64:
- case RISCV::BI__builtin_riscv_shfl_32:
- case RISCV::BI__builtin_riscv_shfl_64:
- case RISCV::BI__builtin_riscv_unshfl_32:
- case RISCV::BI__builtin_riscv_unshfl_64:
case RISCV::BI__builtin_riscv_xperm4:
case RISCV::BI__builtin_riscv_xperm8:
- case RISCV::BI__builtin_riscv_xperm_n:
- case RISCV::BI__builtin_riscv_xperm_b:
- case RISCV::BI__builtin_riscv_xperm_h:
- case RISCV::BI__builtin_riscv_xperm_w:
- case RISCV::BI__builtin_riscv_crc32_b:
- case RISCV::BI__builtin_riscv_crc32_h:
- case RISCV::BI__builtin_riscv_crc32_w:
- case RISCV::BI__builtin_riscv_crc32_d:
- case RISCV::BI__builtin_riscv_crc32c_b:
- case RISCV::BI__builtin_riscv_crc32c_h:
- case RISCV::BI__builtin_riscv_crc32c_w:
- case RISCV::BI__builtin_riscv_crc32c_d:
- case RISCV::BI__builtin_riscv_fsl_32:
- case RISCV::BI__builtin_riscv_fsr_32:
- case RISCV::BI__builtin_riscv_fsl_64:
- case RISCV::BI__builtin_riscv_fsr_64:
case RISCV::BI__builtin_riscv_brev8:
case RISCV::BI__builtin_riscv_zip_32:
case RISCV::BI__builtin_riscv_unzip_32: {
@@ -19183,88 +19534,6 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
ID = Intrinsic::riscv_clmulr;
break;
- // Zbe
- case RISCV::BI__builtin_riscv_bcompress_32:
- case RISCV::BI__builtin_riscv_bcompress_64:
- ID = Intrinsic::riscv_bcompress;
- break;
- case RISCV::BI__builtin_riscv_bdecompress_32:
- case RISCV::BI__builtin_riscv_bdecompress_64:
- ID = Intrinsic::riscv_bdecompress;
- break;
-
- // Zbf
- case RISCV::BI__builtin_riscv_bfp_32:
- case RISCV::BI__builtin_riscv_bfp_64:
- ID = Intrinsic::riscv_bfp;
- break;
-
- // Zbp
- case RISCV::BI__builtin_riscv_grev_32:
- case RISCV::BI__builtin_riscv_grev_64:
- ID = Intrinsic::riscv_grev;
- break;
- case RISCV::BI__builtin_riscv_gorc_32:
- case RISCV::BI__builtin_riscv_gorc_64:
- ID = Intrinsic::riscv_gorc;
- break;
- case RISCV::BI__builtin_riscv_shfl_32:
- case RISCV::BI__builtin_riscv_shfl_64:
- ID = Intrinsic::riscv_shfl;
- break;
- case RISCV::BI__builtin_riscv_unshfl_32:
- case RISCV::BI__builtin_riscv_unshfl_64:
- ID = Intrinsic::riscv_unshfl;
- break;
- case RISCV::BI__builtin_riscv_xperm_n:
- ID = Intrinsic::riscv_xperm_n;
- break;
- case RISCV::BI__builtin_riscv_xperm_b:
- ID = Intrinsic::riscv_xperm_b;
- break;
- case RISCV::BI__builtin_riscv_xperm_h:
- ID = Intrinsic::riscv_xperm_h;
- break;
- case RISCV::BI__builtin_riscv_xperm_w:
- ID = Intrinsic::riscv_xperm_w;
- break;
-
- // Zbr
- case RISCV::BI__builtin_riscv_crc32_b:
- ID = Intrinsic::riscv_crc32_b;
- break;
- case RISCV::BI__builtin_riscv_crc32_h:
- ID = Intrinsic::riscv_crc32_h;
- break;
- case RISCV::BI__builtin_riscv_crc32_w:
- ID = Intrinsic::riscv_crc32_w;
- break;
- case RISCV::BI__builtin_riscv_crc32_d:
- ID = Intrinsic::riscv_crc32_d;
- break;
- case RISCV::BI__builtin_riscv_crc32c_b:
- ID = Intrinsic::riscv_crc32c_b;
- break;
- case RISCV::BI__builtin_riscv_crc32c_h:
- ID = Intrinsic::riscv_crc32c_h;
- break;
- case RISCV::BI__builtin_riscv_crc32c_w:
- ID = Intrinsic::riscv_crc32c_w;
- break;
- case RISCV::BI__builtin_riscv_crc32c_d:
- ID = Intrinsic::riscv_crc32c_d;
- break;
-
- // Zbt
- case RISCV::BI__builtin_riscv_fsl_32:
- case RISCV::BI__builtin_riscv_fsl_64:
- ID = Intrinsic::riscv_fsl;
- break;
- case RISCV::BI__builtin_riscv_fsr_32:
- case RISCV::BI__builtin_riscv_fsr_64:
- ID = Intrinsic::riscv_fsr;
- break;
-
// Zbkx
case RISCV::BI__builtin_riscv_xperm8:
ID = Intrinsic::riscv_xperm8;
@@ -19407,3 +19676,129 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
return Builder.CreateCall(F, Ops, "");
}
+
+Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ SmallVector<Value *, 4> Ops;
+
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+
+ switch (BuiltinID) {
+ default:
+ llvm_unreachable("unexpected builtin ID.");
+ case LoongArch::BI__builtin_loongarch_cacop_d:
+ ID = Intrinsic::loongarch_cacop_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_cacop_w:
+ ID = Intrinsic::loongarch_cacop_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_dbar:
+ ID = Intrinsic::loongarch_dbar;
+ break;
+ case LoongArch::BI__builtin_loongarch_break:
+ ID = Intrinsic::loongarch_break;
+ break;
+ case LoongArch::BI__builtin_loongarch_ibar:
+ ID = Intrinsic::loongarch_ibar;
+ break;
+ case LoongArch::BI__builtin_loongarch_movfcsr2gr:
+ ID = Intrinsic::loongarch_movfcsr2gr;
+ break;
+ case LoongArch::BI__builtin_loongarch_movgr2fcsr:
+ ID = Intrinsic::loongarch_movgr2fcsr;
+ break;
+ case LoongArch::BI__builtin_loongarch_syscall:
+ ID = Intrinsic::loongarch_syscall;
+ break;
+ case LoongArch::BI__builtin_loongarch_crc_w_b_w:
+ ID = Intrinsic::loongarch_crc_w_b_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crc_w_h_w:
+ ID = Intrinsic::loongarch_crc_w_h_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crc_w_w_w:
+ ID = Intrinsic::loongarch_crc_w_w_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crc_w_d_w:
+ ID = Intrinsic::loongarch_crc_w_d_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crcc_w_b_w:
+ ID = Intrinsic::loongarch_crcc_w_b_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crcc_w_h_w:
+ ID = Intrinsic::loongarch_crcc_w_h_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crcc_w_w_w:
+ ID = Intrinsic::loongarch_crcc_w_w_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_crcc_w_d_w:
+ ID = Intrinsic::loongarch_crcc_w_d_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_csrrd_w:
+ ID = Intrinsic::loongarch_csrrd_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_csrwr_w:
+ ID = Intrinsic::loongarch_csrwr_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_csrxchg_w:
+ ID = Intrinsic::loongarch_csrxchg_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_csrrd_d:
+ ID = Intrinsic::loongarch_csrrd_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_csrwr_d:
+ ID = Intrinsic::loongarch_csrwr_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_csrxchg_d:
+ ID = Intrinsic::loongarch_csrxchg_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrrd_b:
+ ID = Intrinsic::loongarch_iocsrrd_b;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrrd_h:
+ ID = Intrinsic::loongarch_iocsrrd_h;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrrd_w:
+ ID = Intrinsic::loongarch_iocsrrd_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrrd_d:
+ ID = Intrinsic::loongarch_iocsrrd_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrwr_b:
+ ID = Intrinsic::loongarch_iocsrwr_b;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrwr_h:
+ ID = Intrinsic::loongarch_iocsrwr_h;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrwr_w:
+ ID = Intrinsic::loongarch_iocsrwr_w;
+ break;
+ case LoongArch::BI__builtin_loongarch_iocsrwr_d:
+ ID = Intrinsic::loongarch_iocsrwr_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_cpucfg:
+ ID = Intrinsic::loongarch_cpucfg;
+ break;
+ case LoongArch::BI__builtin_loongarch_asrtle_d:
+ ID = Intrinsic::loongarch_asrtle_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_asrtgt_d:
+ ID = Intrinsic::loongarch_asrtgt_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_lddir_d:
+ ID = Intrinsic::loongarch_lddir_d;
+ break;
+ case LoongArch::BI__builtin_loongarch_ldpte_d:
+ ID = Intrinsic::loongarch_ldpte_d;
+ break;
+ // TODO: Support more Intrinsics.
+ }
+
+ assert(ID != Intrinsic::not_intrinsic);
+
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops);
+}
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index a8bb0dd65d1a..bb887df3e4e0 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -49,10 +49,10 @@ private:
const Decl *D;
};
llvm::SmallVector<KernelInfo, 16> EmittedKernels;
- // Map a device stub function to a symbol for identifying kernel in host code.
+ // Map a kernel mangled name to a symbol for identifying kernel in host code.
// For CUDA, the symbol for identifying the kernel is the same as the device
// stub function. For HIP, they are different.
- llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
+ llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles;
// Map a kernel handle to the kernel stub.
llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
struct VarInfo {
@@ -69,6 +69,8 @@ private:
bool RelocatableDeviceCode;
/// Mangle context for device.
std::unique_ptr<MangleContext> DeviceMC;
+ /// Some zeros used for GEPs.
+ llvm::Constant *Zeros[2];
llvm::FunctionCallee getSetupArgumentFn() const;
llvm::FunctionCallee getLaunchFn() const;
@@ -86,14 +88,25 @@ private:
/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.
llvm::Constant *makeConstantString(const std::string &Str,
- const std::string &Name = "",
- const std::string &SectionName = "",
- unsigned Alignment = 0) {
- llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
- llvm::ConstantInt::get(SizeTy, 0)};
+ const std::string &Name = "") {
auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
- llvm::GlobalVariable *GV =
- cast<llvm::GlobalVariable>(ConstStr.getPointer());
+ return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
+ ConstStr.getPointer(), Zeros);
+ }
+
+ /// Helper function which generates an initialized constant array from Str,
+ /// and optionally sets section name and alignment. AddNull specifies whether
+ /// the array should have NUL termination.
+ llvm::Constant *makeConstantArray(StringRef Str,
+ StringRef Name = "",
+ StringRef SectionName = "",
+ unsigned Alignment = 0,
+ bool AddNull = false) {
+ llvm::Constant *Value =
+ llvm::ConstantDataArray::getString(Context, Str, AddNull);
+ auto *GV = new llvm::GlobalVariable(
+ TheModule, Value->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, Value, Name);
if (!SectionName.empty()) {
GV->setSection(SectionName);
// Mark the address as used which make sure that this section isn't
@@ -102,9 +115,7 @@ private:
}
if (Alignment)
GV->setAlignment(llvm::Align(Alignment));
-
- return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
- ConstStr.getPointer(), Zeros);
+ return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
}
/// Helper function that generates an empty dummy function returning void.
@@ -220,6 +231,8 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
IntTy = CGM.IntTy;
SizeTy = CGM.SizeTy;
VoidTy = CGM.VoidTy;
+ Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
+ Zeros[1] = Zeros[0];
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
@@ -297,7 +310,8 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
- if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
+ if (auto *GV =
+ dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn->getName()])) {
GV->setLinkage(CGF.CurFn->getLinkage());
GV->setInitializer(CGF.CurFn);
}
@@ -387,8 +401,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
ShmemSize.getPointer(), Stream.getPointer()});
// Emit the call to cudaLaunch
- llvm::Value *Kernel =
- CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
+ llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
+ KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
CallArgList LaunchKernelArgs;
LaunchKernelArgs.add(RValue::get(Kernel),
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -443,8 +457,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
// Emit the call to cudaLaunch
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
- llvm::Value *Arg =
- CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
+ llvm::Value *Arg = CGF.Builder.CreatePointerCast(
+ KernelHandles[CGF.CurFn->getName()], CharPtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
@@ -538,7 +552,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
+ Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy),
KernelName,
KernelName,
llvm::ConstantInt::get(IntTy, -1),
@@ -744,9 +758,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// If fatbin is available from early finalization, create a string
// literal containing the fat binary loaded from the given file.
const unsigned HIPCodeObjectAlign = 4096;
- FatBinStr =
- makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
- FatbinConstantName, HIPCodeObjectAlign);
+ FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
+ FatbinConstantName, HIPCodeObjectAlign);
} else {
// If fatbin is not available, create an external symbol
// __hip_fatbin in section .hip_fatbin. The external symbol is supposed
@@ -780,8 +793,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// For CUDA, create a string literal containing the fat binary loaded from
// the given file.
- FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
- FatbinConstantName, 8);
+ FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
+ FatbinConstantName, 8);
FatMagic = CudaFatMagic;
}
@@ -888,8 +901,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
- llvm::Constant *ModuleIDConstant = makeConstantString(
- std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
+ llvm::Constant *ModuleIDConstant = makeConstantArray(
+ std::string(ModuleID.str()), "", ModuleIDSectionName, 32, /*AddNull=*/true);
// Create an alias for the FatbinWrapper that nvcc will look for.
llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
@@ -1118,7 +1131,7 @@ void CGNVCUDARuntime::createOffloadingEntries() {
StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries"
: "cuda_offloading_entries";
for (KernelInfo &I : EmittedKernels)
- OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel],
+ OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()],
getDeviceSideName(cast<NamedDecl>(I.D)), 0,
DeviceVarFlags::OffloadGlobalEntry, Section);
@@ -1181,12 +1194,12 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
GlobalDecl GD) {
- auto Loc = KernelHandles.find(F);
+ auto Loc = KernelHandles.find(F->getName());
if (Loc != KernelHandles.end())
return Loc->second;
if (!CGM.getLangOpts().HIP) {
- KernelHandles[F] = F;
+ KernelHandles[F->getName()] = F;
KernelStubs[F] = F;
return F;
}
@@ -1200,7 +1213,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
Var->setDSOLocal(F->isDSOLocal());
Var->setVisibility(F->getVisibility());
CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
- KernelHandles[F] = Var;
+ KernelHandles[F->getName()] = Var;
KernelStubs[Var] = F;
return Var;
}
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index 73c7ca7bc15f..9a9c6d26cc63 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -55,7 +55,7 @@ public:
/// The kind flag for an offloading entry.
enum OffloadEntryKindFlag : uint32_t {
/// Mark the entry as a global entry. This indicates the presense of a
- /// kernel if the size size field is zero and a variable otherwise.
+ /// kernel if the size field is zero and a variable otherwise.
OffloadGlobalEntry = 0x0,
/// Mark the entry as a managed global variable.
OffloadGlobalManagedEntry = 0x1,
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index 0768e6581acb..a600768b2074 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -105,6 +105,10 @@ protected:
/// final class will have been taken care of by the caller.
virtual bool isThisCompleteObject(GlobalDecl GD) const = 0;
+ virtual bool constructorsAndDestructorsReturnThis() const {
+ return CGM.getCodeGenOpts().CtorDtorReturnThis;
+ }
+
public:
virtual ~CGCXXABI();
@@ -120,7 +124,13 @@ public:
///
/// There currently is no way to indicate if a destructor returns 'this'
/// when called virtually, and code generation does not support the case.
- virtual bool HasThisReturn(GlobalDecl GD) const { return false; }
+ virtual bool HasThisReturn(GlobalDecl GD) const {
+ if (isa<CXXConstructorDecl>(GD.getDecl()) ||
+ (isa<CXXDestructorDecl>(GD.getDecl()) &&
+ GD.getDtorType() != Dtor_Deleting))
+ return constructorsAndDestructorsReturnThis();
+ return false;
+ }
virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; }
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index dfa78bf59c65..dfa552161d7c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -112,7 +113,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
// variadic type.
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
/*instanceMethod=*/false,
- /*chainCall=*/false, None,
+ /*chainCall=*/false, std::nullopt,
FTNP->getExtInfo(), {}, RequiredArgs(0));
}
@@ -459,7 +460,8 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) {
return arrangeLLVMFunctionInfo(
noProto->getReturnType(), /*instanceMethod=*/false,
- /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All);
+ /*chainCall=*/false, std::nullopt, noProto->getExtInfo(), {},
+ RequiredArgs::All);
}
return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>());
@@ -484,9 +486,11 @@ const CGFunctionInfo &
CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
QualType receiverType) {
SmallVector<CanQualType, 16> argTys;
- SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos(2);
+ SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos(
+ MD->isDirectMethod() ? 1 : 2);
argTys.push_back(Context.getCanonicalParamType(receiverType));
- argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
+ if (!MD->isDirectMethod())
+ argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
// FIXME: Kill copy?
for (const auto *I : MD->parameters()) {
argTys.push_back(Context.getCanonicalParamType(I->getType()));
@@ -708,7 +712,7 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
return arrangeLLVMFunctionInfo(
getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false,
- None, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+ std::nullopt, FunctionType::ExtInfo(), {}, RequiredArgs::All);
}
const CGFunctionInfo &
@@ -1144,7 +1148,7 @@ static Address CreateTempAllocaForCoercion(CodeGenFunction &CGF, llvm::Type *Ty,
CharUnits MinAlign,
const Twine &Name = "tmp") {
// Don't use an alignment that's worse than what LLVM would prefer.
- auto PrefAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(Ty);
+ auto PrefAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(Ty);
CharUnits Align = std::max(MinAlign, CharUnits::fromQuantity(PrefAlign));
return CGF.CreateTempAlloca(Ty, Align, Name + ".coerce");
@@ -1257,7 +1261,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
Src = EnterStructPointerForCoercedAccess(Src, SrcSTy,
- DstSize.getFixedSize(), CGF);
+ DstSize.getFixedValue(), CGF);
SrcTy = Src.getElementType();
}
@@ -1273,7 +1277,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// If load is legal, just bitcast the src pointer.
if (!SrcSize.isScalable() && !DstSize.isScalable() &&
- SrcSize.getFixedSize() >= DstSize.getFixedSize()) {
+ SrcSize.getFixedValue() >= DstSize.getFixedValue()) {
// Generally SrcSize is never greater than DstSize, since this means we are
// losing bits. However, this can happen in cases where the structure has
// additional padding, for example due to a user specified alignment.
@@ -1319,7 +1323,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
CGF.Builder.CreateMemCpy(
Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), Src.getPointer(),
Src.getAlignment().getAsAlign(),
- llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize.getKnownMinSize()));
+ llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize.getKnownMinValue()));
return CGF.Builder.CreateLoad(Tmp);
}
@@ -1362,7 +1366,7 @@ static void CreateCoercedStore(llvm::Value *Src,
if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
- SrcSize.getFixedSize(), CGF);
+ SrcSize.getFixedValue(), CGF);
DstTy = Dst.getElementType();
}
@@ -1389,7 +1393,7 @@ static void CreateCoercedStore(llvm::Value *Src,
// If store is legal, just bitcast the src pointer.
if (isa<llvm::ScalableVectorType>(SrcTy) ||
isa<llvm::ScalableVectorType>(DstTy) ||
- SrcSize.getFixedSize() <= DstSize.getFixedSize()) {
+ SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
} else {
@@ -1407,7 +1411,7 @@ static void CreateCoercedStore(llvm::Value *Src,
CGF.Builder.CreateMemCpy(
Dst.getPointer(), Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
Tmp.getAlignment().getAsAlign(),
- llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedSize()));
+ llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
}
}
@@ -1633,7 +1637,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
// sret things on win32 aren't void, they return the sret pointer.
QualType ret = FI.getReturnType();
llvm::Type *ty = ConvertType(ret);
- unsigned addressSpace = Context.getTargetAddressSpace(ret);
+ unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret);
resultType = llvm::PointerType::get(ty, addressSpace);
} else {
resultType = llvm::Type::getVoidTy(getLLVMContext());
@@ -1657,7 +1661,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
if (IRFunctionArgs.hasSRetArg()) {
QualType Ret = FI.getReturnType();
llvm::Type *Ty = ConvertType(Ret);
- unsigned AddressSpace = Context.getTargetAddressSpace(Ret);
+ unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret);
ArgTypes[IRFunctionArgs.getSRetArgNo()] =
llvm::PointerType::get(Ty, AddressSpace);
}
@@ -1723,7 +1727,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
case ABIArgInfo::CoerceAndExpand: {
auto ArgTypesIter = ArgTypes.begin() + FirstIRArg;
- for (auto EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) {
+ for (auto *EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) {
*ArgTypesIter++ = EltTy;
}
assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs);
@@ -1781,7 +1785,7 @@ static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs,
}
bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
- QualType ReturnType) {
+ QualType ReturnType) const {
// We can't just discard the return value for a record type with a
// complex destructor or a non-trivially copyable type.
if (const RecordType *RT =
@@ -1792,6 +1796,38 @@ bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
return ReturnType.isTriviallyCopyableType(Context);
}
+static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy,
+ const Decl *TargetDecl) {
+ // As-is msan can not tolerate noundef mismatch between caller and
+ // implementation. Mismatch is possible for e.g. indirect calls from C-caller
+ // into C++. Such mismatches lead to confusing false reports. To avoid
+ // expensive workaround on msan we enforce initialization even in uncommon
+ // cases where it's allowed.
+ if (Module.getLangOpts().Sanitize.has(SanitizerKind::Memory))
+ return true;
+ // C++ explicitly makes returning undefined values UB. C's rule only applies
+ // to used values, so we never mark them noundef for now.
+ if (!Module.getLangOpts().CPlusPlus)
+ return false;
+ if (TargetDecl) {
+ if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl)) {
+ if (FDecl->isExternC())
+ return false;
+ } else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl)) {
+ // Function pointer.
+ if (VDecl->isExternC())
+ return false;
+ }
+ }
+
+ // We don't want to be too aggressive with the return checking, unless
+ // it's explicit in the code opts or we're using an appropriate sanitizer.
+ // Try to respect what the programmer intended.
+ return Module.getCodeGenOpts().StrictReturn ||
+ !Module.MayDropFunctionReturn(Module.getContext(), RetTy) ||
+ Module.getLangOpts().Sanitize.has(SanitizerKind::Return);
+}
+
void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
bool HasOptnone,
bool AttrOnCallSite,
@@ -1820,19 +1856,16 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
if (!CodeGenOpts.TrapFuncName.empty())
FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
} else {
- StringRef FpKind;
switch (CodeGenOpts.getFramePointer()) {
case CodeGenOptions::FramePointerKind::None:
- FpKind = "none";
+ // This is the default behavior.
break;
case CodeGenOptions::FramePointerKind::NonLeaf:
- FpKind = "non-leaf";
- break;
case CodeGenOptions::FramePointerKind::All:
- FpKind = "all";
- break;
+ FuncAttrs.addAttribute("frame-pointer",
+ CodeGenOptions::getFramePointerKindName(
+ CodeGenOpts.getFramePointer()));
}
- FuncAttrs.addAttribute("frame-pointer", FpKind);
if (CodeGenOpts.LessPreciseFPMAD)
FuncAttrs.addAttribute("less-precise-fpmad", "true");
@@ -1860,7 +1893,12 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
FuncAttrs.addAttribute("no-nans-fp-math", "true");
if (LangOpts.ApproxFunc)
FuncAttrs.addAttribute("approx-func-fp-math", "true");
- if (LangOpts.UnsafeFPMath)
+ if (LangOpts.AllowFPReassoc && LangOpts.AllowRecip &&
+ LangOpts.NoSignedZero && LangOpts.ApproxFunc &&
+ (LangOpts.getDefaultFPContractMode() ==
+ LangOptions::FPModeKind::FPM_Fast ||
+ LangOpts.getDefaultFPContractMode() ==
+ LangOptions::FPModeKind::FPM_FastHonorPragmas))
FuncAttrs.addAttribute("unsafe-fp-math", "true");
if (CodeGenOpts.SoftFloat)
FuncAttrs.addAttribute("use-soft-float", "true");
@@ -1931,11 +1969,11 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
}
- // TODO: NoUnwind attribute should be added for other GPU modes OpenCL, HIP,
+ // TODO: NoUnwind attribute should be added for other GPU modes: HIP,
// SYCL, OpenMP offload. AFAIK, none of them support exceptions in device
// code.
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
- // Exceptions aren't supported in CUDA device code.
+ if ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice) ||
+ getLangOpts().OpenCL) {
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
@@ -2046,6 +2084,27 @@ static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types,
return false;
}
+/// Check if the argument of a function has maybe_undef attribute.
+static bool IsArgumentMaybeUndef(const Decl *TargetDecl,
+ unsigned NumRequiredArgs, unsigned ArgNo) {
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
+ if (!FD)
+ return false;
+
+ // Assume variadic arguments do not have maybe_undef attribute.
+ if (ArgNo >= NumRequiredArgs)
+ return false;
+
+ // Check if argument has maybe_undef attribute.
+ if (ArgNo < FD->getNumParams()) {
+ const ParmVarDecl *Param = FD->getParamDecl(ArgNo);
+ if (Param && Param->hasAttr<MaybeUndefAttr>())
+ return true;
+ }
+
+ return false;
+}
+
/// Construct the IR attribute list of a function or call.
///
/// When adding an attribute, please consider where it should be handled:
@@ -2094,6 +2153,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// The NoBuiltinAttr attached to the target FunctionDecl.
const NoBuiltinAttr *NBA = nullptr;
+ // Some ABIs may result in additional accesses to arguments that may
+ // otherwise not be present.
+ auto AddPotentialArgAccess = [&]() {
+ llvm::Attribute A = FuncAttrs.getAttribute(llvm::Attribute::Memory);
+ if (A.isValid())
+ FuncAttrs.addMemoryAttr(A.getMemoryEffects() |
+ llvm::MemoryEffects::argMemOnly());
+ };
+
// Collect function IR attributes based on declaration-specific
// information.
// FIXME: handle sseregparm someday...
@@ -2140,18 +2208,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// 'const', 'pure' and 'noalias' attributed functions are also nounwind.
if (TargetDecl->hasAttr<ConstAttr>()) {
- FuncAttrs.addAttribute(llvm::Attribute::ReadNone);
+ FuncAttrs.addMemoryAttr(llvm::MemoryEffects::none());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// gcc specifies that 'const' functions have greater restrictions than
// 'pure' functions, so they also cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<PureAttr>()) {
- FuncAttrs.addAttribute(llvm::Attribute::ReadOnly);
+ FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// gcc specifies that 'pure' functions cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<NoAliasAttr>()) {
- FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly);
+ FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
if (TargetDecl->hasAttr<RestrictAttr>())
@@ -2168,7 +2236,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
- Optional<unsigned> NumElemsParam;
+ std::optional<unsigned> NumElemsParam;
if (AllocSize->getNumElemsParam().isValid())
NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex();
FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),
@@ -2237,9 +2305,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// Add "sample-profile-suffix-elision-policy" attribute for internal linkage
// functions with -funique-internal-linkage-names.
if (TargetDecl && CodeGenOpts.UniqueInternalLinkageNames) {
- if (isa<FunctionDecl>(TargetDecl)) {
- if (this->getFunctionLinkage(CalleeInfo.getCalleeDecl()) ==
- llvm::GlobalValue::InternalLinkage)
+ if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
+ if (!FD->isExternallyVisible())
FuncAttrs.addAttribute("sample-profile-suffix-elision-policy",
"selected");
}
@@ -2287,27 +2354,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
const ABIArgInfo &RetAI = FI.getReturnInfo();
const llvm::DataLayout &DL = getDataLayout();
- // C++ explicitly makes returning undefined values UB. C's rule only applies
- // to used values, so we never mark them noundef for now.
- bool HasStrictReturn = getLangOpts().CPlusPlus;
- if (TargetDecl && HasStrictReturn) {
- if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl))
- HasStrictReturn &= !FDecl->isExternC();
- else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl))
- // Function pointer
- HasStrictReturn &= !VDecl->isExternC();
- }
-
- // We don't want to be too aggressive with the return checking, unless
- // it's explicit in the code opts or we're using an appropriate sanitizer.
- // Try to respect what the programmer intended.
- HasStrictReturn &= getCodeGenOpts().StrictReturn ||
- !MayDropFunctionReturn(getContext(), RetTy) ||
- getLangOpts().Sanitize.has(SanitizerKind::Memory) ||
- getLangOpts().Sanitize.has(SanitizerKind::Return);
-
// Determine if the return type could be partially undef
- if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) {
+ if (CodeGenOpts.EnableNoundefAttrs &&
+ HasStrictReturn(*this, RetTy, TargetDecl)) {
if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
RetAttrs.addAttribute(llvm::Attribute::NoUndef);
@@ -2319,7 +2368,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
RetAttrs.addAttribute(llvm::Attribute::SExt);
else
RetAttrs.addAttribute(llvm::Attribute::ZExt);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ABIArgInfo::Direct:
if (RetAI.getInReg())
RetAttrs.addAttribute(llvm::Attribute::InReg);
@@ -2330,8 +2379,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect: {
// inalloca and sret disable readnone and readonly
- FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
- .removeAttribute(llvm::Attribute::ReadNone);
+ AddPotentialArgAccess();
break;
}
@@ -2350,7 +2398,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
RetAttrs.addDereferenceableAttr(
getMinimumObjectSize(PTy).getQuantity());
- if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ if (getTypes().getTargetAddressSpace(PTy) == 0 &&
!CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
if (PTy->isObjectType()) {
@@ -2399,7 +2447,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
FI.arg_begin()->type.castAs<PointerType>()->getPointeeType();
if (!CodeGenOpts.NullPointerIsValid &&
- getContext().getTargetAddressSpace(FI.arg_begin()->type) == 0) {
+ getTypes().getTargetAddressSpace(FI.arg_begin()->type) == 0) {
Attrs.addAttribute(llvm::Attribute::NonNull);
Attrs.addDereferenceableAttr(getMinimumObjectSize(ThisTy).getQuantity());
} else {
@@ -2455,7 +2503,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
Attrs.addAttribute(llvm::Attribute::SExt);
else
Attrs.addAttribute(llvm::Attribute::ZExt);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ABIArgInfo::Direct:
if (ArgNo == 0 && FI.isChainCall())
Attrs.addAttribute(llvm::Attribute::Nest);
@@ -2501,9 +2549,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
Attrs.addAlignmentAttr(Align.getQuantity());
// byval disables readnone and readonly.
- FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
- .removeAttribute(llvm::Attribute::ReadNone);
-
+ AddPotentialArgAccess();
break;
}
case ABIArgInfo::IndirectAliased: {
@@ -2519,8 +2565,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
case ABIArgInfo::InAlloca:
// inalloca disables readnone and readonly.
- FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
- .removeAttribute(llvm::Attribute::ReadNone);
+ AddPotentialArgAccess();
continue;
}
@@ -2529,7 +2574,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
Attrs.addDereferenceableAttr(
getMinimumObjectSize(PTy).getQuantity());
- if (getContext().getTargetAddressSpace(PTy) == 0 &&
+ if (getTypes().getTargetAddressSpace(PTy) == 0 &&
!CodeGenOpts.NullPointerIsValid)
Attrs.addAttribute(llvm::Attribute::NonNull);
if (PTy->isObjectType()) {
@@ -2851,7 +2896,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::Align Alignment =
CGM.getNaturalTypeAlignment(ETy).getAsAlign();
AI->addAttrs(llvm::AttrBuilder(getLLVMContext()).addAlignmentAttr(Alignment));
- if (!getContext().getTargetAddressSpace(ETy) &&
+ if (!getTypes().getTargetAddressSpace(ETy) &&
!CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
}
@@ -2860,7 +2905,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// Set `align` attribute if any.
const auto *AVAttr = PVD->getAttr<AlignValueAttr>();
if (!AVAttr)
- if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
+ if (const auto *TOTy = OTy->getAs<TypedefType>())
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) {
// If alignment-assumption sanitizer is enabled, we do *not* add
@@ -3509,7 +3554,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
switch (RetAI.getKind()) {
case ABIArgInfo::InAlloca:
- // Aggregrates get evaluated directly into the destination. Sometimes we
+ // Aggregates get evaluated directly into the destination. Sometimes we
// need to return the sret value in a register, though.
assert(hasAggregateEvaluationKind(RetTy));
if (RetAI.getInAllocaSRet()) {
@@ -3537,7 +3582,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
break;
}
case TEK_Aggregate:
- // Do nothing; aggregrates get evaluated directly into the destination.
+ // Do nothing; aggregates get evaluated directly into the destination.
break;
case TEK_Scalar: {
LValueBaseInfo BaseInfo;
@@ -4078,7 +4123,7 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType,
bool CanCheckNullability = false;
if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) {
- auto Nullability = PVD->getType()->getNullability(getContext());
+ auto Nullability = PVD->getType()->getNullability();
CanCheckNullability = Nullability &&
*Nullability == NullabilityKind::NonNull &&
PVD->getTypeSourceInfo();
@@ -4106,7 +4151,7 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType,
EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc),
llvm::ConstantInt::get(Int32Ty, ArgNo + 1),
};
- EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None);
+ EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, std::nullopt);
}
// Check if the call is going to use the inalloca convention. This needs to
@@ -4426,7 +4471,7 @@ QualType CodeGenFunction::getVarArgType(const Expr *Arg) {
if (Arg->getType()->isIntegerType() &&
getContext().getTypeSize(Arg->getType()) <
- getContext().getTargetInfo().getPointerWidth(0) &&
+ getContext().getTargetInfo().getPointerWidth(LangAS::Default) &&
Arg->isNullPointerConstant(getContext(),
Expr::NPC_ValueDependentIsNotNull)) {
return getContext().getIntPtrType();
@@ -4449,7 +4494,7 @@ CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) {
llvm::CallInst *
CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
const llvm::Twine &name) {
- return EmitNounwindRuntimeCall(callee, None, name);
+ return EmitNounwindRuntimeCall(callee, std::nullopt, name);
}
/// Emits a call to the given nounwind runtime function.
@@ -4466,7 +4511,7 @@ CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
/// runtime function.
llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
const llvm::Twine &name) {
- return EmitRuntimeCall(callee, None, name);
+ return EmitRuntimeCall(callee, std::nullopt, name);
}
// Calls which may throw must have operand bundles indicating which funclet
@@ -4530,7 +4575,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(
llvm::CallBase *
CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee,
const Twine &name) {
- return EmitRuntimeCallOrInvoke(callee, None, name);
+ return EmitRuntimeCallOrInvoke(callee, std::nullopt, name);
}
/// Emits a call or invoke instruction to the given runtime function.
@@ -4580,7 +4625,7 @@ namespace {
/// Specify given \p NewAlign as the alignment of return value attribute. If
/// such attribute already exists, re-set it to the maximal one of two options.
-LLVM_NODISCARD llvm::AttributeList
+[[nodiscard]] llvm::AttributeList
maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx,
const llvm::AttributeList &Attrs,
llvm::Align NewAlign) {
@@ -4611,7 +4656,7 @@ protected:
public:
/// If we can, materialize the alignment as an attribute on return value.
- LLVM_NODISCARD llvm::AttributeList
+ [[nodiscard]] llvm::AttributeList
TryEmitAsCallSiteAttribute(const llvm::AttributeList &Attrs) {
if (!AA || OffsetCI || CGF.SanOpts.has(SanitizerKind::Alignment))
return Attrs;
@@ -4680,7 +4725,7 @@ public:
static unsigned getMaxVectorWidth(const llvm::Type *Ty) {
if (auto *VT = dyn_cast<llvm::VectorType>(Ty))
- return VT->getPrimitiveSizeInBits().getKnownMinSize();
+ return VT->getPrimitiveSizeInBits().getKnownMinValue();
if (auto *AT = dyn_cast<llvm::ArrayType>(Ty))
return getMaxVectorWidth(AT->getElementType());
@@ -4821,6 +4866,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
+ bool ArgHasMaybeUndefAttr =
+ IsArgumentMaybeUndef(TargetDecl, CallInfo.getNumRequiredArgs(), ArgNo);
+
switch (ArgInfo.getKind()) {
case ABIArgInfo::InAlloca: {
assert(NumIRArgs == 0);
@@ -4879,7 +4927,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Make a temporary alloca to pass the argument.
Address Addr = CreateMemTempWithoutCast(
I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
- IRCallArgs[FirstIRArg] = Addr.getPointer();
+
+ llvm::Value *Val = Addr.getPointer();
+ if (ArgHasMaybeUndefAttr)
+ Val = Builder.CreateFreeze(Addr.getPointer());
+ IRCallArgs[FirstIRArg] = Val;
I->copyInto(*this, Addr);
} else {
@@ -4937,7 +4989,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Create an aligned temporary, and copy to it.
Address AI = CreateMemTempWithoutCast(
I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
- IRCallArgs[FirstIRArg] = AI.getPointer();
+ llvm::Value *Val = AI.getPointer();
+ if (ArgHasMaybeUndefAttr)
+ Val = Builder.CreateFreeze(AI.getPointer());
+ IRCallArgs[FirstIRArg] = Val;
// Emit lifetime markers for the temporary alloca.
llvm::TypeSize ByvalTempElementSize =
@@ -4956,9 +5011,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
auto *T = llvm::PointerType::getWithSamePointeeType(
cast<llvm::PointerType>(V->getType()),
CGM.getDataLayout().getAllocaAddrSpace());
- IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast(
+
+ llvm::Value *Val = getTargetHooks().performAddrSpaceCast(
*this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
true);
+ if (ArgHasMaybeUndefAttr)
+ Val = Builder.CreateFreeze(Val);
+ IRCallArgs[FirstIRArg] = Val;
}
}
break;
@@ -5012,6 +5071,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
V->getType() != IRFuncTy->getParamType(FirstIRArg))
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));
+ if (ArgHasMaybeUndefAttr)
+ V = Builder.CreateFreeze(V);
IRCallArgs[FirstIRArg] = V;
break;
}
@@ -5056,6 +5117,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Address EltPtr = Builder.CreateStructGEP(Src, i);
llvm::Value *LI = Builder.CreateLoad(EltPtr);
+ if (ArgHasMaybeUndefAttr)
+ LI = Builder.CreateFreeze(LI);
IRCallArgs[FirstIRArg + i] = LI;
}
} else {
@@ -5072,6 +5135,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
}
+
+ if (ArgHasMaybeUndefAttr)
+ Load = Builder.CreateFreeze(Load);
IRCallArgs[FirstIRArg] = Load;
}
@@ -5095,15 +5161,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Type *scalarType = RV.getScalarVal()->getType();
auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType);
- auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);
+ auto scalarAlign = CGM.getDataLayout().getPrefTypeAlign(scalarType);
// Materialize to a temporary.
- addr =
- CreateTempAlloca(RV.getScalarVal()->getType(),
- CharUnits::fromQuantity(std::max(
- layout->getAlignment().value(), scalarAlign)),
- "tmp",
- /*ArraySize=*/nullptr, &AllocaAddr);
+ addr = CreateTempAlloca(
+ RV.getScalarVal()->getType(),
+ CharUnits::fromQuantity(std::max(layout->getAlignment(), scalarAlign)),
+ "tmp",
+ /*ArraySize=*/nullptr, &AllocaAddr);
tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
Builder.CreateStore(RV.getScalarVal(), addr);
@@ -5117,6 +5182,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
Address eltAddr = Builder.CreateStructGEP(addr, i);
llvm::Value *elt = Builder.CreateLoad(eltAddr);
+ if (ArgHasMaybeUndefAttr)
+ elt = Builder.CreateFreeze(elt);
IRCallArgs[IRArgPos++] = elt;
}
assert(IRArgPos == FirstIRArg + NumIRArgs);
@@ -5324,6 +5391,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
SmallVector<llvm::OperandBundleDef, 1> BundleList =
getBundlesForFunclet(CalleePtr);
+ if (SanOpts.has(SanitizerKind::KCFI) &&
+ !isa_and_nonnull<FunctionDecl>(TargetDecl))
+ EmitKCFIOperandBundle(ConcreteCallee, BundleList);
+
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
if (FD->hasAttr<StrictFPAttr>())
// All calls within a strictfp function are marked strictfp
@@ -5506,7 +5577,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Builder.CreateStore(elt, eltAddr);
}
// FALLTHROUGH
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case ABIArgInfo::InAlloca:
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index c6696c4df775..0795ea598411 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -1505,7 +1506,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
}
// Fallthrough: act like we're in the base variant.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Dtor_Base:
assert(Body);
@@ -1649,23 +1650,58 @@ namespace {
}
};
- static void EmitSanitizerDtorCallback(CodeGenFunction &CGF, llvm::Value *Ptr,
- CharUnits::QuantityType PoisonSize) {
+ class DeclAsInlineDebugLocation {
+ CGDebugInfo *DI;
+ llvm::MDNode *InlinedAt;
+ std::optional<ApplyDebugLocation> Location;
+
+ public:
+ DeclAsInlineDebugLocation(CodeGenFunction &CGF, const NamedDecl &Decl)
+ : DI(CGF.getDebugInfo()) {
+ if (!DI)
+ return;
+ InlinedAt = DI->getInlinedAt();
+ DI->setInlinedAt(CGF.Builder.getCurrentDebugLocation());
+ Location.emplace(CGF, Decl.getLocation());
+ }
+
+ ~DeclAsInlineDebugLocation() {
+ if (!DI)
+ return;
+ Location.reset();
+ DI->setInlinedAt(InlinedAt);
+ }
+ };
+
+ static void EmitSanitizerDtorCallback(
+ CodeGenFunction &CGF, StringRef Name, llvm::Value *Ptr,
+ std::optional<CharUnits::QuantityType> PoisonSize = {}) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
// Pass in void pointer and size of region as arguments to runtime
// function
- llvm::Value *Args[] = {CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy),
- llvm::ConstantInt::get(CGF.SizeTy, PoisonSize)};
+ SmallVector<llvm::Value *, 2> Args = {
+ CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy)};
+ SmallVector<llvm::Type *, 2> ArgTypes = {CGF.VoidPtrTy};
- llvm::Type *ArgTypes[] = {CGF.VoidPtrTy, CGF.SizeTy};
+ if (PoisonSize.has_value()) {
+ Args.emplace_back(llvm::ConstantInt::get(CGF.SizeTy, *PoisonSize));
+ ArgTypes.emplace_back(CGF.SizeTy);
+ }
llvm::FunctionType *FnType =
llvm::FunctionType::get(CGF.VoidTy, ArgTypes, false);
- llvm::FunctionCallee Fn =
- CGF.CGM.CreateRuntimeFunction(FnType, "__sanitizer_dtor_callback");
+ llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FnType, Name);
+
CGF.EmitNounwindRuntimeCall(Fn, Args);
}
+ static void
+ EmitSanitizerDtorFieldsCallback(CodeGenFunction &CGF, llvm::Value *Ptr,
+ CharUnits::QuantityType PoisonSize) {
+ EmitSanitizerDtorCallback(CGF, "__sanitizer_dtor_callback_fields", Ptr,
+ PoisonSize);
+ }
+
/// Poison base class with a trivial destructor.
struct SanitizeDtorTrivialBase final : EHScopeStack::Cleanup {
const CXXRecordDecl *BaseClass;
@@ -1687,7 +1723,11 @@ namespace {
if (!BaseSize.isPositive())
return;
- EmitSanitizerDtorCallback(CGF, Addr.getPointer(), BaseSize.getQuantity());
+ // Use the base class declaration location as inline DebugLocation. All
+ // fields of the class are destroyed.
+ DeclAsInlineDebugLocation InlineHere(CGF, *BaseClass);
+ EmitSanitizerDtorFieldsCallback(CGF, Addr.getPointer(),
+ BaseSize.getQuantity());
// Prevent the current stack frame from disappearing from the stack trace.
CGF.CurFn->addFnAttr("disable-tail-calls", "true");
@@ -1735,7 +1775,10 @@ namespace {
if (!PoisonSize.isPositive())
return;
- EmitSanitizerDtorCallback(CGF, OffsetPtr, PoisonSize.getQuantity());
+ // Use the top field declaration location as inline DebugLocation.
+ DeclAsInlineDebugLocation InlineHere(
+ CGF, **std::next(Dtor->getParent()->field_begin(), StartIndex));
+ EmitSanitizerDtorFieldsCallback(CGF, OffsetPtr, PoisonSize.getQuantity());
// Prevent the current stack frame from disappearing from the stack trace.
CGF.CurFn->addFnAttr("disable-tail-calls", "true");
@@ -1752,15 +1795,13 @@ namespace {
void Emit(CodeGenFunction &CGF, Flags flags) override {
assert(Dtor->getParent()->isDynamicClass());
(void)Dtor;
- ASTContext &Context = CGF.getContext();
// Poison vtable and vtable ptr if they exist for this class.
llvm::Value *VTablePtr = CGF.LoadCXXThis();
- CharUnits::QuantityType PoisonSize =
- Context.toCharUnitsFromBits(CGF.PointerWidthInBits).getQuantity();
// Pass in void pointer and size of region as arguments to runtime
// function
- EmitSanitizerDtorCallback(CGF, VTablePtr, PoisonSize);
+ EmitSanitizerDtorCallback(CGF, "__sanitizer_dtor_callback_vptr",
+ VTablePtr);
}
};
@@ -1768,12 +1809,12 @@ namespace {
ASTContext &Context;
EHScopeStack &EHStack;
const CXXDestructorDecl *DD;
- llvm::Optional<unsigned> StartIndex;
+ std::optional<unsigned> StartIndex;
public:
SanitizeDtorCleanupBuilder(ASTContext &Context, EHScopeStack &EHStack,
const CXXDestructorDecl *DD)
- : Context(Context), EHStack(EHStack), DD(DD), StartIndex(llvm::None) {}
+ : Context(Context), EHStack(EHStack), DD(DD), StartIndex(std::nullopt) {}
void PushCleanupForField(const FieldDecl *Field) {
if (Field->isZeroSize(Context))
return;
@@ -1782,15 +1823,15 @@ namespace {
if (!StartIndex)
StartIndex = FieldIndex;
} else if (StartIndex) {
- EHStack.pushCleanup<SanitizeDtorFieldRange>(
- NormalAndEHCleanup, DD, StartIndex.value(), FieldIndex);
- StartIndex = None;
+ EHStack.pushCleanup<SanitizeDtorFieldRange>(NormalAndEHCleanup, DD,
+ *StartIndex, FieldIndex);
+ StartIndex = std::nullopt;
}
}
void End() {
if (StartIndex)
EHStack.pushCleanup<SanitizeDtorFieldRange>(NormalAndEHCleanup, DD,
- StartIndex.value(), -1);
+ *StartIndex, -1);
}
};
} // end anonymous namespace
@@ -2543,7 +2584,7 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) {
llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true)
->getPointerTo(ProgAS)
->getPointerTo(GlobalsAS);
- // vtable field is is derived from `this` pointer, therefore they should be in
+ // vtable field is derived from `this` pointer, therefore they should be in
// the same addr space. Note that this might not be LLVM address space 0.
VTableField = Builder.CreateElementBitCast(VTableField, VTablePtrTy);
VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy);
@@ -2955,7 +2996,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() {
CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
// Add the rest of the parameters.
- for (auto param : BD->parameters())
+ for (auto *param : BD->parameters())
EmitDelegateCallArg(CallArgs, param, param->getBeginLoc());
assert(!Lambda->isGenericLambda() &&
@@ -2969,12 +3010,13 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
// Start building arguments for forwarding call
CallArgList CallArgs;
- QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda));
- llvm::Value *ThisPtr = llvm::UndefValue::get(getTypes().ConvertType(ThisType));
- CallArgs.add(RValue::get(ThisPtr), ThisType);
+ QualType LambdaType = getContext().getRecordType(Lambda);
+ QualType ThisType = getContext().getPointerType(LambdaType);
+ Address ThisPtr = CreateMemTemp(LambdaType, "unused.capture");
+ CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
// Add the rest of the parameters.
- for (auto Param : MD->parameters())
+ for (auto *Param : MD->parameters())
EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc());
const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp
index 5035ed34358d..43758ac27e43 100644
--- a/clang/lib/CodeGen/CGCleanup.cpp
+++ b/clang/lib/CodeGen/CGCleanup.cpp
@@ -556,7 +556,7 @@ static llvm::BasicBlock *SimplifyCleanupEntry(CodeGenFunction &CGF,
Entry->replaceAllUsesWith(Pred);
// Merge the blocks.
- Pred->getInstList().splice(Pred->end(), Entry->getInstList());
+ Pred->splice(Pred->end(), Entry);
// Kill the entry block.
Entry->eraseFromParent();
@@ -942,7 +942,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
// Append the prepared cleanup prologue from above.
llvm::BasicBlock *NormalExit = Builder.GetInsertBlock();
for (unsigned I = 0, E = InstsToAppend.size(); I != E; ++I)
- NormalExit->getInstList().push_back(InstsToAppend[I]);
+ InstsToAppend[I]->insertInto(NormalExit, NormalExit->end());
// Optimistically hope that any fixups will continue falling through.
for (unsigned I = FixupDepth, E = EHStack.getNumBranchFixups();
@@ -1016,8 +1016,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
// throwing cleanups. For funclet EH personalities, the cleanupendpad models
// program termination when cleanups throw.
bool PushedTerminate = false;
- SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
- CurrentFuncletPad);
+ SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad);
llvm::CleanupPadInst *CPI = nullptr;
const EHPersonality &Personality = EHPersonality::get(*this);
@@ -1336,7 +1335,8 @@ static void EmitSehScope(CodeGenFunction &CGF,
CGF.getBundlesForFunclet(SehCppScope.getCallee());
if (CGF.CurrentFuncletPad)
BundleList.emplace_back("funclet", CGF.CurrentFuncletPad);
- CGF.Builder.CreateInvoke(SehCppScope, Cont, InvokeDest, None, BundleList);
+ CGF.Builder.CreateInvoke(SehCppScope, Cont, InvokeDest, std::nullopt,
+ BundleList);
CGF.EmitBlock(Cont);
}
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 594c7d49df3c..775a4341558a 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -539,7 +539,7 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate());
// Create mapping between parameters and copy-params for coroutine function.
- auto ParamMoves = S.getParamMoves();
+ llvm::ArrayRef<const Stmt *> ParamMoves = S.getParamMoves();
assert(
(ParamMoves.size() == 0 || (ParamMoves.size() == FnArgs.size())) &&
"ParamMoves and FnArgs should be the same size for coroutine function");
@@ -673,9 +673,23 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
}
CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin "
"has been used earlier in this function");
- auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
+ auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
return RValue::get(NullPtr);
}
+ case llvm::Intrinsic::coro_size: {
+ auto &Context = getContext();
+ CanQualType SizeTy = Context.getSizeType();
+ llvm::IntegerType *T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
+ llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::coro_size, T);
+ return RValue::get(Builder.CreateCall(F));
+ }
+ case llvm::Intrinsic::coro_align: {
+ auto &Context = getContext();
+ CanQualType SizeTy = Context.getSizeType();
+ llvm::IntegerType *T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
+ llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::coro_align, T);
+ return RValue::get(Builder.CreateCall(F));
+ }
// The following three intrinsics take a token parameter referring to a token
// returned by earlier call to @llvm.coro.id. Since we cannot represent it in
// builtins, we patch it up here.
@@ -689,7 +703,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_id has"
" been used earlier in this function");
// Fallthrough to the next case to add TokenNone as the first argument.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
// @llvm.coro.suspend takes a token parameter. Add token 'none' as the first
// argument.
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 94c48316add7..3bde43cc1db3 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -26,6 +26,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
@@ -47,7 +48,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/SHA256.h"
#include "llvm/Support/TimeProfiler.h"
+#include <optional>
using namespace clang;
using namespace clang::CodeGen;
@@ -342,35 +346,44 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
return StringRef();
}
-Optional<llvm::DIFile::ChecksumKind>
-CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
+std::optional<llvm::DIFile::ChecksumKind>
+CGDebugInfo::computeChecksum(FileID FID, SmallString<64> &Checksum) const {
Checksum.clear();
if (!CGM.getCodeGenOpts().EmitCodeView &&
CGM.getCodeGenOpts().DwarfVersion < 5)
- return None;
+ return std::nullopt;
SourceManager &SM = CGM.getContext().getSourceManager();
- Optional<llvm::MemoryBufferRef> MemBuffer = SM.getBufferOrNone(FID);
+ std::optional<llvm::MemoryBufferRef> MemBuffer = SM.getBufferOrNone(FID);
if (!MemBuffer)
- return None;
+ return std::nullopt;
- llvm::toHex(
- llvm::MD5::hash(llvm::arrayRefFromStringRef(MemBuffer->getBuffer())),
- /*LowerCase*/ true, Checksum);
- return llvm::DIFile::CSK_MD5;
+ auto Data = llvm::arrayRefFromStringRef(MemBuffer->getBuffer());
+ switch (CGM.getCodeGenOpts().getDebugSrcHash()) {
+ case clang::CodeGenOptions::DSH_MD5:
+ llvm::toHex(llvm::MD5::hash(Data), /*LowerCase=*/true, Checksum);
+ return llvm::DIFile::CSK_MD5;
+ case clang::CodeGenOptions::DSH_SHA1:
+ llvm::toHex(llvm::SHA1::hash(Data), /*LowerCase=*/true, Checksum);
+ return llvm::DIFile::CSK_SHA1;
+ case clang::CodeGenOptions::DSH_SHA256:
+ llvm::toHex(llvm::SHA256::hash(Data), /*LowerCase=*/true, Checksum);
+ return llvm::DIFile::CSK_SHA256;
+ }
+ llvm_unreachable("Unhandled DebugSrcHashKind enum");
}
-Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
- FileID FID) {
+std::optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
+ FileID FID) {
if (!CGM.getCodeGenOpts().EmbedSource)
- return None;
+ return std::nullopt;
bool SourceInvalid = false;
StringRef Source = SM.getBufferData(FID, &SourceInvalid);
if (SourceInvalid)
- return None;
+ return std::nullopt;
return Source;
}
@@ -405,19 +418,20 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
return cast<llvm::DIFile>(V);
}
- SmallString<32> Checksum;
+ SmallString<64> Checksum;
- Optional<llvm::DIFile::ChecksumKind> CSKind = computeChecksum(FID, Checksum);
- Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
+ std::optional<llvm::DIFile::ChecksumKind> CSKind =
+ computeChecksum(FID, Checksum);
+ std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
}
-llvm::DIFile *
-CGDebugInfo::createFile(StringRef FileName,
- Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
- Optional<StringRef> Source) {
+llvm::DIFile *CGDebugInfo::createFile(
+ StringRef FileName,
+ std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ std::optional<StringRef> Source) {
StringRef Dir;
StringRef File;
std::string RemappedFile = remapDIPath(FileName);
@@ -499,9 +513,9 @@ StringRef CGDebugInfo::getCurrentDirname() {
}
void CGDebugInfo::CreateCompileUnit() {
- SmallString<32> Checksum;
- Optional<llvm::DIFile::ChecksumKind> CSKind;
- Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
+ SmallString<64> Checksum;
+ std::optional<llvm::DIFile::ChecksumKind> CSKind;
+ std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
// Should we be asking the SourceManager for the main file name, instead of
// accepting it as an argument? This just causes the main file name to
@@ -512,7 +526,8 @@ void CGDebugInfo::CreateCompileUnit() {
// Get absolute path name.
SourceManager &SM = CGM.getContext().getSourceManager();
- std::string MainFileName = CGM.getCodeGenOpts().MainFileName;
+ auto &CGO = CGM.getCodeGenOpts();
+ std::string MainFileName = CGO.MainFileName;
if (MainFileName.empty())
MainFileName = "<stdin>";
@@ -521,7 +536,7 @@ void CGDebugInfo::CreateCompileUnit() {
// a relative path, so we look into the actual file entry for the main
// file to determine the real absolute path for the file.
std::string MainFileDir;
- if (Optional<FileEntryRef> MainFile =
+ if (OptionalFileEntryRef MainFile =
SM.getFileEntryRefForID(SM.getMainFileID())) {
MainFileDir = std::string(MainFile->getDir().getName());
if (!llvm::sys::path::is_absolute(MainFileName)) {
@@ -548,11 +563,11 @@ void CGDebugInfo::CreateCompileUnit() {
if (LO.CPlusPlus) {
if (LO.ObjC)
LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
- else if (LO.CPlusPlus14 && (!CGM.getCodeGenOpts().DebugStrictDwarf ||
- CGM.getCodeGenOpts().DwarfVersion >= 5))
+ else if (CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
+ else if (LO.CPlusPlus14)
LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14;
- else if (LO.CPlusPlus11 && (!CGM.getCodeGenOpts().DebugStrictDwarf ||
- CGM.getCodeGenOpts().DwarfVersion >= 5))
+ else if (LO.CPlusPlus11)
LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11;
else
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
@@ -563,6 +578,8 @@ void CGDebugInfo::CreateCompileUnit() {
LangTag = llvm::dwarf::DW_LANG_OpenCL;
} else if (LO.RenderScript) {
LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
+ } else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) {
+ LangTag = llvm::dwarf::DW_LANG_C11;
} else if (LO.C99) {
LangTag = llvm::dwarf::DW_LANG_C99;
} else {
@@ -883,10 +900,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return DBuilder.createBasicType(BTName, Size, Encoding);
}
-llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) {
- return DBuilder.createUnspecifiedType("auto");
-}
-
llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) {
StringRef Name = Ty->isUnsigned() ? "unsigned _BitInt" : "_BitInt";
@@ -1137,13 +1150,12 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
QualType PointeeTy,
llvm::DIFile *Unit) {
// Bit size, align and offset of the type.
- // Size is always the size of a pointer. We can't use getTypeSize here
- // because that does not return the correct value for references.
- unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy);
- uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace);
+ // Size is always the size of a pointer.
+ uint64_t Size = CGM.getContext().getTypeSize(Ty);
auto Align = getTypeAlignIfRequired(Ty, CGM.getContext());
- Optional<unsigned> DWARFAddressSpace =
- CGM.getTarget().getDWARFAddressSpace(AddressSpace);
+ std::optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(
+ CGM.getTypes().getTargetAddressSpace(PointeeTy));
SmallVector<llvm::Metadata *, 4> Annots;
auto *BTFAttrTy = dyn_cast<BTFTagAttributedType>(PointeeTy);
@@ -1266,18 +1278,31 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
assert(Ty->isTypeAlias());
llvm::DIType *Src = getOrCreateType(Ty->getAliasedType(), Unit);
- auto *AliasDecl =
- cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl())
- ->getTemplatedDecl();
+ const TemplateDecl *TD = Ty->getTemplateName().getAsTemplateDecl();
+ if (isa<BuiltinTemplateDecl>(TD))
+ return Src;
+ const auto *AliasDecl = cast<TypeAliasTemplateDecl>(TD)->getTemplatedDecl();
if (AliasDecl->hasAttr<NoDebugAttr>())
return Src;
SmallString<128> NS;
llvm::raw_svector_ostream OS(NS);
- Ty->getTemplateName().print(OS, getPrintingPolicy(),
- TemplateName::Qualified::None);
- printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy());
+
+ auto PP = getPrintingPolicy();
+ Ty->getTemplateName().print(OS, PP, TemplateName::Qualified::None);
+
+ // Disable PrintCanonicalTypes here because we want
+ // the DW_AT_name to benefit from the TypePrinter's ability
+ // to skip defaulted template arguments.
+ //
+ // FIXME: Once -gsimple-template-names is enabled by default
+ // and we attach template parameters to alias template DIEs
+ // we don't need to worry about customizing the PrintingPolicy
+ // here anymore.
+ PP.PrintCanonicalTypes = false;
+ printTemplateArgumentList(OS, Ty->template_arguments(), PP,
+ TD->getTemplateParameters());
SourceLocation Loc = AliasDecl->getLocation();
return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc),
@@ -1285,6 +1310,33 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
getDeclContextDescriptor(AliasDecl));
}
+/// Convert an AccessSpecifier into the corresponding DINode flag.
+/// As an optimization, return 0 if the access specifier equals the
+/// default for the containing type.
+static llvm::DINode::DIFlags getAccessFlag(AccessSpecifier Access,
+ const RecordDecl *RD) {
+ AccessSpecifier Default = clang::AS_none;
+ if (RD && RD->isClass())
+ Default = clang::AS_private;
+ else if (RD && (RD->isStruct() || RD->isUnion()))
+ Default = clang::AS_public;
+
+ if (Access == Default)
+ return llvm::DINode::FlagZero;
+
+ switch (Access) {
+ case clang::AS_private:
+ return llvm::DINode::FlagPrivate;
+ case clang::AS_protected:
+ return llvm::DINode::FlagProtected;
+ case clang::AS_public:
+ return llvm::DINode::FlagPublic;
+ case clang::AS_none:
+ return llvm::DINode::FlagZero;
+ }
+ llvm_unreachable("unexpected access enumerator");
+}
+
llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
llvm::DIFile *Unit) {
llvm::DIType *Underlying =
@@ -1300,10 +1352,16 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
uint32_t Align = getDeclAlignIfRequired(Ty->getDecl(), CGM.getContext());
// Typedefs are derived from some other type.
llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(Ty->getDecl());
+
+ llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ const DeclContext *DC = Ty->getDecl()->getDeclContext();
+ if (isa<RecordDecl>(DC))
+ Flags = getAccessFlag(Ty->getDecl()->getAccess(), cast<RecordDecl>(DC));
+
return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(),
getOrCreateFile(Loc), getLineNumber(Loc),
getDeclContextDescriptor(Ty->getDecl()), Align,
- Annotations);
+ Flags, Annotations);
}
static unsigned getDwarfCC(CallingConv CC) {
@@ -1397,33 +1455,6 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty,
return F;
}
-/// Convert an AccessSpecifier into the corresponding DINode flag.
-/// As an optimization, return 0 if the access specifier equals the
-/// default for the containing type.
-static llvm::DINode::DIFlags getAccessFlag(AccessSpecifier Access,
- const RecordDecl *RD) {
- AccessSpecifier Default = clang::AS_none;
- if (RD && RD->isClass())
- Default = clang::AS_private;
- else if (RD && (RD->isStruct() || RD->isUnion()))
- Default = clang::AS_public;
-
- if (Access == Default)
- return llvm::DINode::FlagZero;
-
- switch (Access) {
- case clang::AS_private:
- return llvm::DINode::FlagPrivate;
- case clang::AS_protected:
- return llvm::DINode::FlagProtected;
- case clang::AS_public:
- return llvm::DINode::FlagPublic;
- case clang::AS_none:
- return llvm::DINode::FlagZero;
- }
- llvm_unreachable("unexpected access enumerator");
-}
-
llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
llvm::DIScope *RecordTy,
const RecordDecl *RD) {
@@ -1497,7 +1528,7 @@ void CGDebugInfo::CollectRecordLambdaFields(
if (C.capturesVariable()) {
SourceLocation Loc = C.getLocation();
assert(!Field->isBitField() && "lambdas don't have bitfield members!");
- VarDecl *V = C.getCapturedVar();
+ ValueDecl *V = C.getCapturedVar();
StringRef VName = V->getName();
llvm::DIFile *VUnit = getOrCreateFile(Loc);
auto Align = getDeclAlignIfRequired(V, CGM.getContext());
@@ -1637,28 +1668,31 @@ void CGDebugInfo::CollectRecordFields(
} else if (CGM.getCodeGenOpts().EmitCodeView) {
// Debug info for nested types is included in the member list only for
// CodeView.
- if (const auto *nestedType = dyn_cast<TypeDecl>(I))
+ if (const auto *nestedType = dyn_cast<TypeDecl>(I)) {
+ // MSVC doesn't generate nested type for anonymous struct/union.
+ if (isa<RecordDecl>(I) &&
+ cast<RecordDecl>(I)->isAnonymousStructOrUnion())
+ continue;
if (!nestedType->isImplicit() &&
nestedType->getDeclContext() == record)
CollectRecordNestedType(nestedType, elements);
+ }
}
}
}
llvm::DISubroutineType *
CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
- llvm::DIFile *Unit, bool decl) {
- const auto *Func = Method->getType()->castAs<FunctionProtoType>();
+ llvm::DIFile *Unit) {
+ const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>();
if (Method->isStatic())
return cast_or_null<llvm::DISubroutineType>(
getOrCreateType(QualType(Func, 0), Unit));
- return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit, decl);
+ return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit);
}
-llvm::DISubroutineType *
-CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr,
- const FunctionProtoType *Func,
- llvm::DIFile *Unit, bool decl) {
+llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
+ QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) {
FunctionProtoType::ExtProtoInfo EPI = Func->getExtProtoInfo();
Qualifiers &Qc = EPI.TypeQuals;
Qc.removeConst();
@@ -1681,31 +1715,19 @@ CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr,
assert(Args.size() && "Invalid number of arguments!");
SmallVector<llvm::Metadata *, 16> Elts;
- // First element is always return type. For 'void' functions it is NULL.
- QualType temp = Func->getReturnType();
- if (temp->getTypeClass() == Type::Auto && decl) {
- const AutoType *AT = cast<AutoType>(temp);
- // It may be tricky in some cases to link the specification back the lambda
- // call operator and so we skip emitting "auto" for lambdas. This is
- // consistent with gcc as well.
- if (AT->isDeduced() && ThisPtr->getPointeeCXXRecordDecl()->isLambda())
- Elts.push_back(getOrCreateType(AT->getDeducedType(), Unit));
- else
- Elts.push_back(CreateType(AT));
- } else
- Elts.push_back(Args[0]);
+ // First element is always return type. For 'void' functions it is NULL.
+ Elts.push_back(Args[0]);
// "this" pointer is always first argument.
const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl();
if (isa<ClassTemplateSpecializationDecl>(RD)) {
// Create pointer type directly in this case.
const PointerType *ThisPtrTy = cast<PointerType>(ThisPtr);
- QualType PointeeTy = ThisPtrTy->getPointeeType();
- unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy);
- uint64_t Size = CGM.getTarget().getPointerWidth(AS);
+ uint64_t Size = CGM.getContext().getTypeSize(ThisPtrTy);
auto Align = getTypeAlignIfRequired(ThisPtrTy, CGM.getContext());
- llvm::DIType *PointeeType = getOrCreateType(PointeeTy, Unit);
+ llvm::DIType *PointeeType =
+ getOrCreateType(ThisPtrTy->getPointeeType(), Unit);
llvm::DIType *ThisPtrType =
DBuilder.createPointerType(PointeeType, Size, Align);
TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
@@ -1747,7 +1769,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method);
StringRef MethodName = getFunctionName(Method);
- llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit, true);
+ llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit);
// Since a single ctor/dtor corresponds to multiple functions, it doesn't
// make sense to give a single ctor/dtor a linkage name.
@@ -1775,7 +1797,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
int ThisAdjustment = 0;
- if (Method->isVirtual()) {
+ if (VTableContextBase::hasVtableSlot(Method)) {
if (Method->isPure())
SPFlags |= llvm::DISubprogram::SPFlagPureVirtual;
else
@@ -1979,7 +2001,7 @@ void CGDebugInfo::CollectCXXBasesAux(
}
llvm::DINodeArray
-CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs,
+CGDebugInfo::CollectTemplateParams(std::optional<TemplateArgs> OArgs,
llvm::DIFile *Unit) {
if (!OArgs)
return llvm::DINodeArray();
@@ -1989,35 +2011,23 @@ CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs,
const TemplateArgument &TA = Args.Args[i];
StringRef Name;
bool defaultParameter = false;
- if (Args.TList)
+ if (Args.TList) {
Name = Args.TList->getParam(i)->getName();
+
+ NamedDecl const *ND = Args.TList->getParam(i);
+ defaultParameter = clang::isSubstitutedDefaultArgument(
+ CGM.getContext(), TA, ND, Args.Args, Args.TList->getDepth());
+ }
+
switch (TA.getKind()) {
case TemplateArgument::Type: {
llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit);
-
- if (Args.TList)
- if (auto *templateType =
- dyn_cast_or_null<TemplateTypeParmDecl>(Args.TList->getParam(i)))
- if (templateType->hasDefaultArgument())
- defaultParameter =
- templateType->getDefaultArgument() == TA.getAsType();
-
TemplateParams.push_back(DBuilder.createTemplateTypeParameter(
TheCU, Name, TTy, defaultParameter));
} break;
case TemplateArgument::Integral: {
llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit);
- if (Args.TList && CGM.getCodeGenOpts().DwarfVersion >= 5)
- if (auto *templateType = dyn_cast_or_null<NonTypeTemplateParmDecl>(
- Args.TList->getParam(i)))
- if (templateType->hasDefaultArgument() &&
- !templateType->getDefaultArgument()->isValueDependent())
- defaultParameter = llvm::APSInt::isSameValue(
- templateType->getDefaultArgument()->EvaluateKnownConstInt(
- CGM.getContext()),
- TA.getAsIntegral());
-
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
TheCU, Name, TTy, defaultParameter,
llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral())));
@@ -2093,7 +2103,7 @@ CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs,
TA.getAsTemplate().getAsTemplateDecl()->printQualifiedName(
OS, getPrintingPolicy());
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
- TheCU, Name, nullptr, OS.str()));
+ TheCU, Name, nullptr, OS.str(), defaultParameter));
break;
}
case TemplateArgument::Pack:
@@ -2122,7 +2132,7 @@ CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs,
return DBuilder.getOrCreateArray(TemplateParams);
}
-Optional<CGDebugInfo::TemplateArgs>
+std::optional<CGDebugInfo::TemplateArgs>
CGDebugInfo::GetTemplateArgs(const FunctionDecl *FD) const {
if (FD->getTemplatedKind() ==
FunctionDecl::TK_FunctionTemplateSpecialization) {
@@ -2131,22 +2141,22 @@ CGDebugInfo::GetTemplateArgs(const FunctionDecl *FD) const {
->getTemplateParameters();
return {{TList, FD->getTemplateSpecializationArgs()->asArray()}};
}
- return None;
+ return std::nullopt;
}
-Optional<CGDebugInfo::TemplateArgs>
+std::optional<CGDebugInfo::TemplateArgs>
CGDebugInfo::GetTemplateArgs(const VarDecl *VD) const {
// Always get the full list of parameters, not just the ones from the
// specialization. A partial specialization may have fewer parameters than
// there are arguments.
auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VD);
if (!TS)
- return None;
+ return std::nullopt;
VarTemplateDecl *T = TS->getSpecializedTemplate();
const TemplateParameterList *TList = T->getTemplateParameters();
auto TA = TS->getTemplateArgs().asArray();
return {{TList, TA}};
}
-Optional<CGDebugInfo::TemplateArgs>
+std::optional<CGDebugInfo::TemplateArgs>
CGDebugInfo::GetTemplateArgs(const RecordDecl *RD) const {
if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
// Always get the full list of parameters, not just the ones from the
@@ -2157,7 +2167,7 @@ CGDebugInfo::GetTemplateArgs(const RecordDecl *RD) const {
const TemplateArgumentList &TAList = TSpecial->getTemplateArgs();
return {{TPList, TAList.asArray()}};
}
- return None;
+ return std::nullopt;
}
llvm::DINodeArray
@@ -2202,7 +2212,7 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements);
unsigned Size = Context.getTypeSize(Context.VoidPtrTy);
unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
- Optional<unsigned> DWARFAddressSpace =
+ std::optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
llvm::DIType *vtbl_ptr_type = DBuilder.createPointerType(
@@ -2299,7 +2309,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData;
unsigned VTableWidth = PtrWidth * VSlotCount;
unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
- Optional<unsigned> DWARFAddressSpace =
+ std::optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
// Create a very wide void* type and insert it directly in the element list.
@@ -2356,7 +2366,7 @@ void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI,
return;
llvm::MDNode *node;
if (AllocatedTy->isVoidType())
- node = llvm::MDNode::get(CGM.getLLVMContext(), None);
+ node = llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt);
else
node = getOrCreateType(AllocatedTy, getOrCreateFile(Loc));
@@ -2766,8 +2776,12 @@ llvm::DIModule *CGDebugInfo::getOrCreateModuleRef(ASTSourceDescriptor Mod,
llvm::DIBuilder DIB(CGM.getModule());
SmallString<0> PCM;
- if (!llvm::sys::path::is_absolute(Mod.getASTFile()))
- PCM = Mod.getPath();
+ if (!llvm::sys::path::is_absolute(Mod.getASTFile())) {
+ if (CGM.getHeaderSearchOpts().ModuleFileHomeIsCwd)
+ PCM = getCurrentDirname();
+ else
+ PCM = Mod.getPath();
+ }
llvm::sys::path::append(PCM, Mod.getASTFile());
DIB.createCompileUnit(
TheCU->getSourceLanguage(),
@@ -2928,6 +2942,9 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
else if (Field->getAccessControl() == ObjCIvarDecl::Public)
Flags = llvm::DINode::FlagPublic;
+ if (Field->isBitField())
+ Flags |= llvm::DINode::FlagBitField;
+
llvm::MDNode *PropertyNode = nullptr;
if (ObjCImplementationDecl *ImpD = ID->getImplementation()) {
if (ObjCPropertyImplDecl *PImpD =
@@ -3160,7 +3177,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
return DBuilder.createMemberPointerType(
getOrCreateInstanceMethodType(
CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()),
- FPT, U, false),
+ FPT, U),
ClassType, Size, /*Align=*/0, Flags);
}
@@ -3285,7 +3302,7 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType();
break;
case Type::TypeOf:
- T = cast<TypeOfType>(T)->getUnderlyingType();
+ T = cast<TypeOfType>(T)->getUnmodifiedType();
break;
case Type::Decltype:
T = cast<DecltypeType>(T)->getUnderlyingType();
@@ -3351,7 +3368,7 @@ void CGDebugInfo::completeTemplateDefinition(
}
void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) {
- if (DebugKind <= codegenoptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly || D.isDynamicClass())
return;
completeClassData(&D);
@@ -3613,7 +3630,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
// them distinct if they are ODR-uniqued.
if (Identifier.empty())
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case llvm::dwarf::DW_TAG_structure_type:
case llvm::dwarf::DW_TAG_union_type:
@@ -3910,7 +3927,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) {
return SP;
}
- for (auto NextFD : FD->redecls()) {
+ for (auto *NextFD : FD->redecls()) {
auto MI = SPCache.find(NextFD->getCanonicalDecl());
if (MI != SPCache.end()) {
auto *SP = dyn_cast_or_null<llvm::DISubprogram>(MI->second);
@@ -3968,10 +3985,11 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
!CGM.getCodeGenOpts().EmitCodeView))
// Create fake but valid subroutine type. Otherwise -verify would fail, and
// subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields.
- return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None));
+ return DBuilder.createSubroutineType(
+ DBuilder.getOrCreateTypeArray(std::nullopt));
if (const auto *Method = dyn_cast<CXXMethodDecl>(D))
- return getOrCreateMethodType(Method, F, false);
+ return getOrCreateMethodType(Method, F);
const auto *FTy = FnType->getAs<FunctionType>();
CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C;
@@ -4097,8 +4115,12 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (Name.startswith("\01"))
Name = Name.substr(1);
+ assert((!D || !isa<VarDecl>(D) ||
+ GD.getDynamicInitKind() != DynamicInitKind::NoStub) &&
+ "Unexpected DynamicInitKind !");
+
if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>() ||
- (isa<VarDecl>(D) && GD.getDynamicInitKind() != DynamicInitKind::NoStub)) {
+ isa<VarDecl>(D) || isa<CapturedDecl>(D)) {
Flags |= llvm::DINode::FlagArtificial;
// Artificial functions should not silently reuse CurLoc.
CurLoc = SourceLocation();
@@ -4196,10 +4218,28 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
SPFlags |= llvm::DISubprogram::SPFlagOptimized;
llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D);
- llvm::DISubprogram *SP = DBuilder.createFunction(
- FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
- TParamsArray.get(), getFunctionDeclaration(D), nullptr, Annotations);
+ llvm::DISubroutineType *STy = getOrCreateFunctionType(D, FnType, Unit);
+ llvm::DISubprogram *SP =
+ DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, STy,
+ ScopeLine, Flags, SPFlags, TParamsArray.get(),
+ getFunctionDeclaration(D), nullptr, Annotations);
+
+ // Preserve btf_decl_tag attributes for parameters of extern functions
+ // for BPF target. The parameters created in this loop are attached as
+ // DISubprogram's retainedNodes in the subsequent finalizeSubprogram call.
+ if (IsDeclForCallSite && CGM.getTarget().getTriple().isBPF()) {
+ if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+ llvm::DITypeRefArray ParamTypes = STy->getTypeArray();
+ unsigned ArgNo = 1;
+ for (ParmVarDecl *PD : FD->parameters()) {
+ llvm::DINodeArray ParamAnnotations = CollectBTFDeclTagAnnotations(PD);
+ DBuilder.createParameterVariable(
+ SP, PD->getName(), ArgNo, Unit, LineNo, ParamTypes[ArgNo], true,
+ llvm::DINode::FlagZero, ParamAnnotations);
+ ++ArgNo;
+ }
+ }
+ }
if (IsDeclForCallSite)
Fn->setSubprogram(SP);
@@ -4218,17 +4258,11 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke,
if (Func->getSubprogram())
return;
- // Do not emit a declaration subprogram for a builtin, a function with nodebug
- // attribute, or if call site info isn't required. Also, elide declarations
- // for functions with reserved names, as call site-related features aren't
- // interesting in this case (& also, the compiler may emit calls to these
- // functions without debug locations, which makes the verifier complain).
- if (CalleeDecl->getBuiltinID() != 0 || CalleeDecl->hasAttr<NoDebugAttr>() ||
+ // Do not emit a declaration subprogram for a function with nodebug
+ // attribute, or if call site info isn't required.
+ if (CalleeDecl->hasAttr<NoDebugAttr>() ||
getCallSiteRelatedAttrs() == llvm::DINode::FlagZero)
return;
- if (CalleeDecl->isReserved(CGM.getLangOpts()) !=
- ReservedIdentifierStatus::NotReserved)
- return;
// If there is no DISubprogram attached to the function being called,
// create the one describing the function in order to have complete
@@ -4282,7 +4316,7 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
void CGDebugInfo::AppendAddressSpaceXDeref(
unsigned AddressSpace, SmallVectorImpl<uint64_t> &Expr) const {
- Optional<unsigned> DWARFAddressSpace =
+ std::optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (!DWARFAddressSpace)
return;
@@ -4379,7 +4413,7 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
CharUnits Align = CGM.getContext().getDeclAlign(VD);
if (Align > CGM.getContext().toCharUnitsFromBits(
- CGM.getTarget().getPointerAlign(0))) {
+ CGM.getTarget().getPointerAlign(LangAS::Default))) {
CharUnits FieldOffsetInBytes =
CGM.getContext().toCharUnitsFromBits(FieldOffset);
CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align);
@@ -4413,7 +4447,7 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
llvm::Value *Storage,
- llvm::Optional<unsigned> ArgNo,
+ std::optional<unsigned> ArgNo,
CGBuilderTy &Builder,
const bool UsePointerValue) {
assert(CGM.getCodeGenOpts().hasReducedDebugInfo());
@@ -4453,7 +4487,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
- unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType());
+ unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(VD->getType());
AppendAddressSpaceXDeref(AddressSpace, Expr);
// If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an
@@ -4479,7 +4513,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of __forwarding field
offset = CGM.getContext().toCharUnitsFromBits(
- CGM.getTarget().getPointerWidth(0));
+ CGM.getTarget().getPointerWidth(LangAS::Default));
Expr.push_back(offset.getQuantity());
Expr.push_back(llvm::dwarf::DW_OP_deref);
Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
@@ -4593,7 +4627,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
llvm::Value *Storage,
- llvm::Optional<unsigned> ArgNo,
+ std::optional<unsigned> ArgNo,
CGBuilderTy &Builder,
const bool UsePointerValue) {
assert(CGM.getCodeGenOpts().hasReducedDebugInfo());
@@ -4614,7 +4648,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
return nullptr;
auto Align = getDeclAlignIfRequired(BD, CGM.getContext());
- unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(BD->getType());
+ unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(BD->getType());
SmallVector<uint64_t, 3> Expr;
AppendAddressSpaceXDeref(AddressSpace, Expr);
@@ -4684,11 +4718,11 @@ CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
if (auto *DD = dyn_cast<DecompositionDecl>(VD))
for (auto *B : DD->bindings()) {
- EmitDeclare(B, Storage, llvm::None, Builder,
+ EmitDeclare(B, Storage, std::nullopt, Builder,
VD->getType()->isReferenceType());
}
- return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue);
+ return EmitDeclare(VD, Storage, std::nullopt, Builder, UsePointerValue);
}
void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) {
@@ -5139,7 +5173,7 @@ std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const {
if (!CGM.getCodeGenOpts().hasReducedDebugInfo())
TemplateNamesKind = codegenoptions::DebugTemplateNamesKind::Full;
- Optional<TemplateArgs> Args;
+ std::optional<TemplateArgs> Args;
bool IsOperatorOverload = false; // isa<CXXConversionDecl>(ND);
if (auto *RD = dyn_cast<CXXRecordDecl>(ND)) {
@@ -5293,8 +5327,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
SmallVector<uint64_t, 4> Expr;
- unsigned AddressSpace =
- CGM.getContext().getTargetAddressSpace(D->getType());
+ unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(D->getType());
if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) {
if (D->hasAttr<CUDASharedAttr>())
AddressSpace =
@@ -5380,10 +5413,18 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
llvm::DIExpression *InitExpr = nullptr;
if (CGM.getContext().getTypeSize(VD->getType()) <= 64) {
// FIXME: Add a representation for integer constants wider than 64 bits.
- if (Init.isInt())
- InitExpr =
- DBuilder.createConstantValueExpression(Init.getInt().getExtValue());
- else if (Init.isFloat())
+ if (Init.isInt()) {
+ const llvm::APSInt &InitInt = Init.getInt();
+ std::optional<uint64_t> InitIntOpt;
+ if (InitInt.isUnsigned())
+ InitIntOpt = InitInt.tryZExtValue();
+ else if (auto tmp = InitInt.trySExtValue(); tmp.has_value())
+ // Transform a signed optional to unsigned optional. When C++23 is available,
+ // use std::optional::transform
+ InitIntOpt = (uint64_t)tmp.value();
+ if (InitIntOpt)
+ InitExpr = DBuilder.createConstantValueExpression(InitIntOpt.value());
+ } else if (Init.isFloat())
InitExpr = DBuilder.createConstantValueExpression(
Init.getFloat().bitcastToAPInt().getZExtValue());
}
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 38e3fa5b2fa9..95484a060cd8 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -25,11 +25,11 @@
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Allocator.h"
+#include <optional>
namespace llvm {
class MDNode;
@@ -177,7 +177,6 @@ class CGDebugInfo {
/// ivars and property accessors.
llvm::DIType *CreateType(const BuiltinType *Ty);
llvm::DIType *CreateType(const ComplexType *Ty);
- llvm::DIType *CreateType(const AutoType *Ty);
llvm::DIType *CreateType(const BitIntType *Ty);
llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg);
llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty,
@@ -231,10 +230,10 @@ class CGDebugInfo {
/// not updated to include implicit \c this pointer. Use this routine
/// to get a method type which includes \c this pointer.
llvm::DISubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method,
- llvm::DIFile *F, bool decl);
+ llvm::DIFile *F);
llvm::DISubroutineType *
getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func,
- llvm::DIFile *Unit, bool decl);
+ llvm::DIFile *Unit);
llvm::DISubroutineType *
getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F);
/// \return debug info descriptor for vtable.
@@ -280,7 +279,7 @@ class CGDebugInfo {
llvm::ArrayRef<TemplateArgument> Args;
};
/// A helper function to collect template parameters.
- llvm::DINodeArray CollectTemplateParams(Optional<TemplateArgs> Args,
+ llvm::DINodeArray CollectTemplateParams(std::optional<TemplateArgs> Args,
llvm::DIFile *Unit);
/// A helper function to collect debug info for function template
/// parameters.
@@ -292,9 +291,9 @@ class CGDebugInfo {
llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD,
llvm::DIFile *Unit);
- Optional<TemplateArgs> GetTemplateArgs(const VarDecl *) const;
- Optional<TemplateArgs> GetTemplateArgs(const RecordDecl *) const;
- Optional<TemplateArgs> GetTemplateArgs(const FunctionDecl *) const;
+ std::optional<TemplateArgs> GetTemplateArgs(const VarDecl *) const;
+ std::optional<TemplateArgs> GetTemplateArgs(const RecordDecl *) const;
+ std::optional<TemplateArgs> GetTemplateArgs(const FunctionDecl *) const;
/// A helper function to collect debug info for template
/// parameters.
@@ -587,7 +586,7 @@ private:
/// Returns a pointer to the DILocalVariable associated with the
/// llvm.dbg.declare, or nullptr otherwise.
llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI,
- llvm::Optional<unsigned> ArgNo,
+ std::optional<unsigned> ArgNo,
CGBuilderTy &Builder,
const bool UsePointerValue = false);
@@ -595,7 +594,7 @@ private:
/// Returns a pointer to the DILocalVariable associated with the
/// llvm.dbg.declare, or nullptr otherwise.
llvm::DILocalVariable *EmitDeclare(const BindingDecl *decl, llvm::Value *AI,
- llvm::Optional<unsigned> ArgNo,
+ std::optional<unsigned> ArgNo,
CGBuilderTy &Builder,
const bool UsePointerValue = false);
@@ -631,11 +630,11 @@ private:
void CreateCompileUnit();
/// Compute the file checksum debug info for input file ID.
- Optional<llvm::DIFile::ChecksumKind>
- computeChecksum(FileID FID, SmallString<32> &Checksum) const;
+ std::optional<llvm::DIFile::ChecksumKind>
+ computeChecksum(FileID FID, SmallString<64> &Checksum) const;
/// Get the source of the given file ID.
- Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
+ std::optional<StringRef> getSource(const SourceManager &SM, FileID FID);
/// Convenience function to get the file debug info descriptor for the input
/// location.
@@ -644,8 +643,8 @@ private:
/// Create a file debug info descriptor for a source file.
llvm::DIFile *
createFile(StringRef FileName,
- Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
- Optional<StringRef> Source);
+ std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ std::optional<StringRef> Source);
/// Get the type from the cache or create a new type if necessary.
llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index f04af0d2cdf8..ceaddc4e694a 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -90,6 +91,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::Export:
case Decl::ObjCPropertyImpl:
case Decl::FileScopeAsm:
+ case Decl::TopLevelStmt:
case Decl::Friend:
case Decl::FriendTemplate:
case Decl::Block:
@@ -100,6 +102,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::ObjCTypeParam:
case Decl::Binding:
case Decl::UnresolvedUsingIfExists:
+ case Decl::HLSLBuffer:
llvm_unreachable("Declaration should not be in declstmts!");
case Decl::Record: // struct/union/class X;
case Decl::CXXRecord: // struct/union/class X; [C++]
@@ -126,6 +129,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::OMPRequires:
case Decl::Empty:
case Decl::Concept:
+ case Decl::ImplicitConceptSpecialization:
case Decl::LifetimeExtendedTemporary:
case Decl::RequiresExprBody:
// None of these decls require codegen support.
@@ -755,7 +759,7 @@ void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS,
if (!SanOpts.has(SanitizerKind::NullabilityAssign))
return;
- auto Nullability = LHS.getType()->getNullability(getContext());
+ auto Nullability = LHS.getType()->getNullability();
if (!Nullability || *Nullability != NullabilityKind::NonNull)
return;
@@ -839,7 +843,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
// If D is pseudo-strong, treat it like __unsafe_unretained here. This means
// that we omit the retain, and causes non-autoreleased return values to be
// immediately released.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case Qualifiers::OCL_ExplicitNone:
@@ -2612,7 +2616,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// function satisfy their nullability preconditions. This makes it necessary
// to emit null checks for args in the function body itself.
if (requiresReturnValueNullabilityCheck()) {
- auto Nullability = Ty->getNullability(getContext());
+ auto Nullability = Ty->getNullability();
if (Nullability && *Nullability == NullabilityKind::NonNull) {
SanitizerScope SanScope(this);
RetValNullabilityPrecondition =
@@ -2695,7 +2699,7 @@ void CodeGenModule::EmitOMPAllocateDecl(const OMPAllocateDecl *D) {
}
}
-llvm::Optional<CharUnits>
+std::optional<CharUnits>
CodeGenModule::getOMPAllocateAlignment(const VarDecl *VD) {
if (const auto *AA = VD->getAttr<OMPAllocateDeclAttr>()) {
if (Expr *Alignment = AA->getAlignment()) {
@@ -2711,5 +2715,5 @@ CodeGenModule::getOMPAllocateAlignment(const VarDecl *VD) {
std::max<unsigned>(UserAlign, NaturalAlign.getQuantity()));
}
}
- return llvm::None;
+ return std::nullopt;
}
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 949112c63cc9..dcd811ea257b 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
+#include "CGHLSLRuntime.h"
#include "CGObjCRuntime.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
@@ -194,7 +195,7 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
// For example, in the above CUDA code, the static local variable s has a
// "shared" address space qualifier, but the constructor of StructWithCtor
// expects "this" in the "generic" address space.
- unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(T);
+ unsigned ExpectedAddrSpace = getTypes().getTargetAddressSpace(T);
unsigned ActualAddrSpace = GV->getAddressSpace();
llvm::Constant *DeclPtr = GV;
if (ActualAddrSpace != ExpectedAddrSpace) {
@@ -552,7 +553,18 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
CXXThreadLocalInits.push_back(Fn);
CXXThreadLocalInitVars.push_back(D);
} else if (PerformInit && ISA) {
- EmitPointerToInitFunc(D, Addr, Fn, ISA);
+ // Contract with backend that "init_seg(compiler)" corresponds to priority
+ // 200 and "init_seg(lib)" corresponds to priority 400.
+ int Priority = -1;
+ if (ISA->getSection() == ".CRT$XCC")
+ Priority = 200;
+ else if (ISA->getSection() == ".CRT$XCL")
+ Priority = 400;
+
+ if (Priority != -1)
+ AddGlobalCtor(Fn, Priority, ~0U, COMDATKey);
+ else
+ EmitPointerToInitFunc(D, Addr, Fn, ISA);
} else if (auto *IPA = D->getAttr<InitPriorityAttr>()) {
OrderGlobalInitsOrStermFinalizers Key(IPA->getPriority(),
PrioritizedCXXGlobalInits.size());
@@ -576,8 +588,16 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
// SelectAny globals will be comdat-folded. Put the initializer into a
// COMDAT group associated with the global, so the initializers get folded
// too.
-
- AddGlobalCtor(Fn, 65535, COMDATKey);
+ I = DelayedCXXInitPosition.find(D);
+ // CXXGlobalInits.size() is the lex order number for the next deferred
+ // VarDecl. Use it when the current VarDecl is non-deferred. Although this
+ // lex order number is shared between current VarDecl and some following
+ // VarDecls, their order of insertion into `llvm.global_ctors` is the same
+ // as the lexing order and the following stable sort would preserve such
+ // order.
+ unsigned LexOrder =
+ I == DelayedCXXInitPosition.end() ? CXXGlobalInits.size() : I->second;
+ AddGlobalCtor(Fn, 65535, LexOrder, COMDATKey);
if (COMDATKey && (getTriple().isOSBinFormatELF() ||
getTarget().getCXXABI().isMicrosoft())) {
// When COMDAT is used on ELF or in the MS C++ ABI, the key must be in
@@ -620,7 +640,12 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() {
/* Build the initializer for a C++20 module:
This is arranged to be run only once regardless of how many times the module
- might be included transitively. This arranged by using a control variable.
+ might be included transitively. This is arranged by using a guard variable.
+
+ If there are no initializers at all (and also no imported modules) we reduce
+ this to an empty function (since the Itanium ABI requires that this function
+ be available to a caller, which might be produced by a different
+ implementation).
First we call any initializers for imported modules.
We then call initializers for the Global Module Fragment (if present)
@@ -632,13 +657,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
while (!CXXGlobalInits.empty() && !CXXGlobalInits.back())
CXXGlobalInits.pop_back();
- // We create the function, even if it is empty, since an importer of this
- // module will refer to it unconditionally (for the current implementation
- // there is no way for the importer to know that an importee does not need
- // an initializer to be run).
-
+ // As noted above, we create the function, even if it is empty.
// Module initializers for imported modules are emitted first.
- // Collect the modules that we import
+
+ // Collect all the modules that we import
SmallVector<Module *> AllImports;
// Ones that we export
for (auto I : Primary->Exports)
@@ -649,8 +671,8 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
SmallVector<llvm::Function *, 8> ModuleInits;
for (Module *M : AllImports) {
- // No Itanium initializer in module map modules.
- if (M->isModuleMapModule())
+ // No Itanium initializer in header like modules.
+ if (M->isHeaderLikeModule())
continue; // TODO: warn of mixed use of module map modules and C++20?
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
SmallString<256> FnName;
@@ -665,7 +687,6 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
FTy, llvm::Function::ExternalLinkage, FnName.str(), &getModule());
ModuleInits.push_back(Fn);
}
- AllImports.clear();
// Add any initializers with specified priority; this uses the same approach
// as EmitCXXGlobalInitFunc().
@@ -683,13 +704,11 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
for (; I < PrioE; ++I)
ModuleInits.push_back(I->second);
}
- PrioritizedCXXGlobalInits.clear();
}
// Now append the ones without specified priority.
- for (auto F : CXXGlobalInits)
+ for (auto *F : CXXGlobalInits)
ModuleInits.push_back(F);
- CXXGlobalInits.clear();
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction();
@@ -699,7 +718,6 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
// each init is run just once (even though a module might be imported
// multiple times via nested use).
llvm::Function *Fn;
- llvm::GlobalVariable *Guard = nullptr;
{
SmallString<256> InitFnName;
llvm::raw_svector_ostream Out(InitFnName);
@@ -709,18 +727,26 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
FTy, llvm::Twine(InitFnName), FI, SourceLocation(), false,
llvm::GlobalVariable::ExternalLinkage);
- Guard = new llvm::GlobalVariable(getModule(), Int8Ty, /*isConstant=*/false,
- llvm::GlobalVariable::InternalLinkage,
- llvm::ConstantInt::get(Int8Ty, 0),
- InitFnName.str() + "__in_chrg");
+ // If we have a completely empty initializer then we do not want to create
+ // the guard variable.
+ ConstantAddress GuardAddr = ConstantAddress::invalid();
+ if (!AllImports.empty() || !PrioritizedCXXGlobalInits.empty() ||
+ !CXXGlobalInits.empty()) {
+ // Create the guard var.
+ llvm::GlobalVariable *Guard = new llvm::GlobalVariable(
+ getModule(), Int8Ty, /*isConstant=*/false,
+ llvm::GlobalVariable::InternalLinkage,
+ llvm::ConstantInt::get(Int8Ty, 0), InitFnName.str() + "__in_chrg");
+ CharUnits GuardAlign = CharUnits::One();
+ Guard->setAlignment(GuardAlign.getAsAlign());
+ GuardAddr = ConstantAddress(Guard, Int8Ty, GuardAlign);
+ }
+ CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits,
+ GuardAddr);
}
- CharUnits GuardAlign = CharUnits::One();
- Guard->setAlignment(GuardAlign.getAsAlign());
- CodeGenFunction(*this).GenerateCXXGlobalInitFunc(
- Fn, ModuleInits, ConstantAddress(Guard, Int8Ty, GuardAlign));
- // We allow for the case that a module object is added to a linked binary
- // without a specific call to the the initializer. This also ensure that
+ // We allow for the case that a module object is added to a linked binary
+ // without a specific call to the initializer. This also ensures that
// implementation partition initializers are called when the partition
// is not imported as an interface.
AddGlobalCtor(Fn);
@@ -739,6 +765,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
Fn->addFnAttr("device-init");
}
+ // We are done with the inits.
+ AllImports.clear();
+ PrioritizedCXXGlobalInits.clear();
+ CXXGlobalInits.clear();
ModuleInits.clear();
}
@@ -778,8 +808,8 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
SmallVector<llvm::Function *, 8> ModuleInits;
if (CXX20ModuleInits)
for (Module *M : ImportedModules) {
- // No Itanium initializer in module map modules.
- if (M->isModuleMapModule())
+ // No Itanium initializer in header like modules.
+ if (M->isHeaderLikeModule())
continue;
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
SmallString<256> FnName;
@@ -824,7 +854,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
// Prepend the module inits to the highest priority set.
if (!ModuleInits.empty()) {
- for (auto F : ModuleInits)
+ for (auto *F : ModuleInits)
LocalCXXGlobalInits.push_back(F);
ModuleInits.clear();
}
@@ -842,7 +872,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
CXXGlobalInits.empty())
return;
- for (auto F : CXXGlobalInits)
+ for (auto *F : CXXGlobalInits)
ModuleInits.push_back(F);
CXXGlobalInits.clear();
@@ -977,6 +1007,9 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
}
+ if (getLangOpts().HLSL)
+ CGM.getHLSLRuntime().annotateHLSLResource(D, Addr);
+
FinishFunction();
}
diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp
index 76c6beb090a9..6fa7871588f7 100644
--- a/clang/lib/CodeGen/CGException.cpp
+++ b/clang/lib/CodeGen/CGException.cpp
@@ -158,7 +158,7 @@ static const EHPersonality &getObjCPersonality(const TargetInfo &Target,
case ObjCRuntime::GNUstep:
if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7))
return EHPersonality::GNUstep_ObjC;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
if (L.hasSjLjExceptions())
@@ -249,7 +249,7 @@ const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) {
// For outlined finallys and filters, use the SEH personality in case they
// contain more SEH. This mostly only affects finallys. Filters could
// hypothetically use gnu statement expressions to sneak in nested SEH.
- FD = FD ? FD : CGF.CurSEHParent;
+ FD = FD ? FD : CGF.CurSEHParent.getDecl();
return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD));
}
@@ -1223,8 +1223,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// Wasm uses Windows-style EH instructions, but merges all catch clauses into
// one big catchpad. So we save the old funclet pad here before we traverse
// each catch handler.
- SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
- CurrentFuncletPad);
+ SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad);
llvm::BasicBlock *WasmCatchStartBlock = nullptr;
if (EHPersonality::get(*this).isWasmPersonality()) {
auto *CatchSwitch =
@@ -1257,8 +1256,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
RunCleanupsScope CatchScope(*this);
// Initialize the catch variable and set up the cleanups.
- SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
- CurrentFuncletPad);
+ SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad);
CGM.getCXXABI().emitBeginCatch(*this, C);
// Emit the PGO counter increment.
@@ -1582,8 +1580,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() {
// Create the cleanuppad using the current parent pad as its token. Use 'none'
// if this is a top-level terminate scope, which is the common case.
- SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
- CurrentFuncletPad);
+ SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad);
llvm::Value *ParentPad = CurrentFuncletPad;
if (!ParentPad)
ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
@@ -1628,7 +1625,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) {
llvm::Value *Sel = getSelectorFromSlot();
llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), Sel->getType());
- llvm::Value *LPadVal = llvm::UndefValue::get(LPadType);
+ llvm::Value *LPadVal = llvm::PoisonValue::get(LPadType);
LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val");
LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val");
@@ -2005,7 +2002,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
SmallString<128> Name;
{
llvm::raw_svector_ostream OS(Name);
- const NamedDecl *ParentSEHFn = ParentCGF.CurSEHParent;
+ GlobalDecl ParentSEHFn = ParentCGF.CurSEHParent;
assert(ParentSEHFn && "No CurSEHParent!");
MangleContext &Mangler = CGM.getCXXABI().getMangleContext();
if (IsFilter)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index bf3dd812b9e8..c26dd1b23321 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include <optional>
#include <string>
using namespace clang;
@@ -123,7 +124,7 @@ llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty,
const Twine &Name) {
CharUnits Align =
- CharUnits::fromQuantity(CGM.getDataLayout().getPrefTypeAlignment(Ty));
+ CharUnits::fromQuantity(CGM.getDataLayout().getPrefTypeAlign(Ty));
return CreateTempAlloca(Ty, Align, Name);
}
@@ -875,52 +876,6 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
}
}
-/// Determine whether this expression refers to a flexible array member in a
-/// struct. We disable array bounds checks for such members.
-static bool isFlexibleArrayMemberExpr(const Expr *E,
- unsigned StrictFlexArraysLevel) {
- // For compatibility with existing code, we treat arrays of length 0 or
- // 1 as flexible array members.
- // FIXME: This is inconsistent with the warning code in SemaChecking. Unify
- // the two mechanisms.
- const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe();
- if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
- // FIXME: Sema doesn't treat [1] as a flexible array member if the bound
- // was produced by macro expansion.
- if (StrictFlexArraysLevel >= 2 && CAT->getSize().ugt(0))
- return false;
- // FIXME: While the default -fstrict-flex-arrays=0 permits Size>1 trailing
- // arrays to be treated as flexible-array-members, we still emit ubsan
- // checks as if they are not.
- if (CAT->getSize().ugt(1))
- return false;
- } else if (!isa<IncompleteArrayType>(AT))
- return false;
-
- E = E->IgnoreParens();
-
- // A flexible array member must be the last member in the class.
- if (const auto *ME = dyn_cast<MemberExpr>(E)) {
- // FIXME: If the base type of the member expr is not FD->getParent(),
- // this should not be treated as a flexible array member access.
- if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
- // FIXME: Sema doesn't treat a T[1] union member as a flexible array
- // member, only a T[0] or T[] member gets that treatment.
- // Under StrictFlexArraysLevel, obey c99+ that disallows FAM in union, see
- // C11 6.7.2.1 §18
- if (FD->getParent()->isUnion())
- return StrictFlexArraysLevel < 2;
- RecordDecl::field_iterator FI(
- DeclContext::decl_iterator(const_cast<FieldDecl *>(FD)));
- return ++FI == FD->getParent()->field_end();
- }
- } else if (const auto *IRE = dyn_cast<ObjCIvarRefExpr>(E)) {
- return IRE->getDecl()->getNextIvar() == nullptr;
- }
-
- return false;
-}
-
llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E,
QualType EltTy) {
ASTContext &C = getContext();
@@ -965,7 +920,8 @@ llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E,
static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
const Expr *Base,
QualType &IndexedType,
- unsigned StrictFlexArraysLevel) {
+ LangOptions::StrictFlexArraysLevelKind
+ StrictFlexArraysLevel) {
// For the vector indexing extension, the bound is the number of elements.
if (const VectorType *VT = Base->getType()->getAs<VectorType>()) {
IndexedType = Base->getType();
@@ -976,7 +932,8 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
if (const auto *CE = dyn_cast<CastExpr>(Base)) {
if (CE->getCastKind() == CK_ArrayToPointerDecay &&
- !isFlexibleArrayMemberExpr(CE->getSubExpr(), StrictFlexArraysLevel)) {
+ !CE->getSubExpr()->isFlexibleArrayMemberLike(CGF.getContext(),
+ StrictFlexArraysLevel)) {
IndexedType = CE->getSubExpr()->getType();
const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe();
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
@@ -1003,7 +960,8 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
"should not be called unless adding bounds checks");
SanitizerScope SanScope(this);
- const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays;
+ const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+ getLangOpts().getStrictFlexArraysLevel();
QualType IndexedType;
llvm::Value *Bound =
@@ -1426,6 +1384,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return EmitOMPArraySectionExpr(cast<OMPArraySectionExpr>(E));
case Expr::ExtVectorElementExprClass:
return EmitExtVectorElementExpr(cast<ExtVectorElementExpr>(E));
+ case Expr::CXXThisExprClass:
+ return MakeAddrLValue(LoadCXXThisAddress(), E->getType());
case Expr::MemberExprClass:
return EmitMemberExpr(cast<MemberExpr>(E));
case Expr::CompoundLiteralExprClass:
@@ -1661,21 +1621,7 @@ static bool getRangeForType(CodeGenFunction &CGF, QualType Ty,
End = llvm::APInt(CGF.getContext().getTypeSize(Ty), 2);
} else {
const EnumDecl *ED = ET->getDecl();
- llvm::Type *LTy = CGF.ConvertTypeForMem(ED->getIntegerType());
- unsigned Bitwidth = LTy->getScalarSizeInBits();
- unsigned NumNegativeBits = ED->getNumNegativeBits();
- unsigned NumPositiveBits = ED->getNumPositiveBits();
-
- if (NumNegativeBits) {
- unsigned NumBits = std::max(NumNegativeBits, NumPositiveBits + 1);
- assert(NumBits <= Bitwidth);
- End = llvm::APInt(Bitwidth, 1) << (NumBits - 1);
- Min = -End;
- } else {
- assert(NumPositiveBits <= Bitwidth);
- End = llvm::APInt(Bitwidth, 1) << NumPositiveBits;
- Min = llvm::APInt::getZero(Bitwidth);
- }
+ ED->getValueRange(End, Min);
}
return true;
}
@@ -1743,6 +1689,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
LValueBaseInfo BaseInfo,
TBAAAccessInfo TBAAInfo,
bool isNontemporal) {
+ if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr.getPointer()))
+ if (GV->isThreadLocal())
+ Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV));
+
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
// Boolean vectors use `iN` as storage type.
if (ClangVecTy->isExtVectorBoolType()) {
@@ -1802,8 +1752,11 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
// In order to prevent the optimizer from throwing away the check, don't
// attach range metadata to the load.
} else if (CGM.getCodeGenOpts().OptimizationLevel > 0)
- if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty))
+ if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) {
Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo);
+ Load->setMetadata(llvm::LLVMContext::MD_noundef,
+ llvm::MDNode::get(getLLVMContext(), std::nullopt));
+ }
return EmitFromMemory(Load, Ty);
}
@@ -1884,6 +1837,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
LValueBaseInfo BaseInfo,
TBAAAccessInfo TBAAInfo,
bool isInit, bool isNontemporal) {
+ if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr.getPointer()))
+ if (GV->isThreadLocal())
+ Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV));
+
llvm::Type *SrcTy = Value->getType();
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
@@ -2537,16 +2494,18 @@ static LValue EmitThreadPrivateVarDeclLValue(
static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD, QualType T) {
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- // Return an invalid address if variable is MT_To and unified
- // memory is not enabled. For all other cases: MT_Link and
- // MT_To with unified memory, return a valid address.
- if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ // Return an invalid address if variable is MT_To (or MT_Enter starting with
+ // OpenMP 5.2) and unified memory is not enabled. For all other cases: MT_Link
+ // and MT_To (or MT_Enter) with unified memory, return a valid address.
+ if (!Res || ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
!CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()))
return Address::invalid();
assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) &&
"Expected link clause OR to clause with unified memory enabled.");
QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
@@ -2614,6 +2573,10 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
}
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
+
+ if (VD->getTLSKind() != VarDecl::TLS_None)
+ V = CGF.Builder.CreateThreadLocalAddress(V);
+
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
@@ -2785,7 +2748,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
getContext().getDeclAlign(VD));
llvm::Type *VarTy = getTypes().ConvertTypeForMem(VD->getType());
auto *PTy = llvm::PointerType::get(
- VarTy, getContext().getTargetAddressSpace(VD->getType()));
+ VarTy, getTypes().getTargetAddressSpace(VD->getType()));
Addr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PTy, VarTy);
} else {
// Should we be using the alignment of the constant pointer we emitted?
@@ -2883,6 +2846,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
}
+ // Handle threadlocal function locals.
+ if (VD->getTLSKind() != VarDecl::TLS_None)
+ addr =
+ addr.withPointer(Builder.CreateThreadLocalAddress(addr.getPointer()));
// Check for OpenMP threadprivate variables.
if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
@@ -2940,8 +2907,13 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// FIXME: While we're emitting a binding from an enclosing scope, all other
// DeclRefExprs we see should be implicitly treated as if they also refer to
// an enclosing scope.
- if (const auto *BD = dyn_cast<BindingDecl>(ND))
+ if (const auto *BD = dyn_cast<BindingDecl>(ND)) {
+ if (E->refersToEnclosingVariableOrCapture()) {
+ auto *FD = LambdaCaptureFields.lookup(BD);
+ return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
+ }
return EmitLValue(BD->getBinding());
+ }
// We can form DeclRefExprs naming GUID declarations when reconstituting
// non-type template parameters into expressions.
@@ -3090,10 +3062,9 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
// Format the type name as if for a diagnostic, including quotes and
// optionally an 'aka'.
SmallString<32> Buffer;
- CGM.getDiags().ConvertArgToString(DiagnosticsEngine::ak_qualtype,
- (intptr_t)T.getAsOpaquePtr(),
- StringRef(), StringRef(), None, Buffer,
- None);
+ CGM.getDiags().ConvertArgToString(
+ DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(),
+ StringRef(), std::nullopt, Buffer, std::nullopt);
llvm::Constant *Components[] = {
Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo),
@@ -3122,7 +3093,7 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) {
// Floating-point types which fit into intptr_t are bitcast to integers
// and then passed directly (after zero-extension, if necessary).
if (V->getType()->isFloatingPointTy()) {
- unsigned Bits = V->getType()->getPrimitiveSizeInBits().getFixedSize();
+ unsigned Bits = V->getType()->getPrimitiveSizeInBits().getFixedValue();
if (Bits <= TargetTy->getIntegerBitWidth())
V = Builder.CreateBitCast(V, llvm::Type::getIntNTy(getLLVMContext(),
Bits));
@@ -3186,7 +3157,8 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) {
auto FilenameGV =
CGM.GetAddrOfConstantCString(std::string(FilenameString), ".src");
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(
- cast<llvm::GlobalVariable>(FilenameGV.getPointer()));
+ cast<llvm::GlobalVariable>(
+ FilenameGV.getPointer()->stripPointerCasts()));
Filename = FilenameGV.getPointer();
Line = PLoc.getLine();
Column = PLoc.getColumn();
@@ -3244,7 +3216,7 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF,
CheckRecoverableKind RecoverKind, bool IsFatal,
llvm::BasicBlock *ContBB) {
assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable);
- Optional<ApplyDebugLocation> DL;
+ std::optional<ApplyDebugLocation> DL;
if (!CGF.Builder.getCurrentDebugLocation()) {
// Ensure that the call has at least an artificial debug location.
DL.emplace(CGF, SourceLocation());
@@ -3292,7 +3264,7 @@ void CodeGenFunction::EmitCheck(
assert(IsSanitizerScope);
assert(Checked.size() > 0);
assert(CheckHandler >= 0 &&
- size_t(CheckHandler) < llvm::array_lengthof(SanitizerHandlers));
+ size_t(CheckHandler) < std::size(SanitizerHandlers));
const StringRef CheckName = SanitizerHandlers[CheckHandler].Name;
llvm::Value *FatalCond = nullptr;
@@ -3354,13 +3326,15 @@ void CodeGenFunction::EmitCheck(
// Emit handler arguments and create handler function type.
if (!StaticArgs.empty()) {
llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
- auto *InfoPtr =
- new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false,
- llvm::GlobalVariable::PrivateLinkage, Info);
+ auto *InfoPtr = new llvm::GlobalVariable(
+ CGM.getModule(), Info->getType(), false,
+ llvm::GlobalVariable::PrivateLinkage, Info, "", nullptr,
+ llvm::GlobalVariable::NotThreadLocal,
+ CGM.getDataLayout().getDefaultGlobalsAddressSpace());
InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
- Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy));
- ArgTypes.push_back(Int8PtrTy);
+ Args.push_back(EmitCastToVoidPtr(InfoPtr));
+ ArgTypes.push_back(Args.back()->getType());
}
for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) {
@@ -3561,7 +3535,7 @@ void CodeGenFunction::EmitUnreachable(SourceLocation Loc) {
EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
SanitizerKind::Unreachable),
SanitizerHandler::BuiltinUnreachable,
- EmitCheckSourceLocation(Loc), None);
+ EmitCheckSourceLocation(Loc), std::nullopt);
}
Builder.CreateUnreachable();
}
@@ -3576,7 +3550,8 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
TrapBBs.resize(CheckHandlerID + 1);
llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID];
- if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB) {
+ if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB ||
+ (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
TrapBB = createBasicBlock("trap");
Builder.CreateCondBr(Checked, Cont, TrapBB);
EmitBlock(TrapBB);
@@ -3755,7 +3730,7 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
const llvm::Twine &name = "arrayidx") {
// All the indices except that last must be zero.
#ifndef NDEBUG
- for (auto idx : indices.drop_back())
+ for (auto *idx : indices.drop_back())
assert(isa<llvm::ConstantInt>(idx) &&
cast<llvm::ConstantInt>(idx)->isZero());
#endif
@@ -4038,14 +4013,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
llvm::APSInt ConstLength;
if (Length) {
// Idx = LowerBound + Length - 1;
- if (Optional<llvm::APSInt> CL = Length->getIntegerConstantExpr(C)) {
+ if (std::optional<llvm::APSInt> CL = Length->getIntegerConstantExpr(C)) {
ConstLength = CL->zextOrTrunc(PointerWidthInBits);
Length = nullptr;
}
auto *LowerBound = E->getLowerBound();
llvm::APSInt ConstLowerBound(PointerWidthInBits, /*isUnsigned=*/false);
if (LowerBound) {
- if (Optional<llvm::APSInt> LB = LowerBound->getIntegerConstantExpr(C)) {
+ if (std::optional<llvm::APSInt> LB =
+ LowerBound->getIntegerConstantExpr(C)) {
ConstLowerBound = LB->zextOrTrunc(PointerWidthInBits);
LowerBound = nullptr;
}
@@ -4085,7 +4061,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
: BaseTy;
if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) {
Length = VAT->getSizeExpr();
- if (Optional<llvm::APSInt> L = Length->getIntegerConstantExpr(C)) {
+ if (std::optional<llvm::APSInt> L = Length->getIntegerConstantExpr(C)) {
ConstLength = *L;
Length = nullptr;
}
@@ -4292,7 +4268,7 @@ unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
unsigned FieldIndex) {
unsigned I = 0, Skipped = 0;
- for (auto F : Rec->getDefinition()->fields()) {
+ for (auto *F : Rec->getDefinition()->fields()) {
if (I == FieldIndex)
break;
if (F->isUnnamedBitfield())
@@ -4596,11 +4572,11 @@ LValue CodeGenFunction::EmitInitListLValue(const InitListExpr *E) {
/// Emit the operand of a glvalue conditional operator. This is either a glvalue
/// or a (possibly-parenthesized) throw-expression. If this is a throw, no
/// LValue is returned and the current block has been terminated.
-static Optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF,
- const Expr *Operand) {
+static std::optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF,
+ const Expr *Operand) {
if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(Operand->IgnoreParens())) {
CGF.EmitCXXThrowExpr(ThrowExpr, /*KeepInsertionPoint*/false);
- return None;
+ return std::nullopt;
}
return CGF.EmitLValue(Operand);
@@ -4609,7 +4585,7 @@ static Optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF,
namespace {
// Handle the case where the condition is a constant evaluatable simple integer,
// which means we don't have to separately handle the true/false blocks.
-llvm::Optional<LValue> HandleConditionalOperatorLValueSimpleCase(
+std::optional<LValue> HandleConditionalOperatorLValueSimpleCase(
CodeGenFunction &CGF, const AbstractConditionalOperator *E) {
const Expr *condExpr = E->getCond();
bool CondExprBool;
@@ -4635,11 +4611,11 @@ llvm::Optional<LValue> HandleConditionalOperatorLValueSimpleCase(
return CGF.EmitLValue(Live);
}
}
- return llvm::None;
+ return std::nullopt;
}
struct ConditionalInfo {
llvm::BasicBlock *lhsBlock, *rhsBlock;
- Optional<LValue> LHS, RHS;
+ std::optional<LValue> LHS, RHS;
};
// Create and generate the 3 blocks for a conditional operator.
@@ -4649,8 +4625,8 @@ ConditionalInfo EmitConditionalBlocks(CodeGenFunction &CGF,
const AbstractConditionalOperator *E,
const FuncTy &BranchGenFunc) {
ConditionalInfo Info{CGF.createBasicBlock("cond.true"),
- CGF.createBasicBlock("cond.false"), llvm::None,
- llvm::None};
+ CGF.createBasicBlock("cond.false"), std::nullopt,
+ std::nullopt};
llvm::BasicBlock *endBlock = CGF.createBasicBlock("cond.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
@@ -4708,7 +4684,7 @@ LValue CodeGenFunction::EmitConditionalOperatorLValue(
}
OpaqueValueMapping binding(*this, expr);
- if (llvm::Optional<LValue> Res =
+ if (std::optional<LValue> Res =
HandleConditionalOperatorLValueSimpleCase(*this, expr))
return *Res;
@@ -5046,7 +5022,9 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
if (auto builtinID = FD->getBuiltinID()) {
std::string NoBuiltinFD = ("no-builtin-" + FD->getName()).str();
std::string NoBuiltins = "no-builtins";
- std::string FDInlineName = (FD->getName() + ".inline").str();
+
+ StringRef Ident = CGF.CGM.getMangledName(GD);
+ std::string FDInlineName = (Ident + ".inline").str();
bool IsPredefinedLibFunction =
CGF.getContext().BuiltinInfo.isPredefinedLibFunction(builtinID);
@@ -5271,6 +5249,15 @@ llvm::Value *CodeGenFunction::EmitIvarOffset(const ObjCInterfaceDecl *Interface,
return CGM.getObjCRuntime().EmitIvarOffset(*this, Interface, Ivar);
}
+llvm::Value *
+CodeGenFunction::EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
+ const ObjCIvarDecl *Ivar) {
+ llvm::Value *OffsetValue = EmitIvarOffset(Interface, Ivar);
+ QualType PointerDiffType = getContext().getPointerDiffType();
+ return Builder.CreateZExtOrTrunc(OffsetValue,
+ getTypes().ConvertType(PointerDiffType));
+}
+
LValue CodeGenFunction::EmitLValueForIvar(QualType ObjectTy,
llvm::Value *BaseValue,
const ObjCIvarDecl *Ivar,
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 73b05690537d..34e535a78dd6 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -87,8 +87,9 @@ public:
void EmitMoveFromReturnSlot(const Expr *E, RValue Src);
- void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
- QualType ArrayQTy, InitListExpr *E);
+ void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, QualType ArrayQTy,
+ Expr *ExprToVisit, ArrayRef<Expr *> Args,
+ Expr *ArrayFiller);
AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) {
if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T))
@@ -172,6 +173,9 @@ public:
void VisitAbstractConditionalOperator(const AbstractConditionalOperator *CO);
void VisitChooseExpr(const ChooseExpr *CE);
void VisitInitListExpr(InitListExpr *E);
+ void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef<Expr *> Args,
+ FieldDecl *InitializedFieldInUnion,
+ Expr *ArrayFiller);
void VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::Value *outerBegin = nullptr);
void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E);
@@ -201,10 +205,22 @@ public:
return EmitFinalDestCopy(E->getType(), LV);
}
- CGF.EmitPseudoObjectRValue(E, EnsureSlot(E->getType()));
+ AggValueSlot Slot = EnsureSlot(E->getType());
+ bool NeedsDestruction =
+ !Slot.isExternallyDestructed() &&
+ E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct;
+ if (NeedsDestruction)
+ Slot.setExternallyDestructed();
+ CGF.EmitPseudoObjectRValue(E, Slot);
+ if (NeedsDestruction)
+ CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Slot.getAddress(),
+ E->getType());
}
void VisitVAArgExpr(VAArgExpr *E);
+ void VisitCXXParenListInitExpr(CXXParenListInitExpr *E);
+ void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef<Expr *> Args,
+ Expr *ArrayFiller);
void EmitInitializationToLValue(Expr *E, LValue Address);
void EmitNullInitializationToLValue(LValue Address);
@@ -471,10 +487,12 @@ static bool isTrivialFiller(Expr *E) {
return false;
}
-/// Emit initialization of an array from an initializer list.
+/// Emit initialization of an array from an initializer list. ExprToVisit must
+/// be either an InitListExpr or a CXXParenListInitExpr.
void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
- QualType ArrayQTy, InitListExpr *E) {
- uint64_t NumInitElements = E->getNumInits();
+ QualType ArrayQTy, Expr *ExprToVisit,
+ ArrayRef<Expr *> Args, Expr *ArrayFiller) {
+ uint64_t NumInitElements = Args.size();
uint64_t NumArrayElements = AType->getNumElements();
assert(NumInitElements <= NumArrayElements);
@@ -503,7 +521,8 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
CodeGen::CodeGenModule &CGM = CGF.CGM;
ConstantEmitter Emitter(CGF);
LangAS AS = ArrayQTy.getAddressSpace();
- if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) {
+ if (llvm::Constant *C =
+ Emitter.tryEmitForInitializer(ExprToVisit, AS, ArrayQTy)) {
auto GV = new llvm::GlobalVariable(
CGM.getModule(), C->getType(),
CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true),
@@ -568,12 +587,11 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
LValue elementLV = CGF.MakeAddrLValue(
Address(element, llvmElementType, elementAlign), elementType);
- EmitInitializationToLValue(E->getInit(i), elementLV);
+ EmitInitializationToLValue(Args[i], elementLV);
}
// Check whether there's a non-trivial array-fill expression.
- Expr *filler = E->getArrayFiller();
- bool hasTrivialFiller = isTrivialFiller(filler);
+ bool hasTrivialFiller = isTrivialFiller(ArrayFiller);
// Any remaining elements need to be zero-initialized, possibly
// using the filler expression. We can skip this if the we're
@@ -616,8 +634,8 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
LValue elementLV = CGF.MakeAddrLValue(
Address(currentElement, llvmElementType, elementAlign), elementType);
- if (filler)
- EmitInitializationToLValue(filler, elementLV);
+ if (ArrayFiller)
+ EmitInitializationToLValue(ArrayFiller, elementLV);
else
EmitNullInitializationToLValue(elementLV);
}
@@ -850,7 +868,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
return;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case CK_NoOp:
@@ -1591,46 +1609,64 @@ void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) {
}
}
+void AggExprEmitter::VisitCXXParenListInitExpr(CXXParenListInitExpr *E) {
+ VisitCXXParenListOrInitListExpr(E, E->getInitExprs(),
+ E->getInitializedFieldInUnion(),
+ E->getArrayFiller());
+}
+
void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
+ if (E->hadArrayRangeDesignator())
+ CGF.ErrorUnsupported(E, "GNU array range designator extension");
+
+ if (E->isTransparent())
+ return Visit(E->getInit(0));
+
+ VisitCXXParenListOrInitListExpr(
+ E, E->inits(), E->getInitializedFieldInUnion(), E->getArrayFiller());
+}
+
+void AggExprEmitter::VisitCXXParenListOrInitListExpr(
+ Expr *ExprToVisit, ArrayRef<Expr *> InitExprs,
+ FieldDecl *InitializedFieldInUnion, Expr *ArrayFiller) {
#if 0
// FIXME: Assess perf here? Figure out what cases are worth optimizing here
// (Length of globals? Chunks of zeroed-out space?).
//
// If we can, prefer a copy from a global; this is a lot less code for long
// globals, and it's easier for the current optimizers to analyze.
- if (llvm::Constant* C = CGF.CGM.EmitConstantExpr(E, E->getType(), &CGF)) {
+ if (llvm::Constant *C =
+ CGF.CGM.EmitConstantExpr(ExprToVisit, ExprToVisit->getType(), &CGF)) {
llvm::GlobalVariable* GV =
new llvm::GlobalVariable(CGF.CGM.getModule(), C->getType(), true,
llvm::GlobalValue::InternalLinkage, C, "");
- EmitFinalDestCopy(E->getType(), CGF.MakeAddrLValue(GV, E->getType()));
+ EmitFinalDestCopy(ExprToVisit->getType(),
+ CGF.MakeAddrLValue(GV, ExprToVisit->getType()));
return;
}
#endif
- if (E->hadArrayRangeDesignator())
- CGF.ErrorUnsupported(E, "GNU array range designator extension");
-
- if (E->isTransparent())
- return Visit(E->getInit(0));
- AggValueSlot Dest = EnsureSlot(E->getType());
+ AggValueSlot Dest = EnsureSlot(ExprToVisit->getType());
- LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
+ LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), ExprToVisit->getType());
// Handle initialization of an array.
- if (E->getType()->isArrayType()) {
+ if (ExprToVisit->getType()->isArrayType()) {
auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType());
- EmitArrayInit(Dest.getAddress(), AType, E->getType(), E);
+ EmitArrayInit(Dest.getAddress(), AType, ExprToVisit->getType(), ExprToVisit,
+ InitExprs, ArrayFiller);
return;
}
- assert(E->getType()->isRecordType() && "Only support structs/unions here!");
+ assert(ExprToVisit->getType()->isRecordType() &&
+ "Only support structs/unions here!");
// Do struct initialization; this code just sets each individual member
// to the approprate value. This makes bitfield support automatic;
// the disadvantage is that the generated code is more difficult for
// the optimizer, especially with bitfields.
- unsigned NumInitElements = E->getNumInits();
- RecordDecl *record = E->getType()->castAs<RecordType>()->getDecl();
+ unsigned NumInitElements = InitExprs.size();
+ RecordDecl *record = ExprToVisit->getType()->castAs<RecordType>()->getDecl();
// We'll need to enter cleanup scopes in case any of the element
// initializers throws an exception.
@@ -1648,7 +1684,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Emit initialization of base classes.
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) {
- assert(E->getNumInits() >= CXXRD->getNumBases() &&
+ assert(NumInitElements >= CXXRD->getNumBases() &&
"missing initializer for base class");
for (auto &Base : CXXRD->bases()) {
assert(!Base.isVirtual() && "should not see vbases here");
@@ -1662,7 +1698,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
CGF.getOverlapForBaseInit(CXXRD, BaseRD, Base.isVirtual()));
- CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot);
+ CGF.EmitAggExpr(InitExprs[curInitIndex++], AggSlot);
if (QualType::DestructionKind dtorKind =
Base.getType().isDestructedType()) {
@@ -1678,25 +1714,25 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
if (record->isUnion()) {
// Only initialize one field of a union. The field itself is
// specified by the initializer list.
- if (!E->getInitializedFieldInUnion()) {
+ if (!InitializedFieldInUnion) {
// Empty union; we have nothing to do.
#ifndef NDEBUG
// Make sure that it's really an empty and not a failure of
// semantic analysis.
for (const auto *Field : record->fields())
- assert(Field->isUnnamedBitfield() && "Only unnamed bitfields allowed");
+ assert((Field->isUnnamedBitfield() || Field->isAnonymousStructOrUnion()) && "Only unnamed bitfields or anonymous class allowed");
#endif
return;
}
// FIXME: volatility
- FieldDecl *Field = E->getInitializedFieldInUnion();
+ FieldDecl *Field = InitializedFieldInUnion;
LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestLV, Field);
if (NumInitElements) {
// Store the initializer into the field
- EmitInitializationToLValue(E->getInit(0), FieldLoc);
+ EmitInitializationToLValue(InitExprs[0], FieldLoc);
} else {
// Default-initialize to null.
EmitNullInitializationToLValue(FieldLoc);
@@ -1720,7 +1756,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// have a zeroed object, and the rest of the fields are
// zero-initializable.
if (curInitIndex == NumInitElements && Dest.isZeroed() &&
- CGF.getTypes().isZeroInitializable(E->getType()))
+ CGF.getTypes().isZeroInitializable(ExprToVisit->getType()))
break;
@@ -1730,7 +1766,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
if (curInitIndex < NumInitElements) {
// Store the initializer into the field.
- EmitInitializationToLValue(E->getInit(curInitIndex++), LV);
+ EmitInitializationToLValue(InitExprs[curInitIndex++], LV);
} else {
// We're out of initializers; default-initialize to null
EmitNullInitializationToLValue(LV);
@@ -1926,7 +1962,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
// Reference values are always non-null and have the width of a pointer.
if (Field->getType()->isReferenceType())
NumNonZeroBytes += CGF.getContext().toCharUnitsFromBits(
- CGF.getTarget().getPointerWidth(0));
+ CGF.getTarget().getPointerWidth(LangAS::Default));
else
NumNonZeroBytes += GetNumNonZeroBytesInInit(E, CGF);
}
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 3cc144361542..b889a4e05ee1 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -646,7 +646,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
case CXXConstructExpr::CK_VirtualBase:
ForVirtualBase = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case CXXConstructExpr::CK_NonVirtualBase:
Type = Ctor_Base;
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index 5409e82d437e..7a14a418c7b6 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -206,12 +206,13 @@ public:
return VisitPrePostIncDec(E, true, true);
}
ComplexPairTy VisitUnaryDeref(const Expr *E) { return EmitLoadOfLValue(E); }
- ComplexPairTy VisitUnaryPlus (const UnaryOperator *E) {
- TestAndClearIgnoreReal();
- TestAndClearIgnoreImag();
- return Visit(E->getSubExpr());
- }
- ComplexPairTy VisitUnaryMinus (const UnaryOperator *E);
+
+ ComplexPairTy VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ ComplexPairTy VisitPlus(const UnaryOperator *E, QualType PromotionType);
+ ComplexPairTy VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ ComplexPairTy VisitMinus(const UnaryOperator *E, QualType PromotionType);
ComplexPairTy VisitUnaryNot (const UnaryOperator *E);
// LNot,Real,Imag never return complex.
ComplexPairTy VisitUnaryExtension(const UnaryOperator *E) {
@@ -251,9 +252,13 @@ public:
ComplexPairTy LHS;
ComplexPairTy RHS;
QualType Ty; // Computation Type.
+ FPOptions FPFeatures;
};
- BinOpInfo EmitBinOps(const BinaryOperator *E);
+ BinOpInfo EmitBinOps(const BinaryOperator *E,
+ QualType PromotionTy = QualType());
+ ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy);
+ ComplexPairTy EmitPromotedComplexOperand(const Expr *E, QualType PromotionTy);
LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
ComplexPairTy (ComplexExprEmitter::*Func)
(const BinOpInfo &),
@@ -270,19 +275,33 @@ public:
ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
const BinOpInfo &Op);
- ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
- return EmitBinAdd(EmitBinOps(E));
- }
- ComplexPairTy VisitBinSub(const BinaryOperator *E) {
- return EmitBinSub(EmitBinOps(E));
- }
- ComplexPairTy VisitBinMul(const BinaryOperator *E) {
- return EmitBinMul(EmitBinOps(E));
+ QualType getPromotionType(QualType Ty) {
+ if (auto *CT = Ty->getAs<ComplexType>()) {
+ QualType ElementType = CT->getElementType();
+ if (ElementType.UseExcessPrecision(CGF.getContext()))
+ return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+ }
+ if (Ty.UseExcessPrecision(CGF.getContext()))
+ return CGF.getContext().FloatTy;
+ return QualType();
}
- ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
- return EmitBinDiv(EmitBinOps(E));
+
+#define HANDLEBINOP(OP) \
+ ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \
+ QualType promotionTy = getPromotionType(E->getType()); \
+ ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \
+ if (!promotionTy.isNull()) \
+ result = \
+ CGF.EmitUnPromotedValue(result, E->getType()); \
+ return result; \
}
+ HANDLEBINOP(Mul)
+ HANDLEBINOP(Div)
+ HANDLEBINOP(Add)
+ HANDLEBINOP(Sub)
+#undef HANDLEBINOP
+
ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
return Visit(E->getSemanticForm());
}
@@ -556,10 +575,45 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
llvm_unreachable("unknown cast resulting in complex value");
}
-ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
+ComplexPairTy ComplexExprEmitter::VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ ComplexPairTy result = VisitPlus(E, promotionTy);
+ if (!promotionTy.isNull())
+ return CGF.EmitUnPromotedValue(result, E->getSubExpr()->getType());
+ return result;
+}
+
+ComplexPairTy ComplexExprEmitter::VisitPlus(const UnaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreReal();
TestAndClearIgnoreImag();
- ComplexPairTy Op = Visit(E->getSubExpr());
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedComplexExpr(E->getSubExpr(), PromotionType);
+ return Visit(E->getSubExpr());
+}
+
+ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ ComplexPairTy result = VisitMinus(E, promotionTy);
+ if (!promotionTy.isNull())
+ return CGF.EmitUnPromotedValue(result, E->getSubExpr()->getType());
+ return result;
+}
+ComplexPairTy ComplexExprEmitter::VisitMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ TestAndClearIgnoreReal();
+ TestAndClearIgnoreImag();
+ ComplexPairTy Op;
+ if (!PromotionType.isNull())
+ Op = CGF.EmitPromotedComplexExpr(E->getSubExpr(), PromotionType);
+ else
+ Op = Visit(E->getSubExpr());
llvm::Value *ResR, *ResI;
if (Op.first->getType()->isFloatingPointTy()) {
@@ -590,6 +644,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinAdd(const BinOpInfo &Op) {
llvm::Value *ResR, *ResI;
if (Op.LHS.first->getType()->isFloatingPointTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures);
ResR = Builder.CreateFAdd(Op.LHS.first, Op.RHS.first, "add.r");
if (Op.LHS.second && Op.RHS.second)
ResI = Builder.CreateFAdd(Op.LHS.second, Op.RHS.second, "add.i");
@@ -608,6 +663,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinAdd(const BinOpInfo &Op) {
ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) {
llvm::Value *ResR, *ResI;
if (Op.LHS.first->getType()->isFloatingPointTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures);
ResR = Builder.CreateFSub(Op.LHS.first, Op.RHS.first, "sub.r");
if (Op.LHS.second && Op.RHS.second)
ResI = Builder.CreateFSub(Op.LHS.second, Op.RHS.second, "sub.i");
@@ -700,6 +756,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) {
// FIXME: C11 also provides for imaginary types which would allow folding
// still more of this within the type system.
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures);
if (Op.LHS.second && Op.RHS.second) {
// If both operands are complex, emit the core math directly, and then
// test for NaNs. If we find NaNs in the result, we delegate to a libcall
@@ -801,6 +858,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
//
// FIXME: We would be able to avoid the libcall in many places if we
// supported imaginary types in addition to complex types.
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures);
if (RHSi && !CGF.getLangOpts().FastMath) {
BinOpInfo LibCallOp = Op;
// If LHS was a real, supply a null imaginary part.
@@ -876,21 +934,103 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
return ComplexPairTy(DSTr, DSTi);
}
+ComplexPairTy CodeGenFunction::EmitUnPromotedValue(ComplexPairTy result,
+ QualType UnPromotionType) {
+ llvm::Type *ComplexElementTy =
+ ConvertType(UnPromotionType->castAs<ComplexType>()->getElementType());
+ if (result.first)
+ result.first =
+ Builder.CreateFPTrunc(result.first, ComplexElementTy, "unpromotion");
+ if (result.second)
+ result.second =
+ Builder.CreateFPTrunc(result.second, ComplexElementTy, "unpromotion");
+ return result;
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedValue(ComplexPairTy result,
+ QualType PromotionType) {
+ llvm::Type *ComplexElementTy =
+ ConvertType(PromotionType->castAs<ComplexType>()->getElementType());
+ if (result.first)
+ result.first = Builder.CreateFPExt(result.first, ComplexElementTy, "ext");
+ if (result.second)
+ result.second = Builder.CreateFPExt(result.second, ComplexElementTy, "ext");
+
+ return result;
+}
+
+ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E,
+ QualType PromotionType) {
+ E = E->IgnoreParens();
+ if (auto BO = dyn_cast<BinaryOperator>(E)) {
+ switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+ case BO_##OP: \
+ return EmitBin##OP(EmitBinOps(BO, PromotionType));
+ HANDLE_BINOP(Add)
+ HANDLE_BINOP(Sub)
+ HANDLE_BINOP(Mul)
+ HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+ default:
+ break;
+ }
+ } else if (auto UO = dyn_cast<UnaryOperator>(E)) {
+ switch (UO->getOpcode()) {
+ case UO_Minus:
+ return VisitMinus(UO, PromotionType);
+ case UO_Plus:
+ return VisitPlus(UO, PromotionType);
+ default:
+ break;
+ }
+ }
+ auto result = Visit(const_cast<Expr *>(E));
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedValue(result, PromotionType);
+ else
+ return result;
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E,
+ QualType DstTy) {
+ return ComplexExprEmitter(*this).EmitPromoted(E, DstTy);
+}
+
+ComplexPairTy
+ComplexExprEmitter::EmitPromotedComplexOperand(const Expr *E,
+ QualType OverallPromotionType) {
+ if (E->getType()->isAnyComplexType()) {
+ if (!OverallPromotionType.isNull())
+ return CGF.EmitPromotedComplexExpr(E, OverallPromotionType);
+ else
+ return Visit(const_cast<Expr *>(E));
+ } else {
+ if (!OverallPromotionType.isNull()) {
+ QualType ComplexElementTy =
+ OverallPromotionType->castAs<ComplexType>()->getElementType();
+ return ComplexPairTy(CGF.EmitPromotedScalarExpr(E, ComplexElementTy),
+ nullptr);
+ } else {
+ return ComplexPairTy(CGF.EmitScalarExpr(E), nullptr);
+ }
+ }
+}
+
ComplexExprEmitter::BinOpInfo
-ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) {
+ComplexExprEmitter::EmitBinOps(const BinaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreReal();
TestAndClearIgnoreImag();
BinOpInfo Ops;
- if (E->getLHS()->getType()->isRealFloatingType())
- Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
- else
- Ops.LHS = Visit(E->getLHS());
- if (E->getRHS()->getType()->isRealFloatingType())
- Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
- else
- Ops.RHS = Visit(E->getRHS());
- Ops.Ty = E->getType();
+ Ops.LHS = EmitPromotedComplexOperand(E->getLHS(), PromotionType);
+ Ops.RHS = EmitPromotedComplexOperand(E->getRHS(), PromotionType);
+ if (!PromotionType.isNull())
+ Ops.Ty = PromotionType;
+ else
+ Ops.Ty = E->getType();
+ Ops.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
return Ops;
}
@@ -905,41 +1045,74 @@ EmitCompoundAssignLValue(const CompoundAssignOperator *E,
if (const AtomicType *AT = LHSTy->getAs<AtomicType>())
LHSTy = AT->getValueType();
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
BinOpInfo OpInfo;
+ OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures);
// Load the RHS and LHS operands.
// __block variables need to have the rhs evaluated first, plus this should
// improve codegen a little.
- OpInfo.Ty = E->getComputationResultType();
- QualType ComplexElementTy = cast<ComplexType>(OpInfo.Ty)->getElementType();
+ QualType PromotionTypeCR;
+ PromotionTypeCR = getPromotionType(E->getComputationResultType());
+ if (PromotionTypeCR.isNull())
+ PromotionTypeCR = E->getComputationResultType();
+ OpInfo.Ty = PromotionTypeCR;
+ QualType ComplexElementTy =
+ OpInfo.Ty->castAs<ComplexType>()->getElementType();
+ QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType());
// The RHS should have been converted to the computation type.
if (E->getRHS()->getType()->isRealFloatingType()) {
- assert(
- CGF.getContext()
- .hasSameUnqualifiedType(ComplexElementTy, E->getRHS()->getType()));
- OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+ if (!PromotionTypeRHS.isNull())
+ OpInfo.RHS = ComplexPairTy(
+ CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS), nullptr);
+ else {
+ assert(CGF.getContext().hasSameUnqualifiedType(ComplexElementTy,
+ E->getRHS()->getType()));
+
+ OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+ }
} else {
- assert(CGF.getContext()
- .hasSameUnqualifiedType(OpInfo.Ty, E->getRHS()->getType()));
- OpInfo.RHS = Visit(E->getRHS());
+ if (!PromotionTypeRHS.isNull()) {
+ OpInfo.RHS = ComplexPairTy(
+ CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionTypeRHS));
+ } else {
+ assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty,
+ E->getRHS()->getType()));
+ OpInfo.RHS = Visit(E->getRHS());
+ }
}
LValue LHS = CGF.EmitLValue(E->getLHS());
// Load from the l-value and convert it.
SourceLocation Loc = E->getExprLoc();
+ QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType());
if (LHSTy->isAnyComplexType()) {
ComplexPairTy LHSVal = EmitLoadOfLValue(LHS, Loc);
- OpInfo.LHS = EmitComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
+ if (!PromotionTypeLHS.isNull())
+ OpInfo.LHS =
+ EmitComplexToComplexCast(LHSVal, LHSTy, PromotionTypeLHS, Loc);
+ else
+ OpInfo.LHS = EmitComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
} else {
llvm::Value *LHSVal = CGF.EmitLoadOfScalar(LHS, Loc);
// For floating point real operands we can directly pass the scalar form
// to the binary operator emission and potentially get more efficient code.
if (LHSTy->isRealFloatingType()) {
- if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy))
- LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc);
+ QualType PromotedComplexElementTy;
+ if (!PromotionTypeLHS.isNull()) {
+ PromotedComplexElementTy =
+ cast<ComplexType>(PromotionTypeLHS)->getElementType();
+ if (!CGF.getContext().hasSameUnqualifiedType(PromotedComplexElementTy,
+ PromotionTypeLHS))
+ LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy,
+ PromotedComplexElementTy, Loc);
+ } else {
+ if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy))
+ LHSVal =
+ CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc);
+ }
OpInfo.LHS = ComplexPairTy(LHSVal, nullptr);
} else {
OpInfo.LHS = EmitScalarToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index b83a87443250..c38feaaca35a 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -46,7 +47,7 @@ struct ConstantAggregateBuilderUtils {
CharUnits getAlignment(const llvm::Constant *C) const {
return CharUnits::fromQuantity(
- CGM.getDataLayout().getABITypeAlignment(C->getType()));
+ CGM.getDataLayout().getABITypeAlign(C->getType()));
}
CharUnits getSize(llvm::Type *Ty) const {
@@ -94,7 +95,7 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
bool NaturalLayout = true;
bool split(size_t Index, CharUnits Hint);
- Optional<size_t> splitAt(CharUnits Pos);
+ std::optional<size_t> splitAt(CharUnits Pos);
static llvm::Constant *buildFrom(CodeGenModule &CGM,
ArrayRef<llvm::Constant *> Elems,
@@ -158,12 +159,12 @@ bool ConstantAggregateBuilder::add(llvm::Constant *C, CharUnits Offset,
}
// Uncommon case: constant overlaps what we've already created.
- llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset);
+ std::optional<size_t> FirstElemToReplace = splitAt(Offset);
if (!FirstElemToReplace)
return false;
CharUnits CSize = getSize(C);
- llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + CSize);
+ std::optional<size_t> LastElemToReplace = splitAt(Offset + CSize);
if (!LastElemToReplace)
return false;
@@ -222,10 +223,10 @@ bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits,
// Partial byte: update the existing integer if there is one. If we
// can't split out a 1-CharUnit range to update, then we can't add
// these bits and fail the entire constant emission.
- llvm::Optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars);
+ std::optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars);
if (!FirstElemToUpdate)
return false;
- llvm::Optional<size_t> LastElemToUpdate =
+ std::optional<size_t> LastElemToUpdate =
splitAt(OffsetInChars + CharUnits::One());
if (!LastElemToUpdate)
return false;
@@ -283,8 +284,8 @@ bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits,
/// Returns a position within Elems and Offsets such that all elements
/// before the returned index end before Pos and all elements at or after
/// the returned index begin at or after Pos. Splits elements as necessary
-/// to ensure this. Returns None if we find something we can't split.
-Optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) {
+/// to ensure this. Returns std::nullopt if we find something we can't split.
+std::optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) {
if (Pos >= Size)
return Offsets.size();
@@ -305,7 +306,7 @@ Optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) {
// Try to decompose it into smaller constants.
if (!split(LastAtOrBeforePosIndex, Pos))
- return None;
+ return std::nullopt;
}
}
@@ -517,12 +518,12 @@ void ConstantAggregateBuilder::condense(CharUnits Offset,
llvm::Type *DesiredTy) {
CharUnits Size = getSize(DesiredTy);
- llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset);
+ std::optional<size_t> FirstElemToReplace = splitAt(Offset);
if (!FirstElemToReplace)
return;
size_t First = *FirstElemToReplace;
- llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + Size);
+ std::optional<size_t> LastElemToReplace = splitAt(Offset + Size);
if (!LastElemToReplace)
return;
size_t Last = *LastElemToReplace;
@@ -543,8 +544,8 @@ void ConstantAggregateBuilder::condense(CharUnits Offset,
}
llvm::Constant *Replacement = buildFrom(
- CGM, makeArrayRef(Elems).slice(First, Length),
- makeArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy),
+ CGM, ArrayRef(Elems).slice(First, Length),
+ ArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy),
/*known to have natural layout=*/false, DesiredTy, false);
replace(Elems, First, Last, {Replacement});
replace(Offsets, First, Last, {Offset});
@@ -913,17 +914,16 @@ bool ConstStructBuilder::UpdateStruct(ConstantEmitter &Emitter,
// ConstExprEmitter
//===----------------------------------------------------------------------===//
-static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM,
- CodeGenFunction *CGF,
- const CompoundLiteralExpr *E) {
+static ConstantAddress
+tryEmitGlobalCompoundLiteral(ConstantEmitter &emitter,
+ const CompoundLiteralExpr *E) {
+ CodeGenModule &CGM = emitter.CGM;
CharUnits Align = CGM.getContext().getTypeAlignInChars(E->getType());
if (llvm::GlobalVariable *Addr =
CGM.getAddrOfConstantCompoundLiteralIfEmitted(E))
return ConstantAddress(Addr, Addr->getValueType(), Align);
LangAS addressSpace = E->getType().getAddressSpace();
-
- ConstantEmitter emitter(CGM, CGF);
llvm::Constant *C = emitter.tryEmitForInitializer(E->getInitializer(),
addressSpace, E->getType());
if (!C) {
@@ -972,7 +972,7 @@ EmitArrayConstant(CodeGenModule &CGM, llvm::ArrayType *DesiredType,
if (CommonElementType && NonzeroLength >= 8) {
llvm::Constant *Initial = llvm::ConstantArray::get(
llvm::ArrayType::get(CommonElementType, NonzeroLength),
- makeArrayRef(Elements).take_front(NonzeroLength));
+ ArrayRef(Elements).take_front(NonzeroLength));
Elements.resize(2);
Elements[0] = Initial;
} else {
@@ -1395,15 +1395,12 @@ ConstantEmitter::tryEmitAbstract(const APValue &value, QualType destType) {
llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) {
if (!CE->hasAPValueResult())
return nullptr;
- const Expr *Inner = CE->getSubExpr()->IgnoreImplicit();
- QualType RetType;
- if (auto *Call = dyn_cast<CallExpr>(Inner))
- RetType = Call->getCallReturnType(CGM.getContext());
- else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Inner))
- RetType = Ctor->getType();
- llvm::Constant *Res =
- emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType);
- return Res;
+
+ QualType RetType = CE->getType();
+ if (CE->isGLValue())
+ RetType = CGM.getContext().getLValueReferenceType(RetType);
+
+ return emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType);
}
llvm::Constant *
@@ -1970,7 +1967,9 @@ ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) {
ConstantLValue
ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
- return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E);
+ ConstantEmitter CompoundLiteralEmitter(CGM, Emitter.CGF);
+ CompoundLiteralEmitter.setInConstantContext(Emitter.isInConstantContext());
+ return tryEmitGlobalCompoundLiteral(CompoundLiteralEmitter, E);
}
ConstantLValue
@@ -2214,7 +2213,8 @@ void CodeGenModule::setAddrOfConstantCompoundLiteral(
ConstantAddress
CodeGenModule::GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *E) {
assert(E->isFileScope() && "not a file-scope compound literal expr");
- return tryEmitGlobalCompoundLiteral(*this, nullptr, E);
+ ConstantEmitter emitter(*this);
+ return tryEmitGlobalCompoundLiteral(emitter, E);
}
llvm::Constant *
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index b150aaa376eb..a0dcb978b1ac 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -28,7 +28,6 @@
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFixedPoint.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -43,6 +42,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/TypeSize.h"
#include <cstdarg>
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -152,16 +152,16 @@ static bool MustVisitNullValue(const Expr *E) {
}
/// If \p E is a widened promoted integer, get its base (unpromoted) type.
-static llvm::Optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx,
- const Expr *E) {
+static std::optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx,
+ const Expr *E) {
const Expr *Base = E->IgnoreImpCasts();
if (E == Base)
- return llvm::None;
+ return std::nullopt;
QualType BaseTy = Base->getType();
- if (!BaseTy->isPromotableIntegerType() ||
+ if (!Ctx.isPromotableIntegerType(BaseTy) ||
Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType()))
- return llvm::None;
+ return std::nullopt;
return BaseTy;
}
@@ -255,7 +255,7 @@ public:
if (VD->getType()->isReferenceType()) {
if (const auto *TTy =
- dyn_cast<TypedefType>(VD->getType().getNonReferenceType()))
+ VD->getType().getNonReferenceType()->getAs<TypedefType>())
AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>();
} else {
// Assumptions for function parameters are emitted at the start of the
@@ -271,8 +271,7 @@ public:
}
if (!AVAttr)
- if (const auto *TTy =
- dyn_cast<TypedefType>(E->getType()))
+ if (const auto *TTy = E->getType()->getAs<TypedefType>())
AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>();
if (!AVAttr)
@@ -468,6 +467,9 @@ public:
return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue());
}
Value *VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E) {
+ if (E->getType()->isVoidType())
+ return nullptr;
+
return EmitNullValue(E->getType());
}
Value *VisitGNUNullExpr(const GNUNullExpr *E) {
@@ -620,16 +622,22 @@ public:
return Visit(E->getSubExpr()); // the actual value should be unused
return EmitLoadOfLValue(E);
}
- Value *VisitUnaryPlus(const UnaryOperator *E) {
- // This differs from gcc, though, most likely due to a bug in gcc.
- TestAndClearIgnoreResultAssign();
- return Visit(E->getSubExpr());
- }
- Value *VisitUnaryMinus (const UnaryOperator *E);
+
+ Value *VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitPlus(const UnaryOperator *E, QualType PromotionType);
+ Value *VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitMinus(const UnaryOperator *E, QualType PromotionType);
+
Value *VisitUnaryNot (const UnaryOperator *E);
Value *VisitUnaryLNot (const UnaryOperator *E);
- Value *VisitUnaryReal (const UnaryOperator *E);
- Value *VisitUnaryImag (const UnaryOperator *E);
+ Value *VisitUnaryReal(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitReal(const UnaryOperator *E, QualType PromotionType);
+ Value *VisitUnaryImag(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitImag(const UnaryOperator *E, QualType PromotionType);
Value *VisitUnaryExtension(const UnaryOperator *E) {
return Visit(E->getSubExpr());
}
@@ -719,7 +727,7 @@ public:
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), Ops))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
@@ -791,7 +799,13 @@ public:
// Helper functions for fixed point binary operations.
Value *EmitFixedPointBinOp(const BinOpInfo &Ops);
- BinOpInfo EmitBinOps(const BinaryOperator *E);
+ BinOpInfo EmitBinOps(const BinaryOperator *E,
+ QualType PromotionTy = QualType());
+
+ Value *EmitPromotedValue(Value *result, QualType PromotionType);
+ Value *EmitUnPromotedValue(Value *result, QualType ExprType);
+ Value *EmitPromoted(const Expr *E, QualType PromotionType);
+
LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
Value *&Result);
@@ -799,13 +813,28 @@ public:
Value *EmitCompoundAssign(const CompoundAssignOperator *E,
Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
+ QualType getPromotionType(QualType Ty) {
+ if (auto *CT = Ty->getAs<ComplexType>()) {
+ QualType ElementType = CT->getElementType();
+ if (ElementType.UseExcessPrecision(CGF.getContext()))
+ return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+ }
+ if (Ty.UseExcessPrecision(CGF.getContext()))
+ return CGF.getContext().FloatTy;
+ return QualType();
+ }
+
// Binary operators and binary compound assignment operators.
-#define HANDLEBINOP(OP) \
- Value *VisitBin ## OP(const BinaryOperator *E) { \
- return Emit ## OP(EmitBinOps(E)); \
- } \
- Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \
- return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \
+#define HANDLEBINOP(OP) \
+ Value *VisitBin##OP(const BinaryOperator *E) { \
+ QualType promotionTy = getPromotionType(E->getType()); \
+ auto result = Emit##OP(EmitBinOps(E, promotionTy)); \
+ if (result && !promotionTy.isNull()) \
+ result = EmitUnPromotedValue(result, E->getType()); \
+ return result; \
+ } \
+ Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) { \
+ return EmitCompoundAssign(E, &ScalarExprEmitter::Emit##OP); \
}
HANDLEBINOP(Mul)
HANDLEBINOP(Div)
@@ -1599,21 +1628,14 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) {
Value *
ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
ASTContext &Context = CGF.getContext();
- llvm::Optional<LangAS> GlobalAS =
- Context.getTargetInfo().getConstantAddressSpace();
+ unsigned AddrSpace =
+ Context.getTargetAddressSpace(CGF.CGM.GetGlobalConstantAddressSpace());
llvm::Constant *GlobalConstStr = Builder.CreateGlobalStringPtr(
- E->ComputeName(Context), "__usn_str",
- static_cast<unsigned>(GlobalAS.value_or(LangAS::Default)));
+ E->ComputeName(Context), "__usn_str", AddrSpace);
- unsigned ExprAS = Context.getTargetAddressSpace(E->getType());
-
- if (GlobalConstStr->getType()->getPointerAddressSpace() == ExprAS)
- return GlobalConstStr;
-
- llvm::PointerType *PtrTy = cast<llvm::PointerType>(GlobalConstStr->getType());
- llvm::PointerType *NewPtrTy =
- llvm::PointerType::getWithSamePointeeType(PtrTy, ExprAS);
- return Builder.CreateAddrSpaceCast(GlobalConstStr, NewPtrTy, "usn_addr_cast");
+ llvm::Type *ExprTy = ConvertType(E->getType());
+ return Builder.CreatePointerBitCastOrAddrSpaceCast(GlobalConstStr, ExprTy,
+ "usn_addr_cast");
}
Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
@@ -1643,7 +1665,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
// newv = insert newv, x, i
auto *RTy = llvm::FixedVectorType::get(LTy->getElementType(),
MTy->getNumElements());
- Value* NewV = llvm::UndefValue::get(RTy);
+ Value* NewV = llvm::PoisonValue::get(RTy);
for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) {
Value *IIndx = llvm::ConstantInt::get(CGF.SizeTy, i);
Value *Indx = Builder.CreateExtractElement(Mask, IIndx, "shuf_idx");
@@ -1999,6 +2021,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Expr *E = CE->getSubExpr();
QualType DestTy = CE->getType();
CastKind Kind = CE->getCastKind();
+ CodeGenFunction::CGFPOptionsRAII FPOptions(CGF, CE);
// These cases are generally not written to ignore the result of
// evaluating their sub-expressions, so we clear this now.
@@ -2479,7 +2502,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(InVal, Amount, Name);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case LangOptions::SOB_Trapping:
if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
@@ -2584,7 +2607,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
} else if (type->isIntegerType()) {
QualType promotedType;
bool canPerformLossyDemotionCheck = false;
- if (type->isPromotableIntegerType()) {
+ if (CGF.getContext().isPromotableIntegerType(type)) {
promotedType = CGF.getContext().getPromotedIntegerType(type);
assert(promotedType != type && "Shouldn't promote to the same type.");
canPerformLossyDemotionCheck = true;
@@ -2818,10 +2841,45 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
}
+Value *ScalarExprEmitter::VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitPlus(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
-Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
+Value *ScalarExprEmitter::VisitPlus(const UnaryOperator *E,
+ QualType PromotionType) {
+ // This differs from gcc, though, most likely due to a bug in gcc.
TestAndClearIgnoreResultAssign();
- Value *Op = Visit(E->getSubExpr());
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedScalarExpr(E->getSubExpr(), PromotionType);
+ return Visit(E->getSubExpr());
+}
+
+Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitMinus(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
+
+Value *ScalarExprEmitter::VisitMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ TestAndClearIgnoreResultAssign();
+ Value *Op;
+ if (!PromotionType.isNull())
+ Op = CGF.EmitPromotedScalarExpr(E->getSubExpr(), PromotionType);
+ else
+ Op = Visit(E->getSubExpr());
// Generate a unary FNeg for FP ops.
if (Op->getType()->isFPOrFPVectorTy())
@@ -2841,7 +2899,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
Value *ScalarExprEmitter::VisitUnaryNot(const UnaryOperator *E) {
TestAndClearIgnoreResultAssign();
Value *Op = Visit(E->getSubExpr());
- return Builder.CreateNot(Op, "neg");
+ return Builder.CreateNot(Op, "not");
}
Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
@@ -3006,33 +3064,75 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
return Builder.getInt(E->EvaluateKnownConstInt(CGF.getContext()));
}
-Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E) {
+Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitReal(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
+
+Value *ScalarExprEmitter::VisitReal(const UnaryOperator *E,
+ QualType PromotionType) {
Expr *Op = E->getSubExpr();
if (Op->getType()->isAnyComplexType()) {
// If it's an l-value, load through the appropriate subobject l-value.
// Note that we have to ask E because Op might be an l-value that
// this won't work for, e.g. an Obj-C property.
- if (E->isGLValue())
- return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
- E->getExprLoc()).getScalarVal();
-
+ if (E->isGLValue()) {
+ if (!PromotionType.isNull()) {
+ CodeGenFunction::ComplexPairTy result = CGF.EmitComplexExpr(
+ Op, /*IgnoreReal*/ IgnoreResultAssign, /*IgnoreImag*/ true);
+ if (result.first)
+ result.first = CGF.EmitPromotedValue(result, PromotionType).first;
+ return result.first;
+ } else {
+ return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+ .getScalarVal();
+ }
+ }
// Otherwise, calculate and project.
return CGF.EmitComplexExpr(Op, false, true).first;
}
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedScalarExpr(Op, PromotionType);
return Visit(Op);
}
-Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
+Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitImag(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
+
+Value *ScalarExprEmitter::VisitImag(const UnaryOperator *E,
+ QualType PromotionType) {
Expr *Op = E->getSubExpr();
if (Op->getType()->isAnyComplexType()) {
// If it's an l-value, load through the appropriate subobject l-value.
// Note that we have to ask E because Op might be an l-value that
// this won't work for, e.g. an Obj-C property.
- if (Op->isGLValue())
- return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
- E->getExprLoc()).getScalarVal();
-
+ if (Op->isGLValue()) {
+ if (!PromotionType.isNull()) {
+ CodeGenFunction::ComplexPairTy result = CGF.EmitComplexExpr(
+ Op, /*IgnoreReal*/ true, /*IgnoreImag*/ IgnoreResultAssign);
+ if (result.second)
+ result.second = CGF.EmitPromotedValue(result, PromotionType).second;
+ return result.second;
+ } else {
+ return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+ .getScalarVal();
+ }
+ }
// Otherwise, calculate and project.
return CGF.EmitComplexExpr(Op, true, false).second;
}
@@ -3041,8 +3141,12 @@ Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
// effects are evaluated, but not the actual value.
if (Op->isGLValue())
CGF.EmitLValue(Op);
+ else if (!PromotionType.isNull())
+ CGF.EmitPromotedScalarExpr(Op, PromotionType);
else
CGF.EmitScalarExpr(Op, true);
+ if (!PromotionType.isNull())
+ return llvm::Constant::getNullValue(ConvertType(PromotionType));
return llvm::Constant::getNullValue(ConvertType(E->getType()));
}
@@ -3050,12 +3154,65 @@ Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
// Binary Operators
//===----------------------------------------------------------------------===//
-BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
+Value *ScalarExprEmitter::EmitPromotedValue(Value *result,
+ QualType PromotionType) {
+ return CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext");
+}
+
+Value *ScalarExprEmitter::EmitUnPromotedValue(Value *result,
+ QualType ExprType) {
+ return CGF.Builder.CreateFPTrunc(result, ConvertType(ExprType), "unpromotion");
+}
+
+Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) {
+ E = E->IgnoreParens();
+ if (auto BO = dyn_cast<BinaryOperator>(E)) {
+ switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+ case BO_##OP: \
+ return Emit##OP(EmitBinOps(BO, PromotionType));
+ HANDLE_BINOP(Add)
+ HANDLE_BINOP(Sub)
+ HANDLE_BINOP(Mul)
+ HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+ default:
+ break;
+ }
+ } else if (auto UO = dyn_cast<UnaryOperator>(E)) {
+ switch (UO->getOpcode()) {
+ case UO_Imag:
+ return VisitImag(UO, PromotionType);
+ case UO_Real:
+ return VisitReal(UO, PromotionType);
+ case UO_Minus:
+ return VisitMinus(UO, PromotionType);
+ case UO_Plus:
+ return VisitPlus(UO, PromotionType);
+ default:
+ break;
+ }
+ }
+ auto result = Visit(const_cast<Expr *>(E));
+ if (result) {
+ if (!PromotionType.isNull())
+ return EmitPromotedValue(result, PromotionType);
+ else
+ return EmitUnPromotedValue(result, E->getType());
+ }
+ return result;
+}
+
+BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreResultAssign();
BinOpInfo Result;
- Result.LHS = Visit(E->getLHS());
- Result.RHS = Visit(E->getRHS());
- Result.Ty = E->getType();
+ Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType);
+ Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+ if (!PromotionType.isNull())
+ Result.Ty = PromotionType;
+ else
+ Result.Ty = E->getType();
Result.Opcode = E->getOpcode();
Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
Result.E = E;
@@ -3074,8 +3231,18 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
// Emit the RHS first. __block variables need to have the rhs evaluated
// first, plus this should improve codegen a little.
- OpInfo.RHS = Visit(E->getRHS());
- OpInfo.Ty = E->getComputationResultType();
+
+ QualType PromotionTypeCR;
+ PromotionTypeCR = getPromotionType(E->getComputationResultType());
+ if (PromotionTypeCR.isNull())
+ PromotionTypeCR = E->getComputationResultType();
+ QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType());
+ QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType());
+ if (!PromotionTypeRHS.isNull())
+ OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS);
+ else
+ OpInfo.RHS = Visit(E->getRHS());
+ OpInfo.Ty = PromotionTypeCR;
OpInfo.Opcode = E->getOpcode();
OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
OpInfo.E = E;
@@ -3154,16 +3321,20 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures);
SourceLocation Loc = E->getExprLoc();
- OpInfo.LHS =
- EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc);
+ if (!PromotionTypeLHS.isNull())
+ OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionTypeLHS,
+ E->getExprLoc());
+ else
+ OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
+ E->getComputationLHSType(), Loc);
// Expand the binary operator.
Result = (this->*Func)(OpInfo);
// Convert the result back to the LHS type,
// potentially with Implicit Conversion sanitizer check.
- Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
- Loc, ScalarConversionOpts(CGF.SanOpts));
+ Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc,
+ ScalarConversionOpts(CGF.SanOpts));
if (atomicPHI) {
llvm::BasicBlock *curBlock = Builder.GetInsertBlock();
@@ -3651,7 +3822,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
@@ -3801,7 +3972,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
@@ -4759,8 +4930,7 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF,
Value *Src, unsigned NumElementsDst) {
static constexpr int Mask[] = {0, 1, 2, -1};
- return Builder.CreateShuffleVector(Src,
- llvm::makeArrayRef(Mask, NumElementsDst));
+ return Builder.CreateShuffleVector(Src, llvm::ArrayRef(Mask, NumElementsDst));
}
// Create cast instructions for converting LLVM value \p Src to LLVM type \p
@@ -4897,6 +5067,16 @@ Value *CodeGenFunction::EmitComplexToScalarConversion(ComplexPairTy Src,
}
+Value *
+CodeGenFunction::EmitPromotedScalarExpr(const Expr *E,
+ QualType PromotionType) {
+ if (!PromotionType.isNull())
+ return ScalarExprEmitter(*this).EmitPromoted(E, PromotionType);
+ else
+ return ScalarExprEmitter(*this).Visit(const_cast<Expr *>(E));
+}
+
+
llvm::Value *CodeGenFunction::
EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index fdd2fa18bb4a..c39e0cc75f2d 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -162,7 +162,7 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
// amdgpu
llvm::Constant *Size =
llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()),
- static_cast<uint32_t>(r.second.getFixedSize()));
+ static_cast<uint32_t>(r.second.getFixedValue()));
Vec.push_back(Size);
}
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 7dfcc65969a8..5882f491d597 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -13,16 +13,22 @@
//===----------------------------------------------------------------------===//
#include "CGHLSLRuntime.h"
+#include "CGDebugInfo.h"
#include "CodeGenModule.h"
+#include "clang/AST/Decl.h"
#include "clang/Basic/TargetOptions.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/FormatVariadic.h"
using namespace clang;
using namespace CodeGen;
+using namespace clang::hlsl;
using namespace llvm;
namespace {
+
void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) {
// The validation of ValVersionStr is done at HLSLToolChain::TranslateArgs.
// Assume ValVersionStr is legal here.
@@ -39,14 +45,415 @@ void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) {
IRBuilder<> B(M.getContext());
MDNode *Val = MDNode::get(Ctx, {ConstantAsMetadata::get(B.getInt32(Major)),
ConstantAsMetadata::get(B.getInt32(Minor))});
- StringRef DxilValKey = "dx.valver";
- M.addModuleFlag(llvm::Module::ModFlagBehavior::AppendUnique, DxilValKey, Val);
+ StringRef DXILValKey = "dx.valver";
+ auto *DXILValMD = M.getOrInsertNamedMetadata(DXILValKey);
+ DXILValMD->addOperand(Val);
+}
+void addDisableOptimizations(llvm::Module &M) {
+ StringRef Key = "dx.disable_optimizations";
+ M.addModuleFlag(llvm::Module::ModFlagBehavior::Override, Key, 1);
+}
+// A cbuffer will be translated into a global variable in a special address space.
+// If translated into C,
+// cbuffer A {
+// float a;
+// float b;
+// }
+// float foo() { return a + b; }
+//
+// will be translated into
+//
+// struct A {
+// float a;
+// float b;
+// } cbuffer_A __attribute__((address_space(4)));
+// float foo() { return cbuffer_A.a + cbuffer_A.b; }
+//
+// layoutBuffer will create the struct A type.
+// replaceBuffer will replace uses of the global variables a and b with
+// cbuffer_A.a and cbuffer_A.b.
+//
+void layoutBuffer(CGHLSLRuntime::Buffer &Buf, const DataLayout &DL) {
+ if (Buf.Constants.empty())
+ return;
+
+ std::vector<llvm::Type *> EltTys;
+ for (auto &Const : Buf.Constants) {
+ GlobalVariable *GV = Const.first;
+ Const.second = EltTys.size();
+ llvm::Type *Ty = GV->getValueType();
+ EltTys.emplace_back(Ty);
+ }
+ Buf.LayoutStruct = llvm::StructType::get(EltTys[0]->getContext(), EltTys);
+}
+
+GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) {
+ // Create global variable for CB.
+ GlobalVariable *CBGV = new GlobalVariable(
+ Buf.LayoutStruct, /*isConstant*/ true,
+ GlobalValue::LinkageTypes::ExternalLinkage, nullptr,
+ llvm::formatv("{0}{1}", Buf.Name, Buf.IsCBuffer ? ".cb." : ".tb."),
+ GlobalValue::NotThreadLocal);
+
+ IRBuilder<> B(CBGV->getContext());
+ Value *ZeroIdx = B.getInt32(0);
+ // Replace Const use with CB use.
+ for (auto &[GV, Offset] : Buf.Constants) {
+ Value *GEP =
+ B.CreateGEP(Buf.LayoutStruct, CBGV, {ZeroIdx, B.getInt32(Offset)});
+
+ assert(Buf.LayoutStruct->getElementType(Offset) == GV->getValueType() &&
+ "constant type mismatch");
+
+ // Replace.
+ GV->replaceAllUsesWith(GEP);
+ // Erase GV.
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ }
+ return CBGV;
}
+
} // namespace
+void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) {
+ if (D->getStorageClass() == SC_Static) {
+ // For a static inside a cbuffer, treat it as a global static.
+ // Don't add it to the cbuffer.
+ CGM.EmitGlobal(D);
+ return;
+ }
+
+ auto *GV = cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(D));
+ // Add debug info for constVal.
+ if (CGDebugInfo *DI = CGM.getModuleDebugInfo())
+ if (CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::DebugInfoKind::LimitedDebugInfo)
+ DI->EmitGlobalVariable(cast<GlobalVariable>(GV), D);
+
+ // FIXME: support packoffset.
+ // See https://github.com/llvm/llvm-project/issues/57914.
+ uint32_t Offset = 0;
+ bool HasUserOffset = false;
+
+ unsigned LowerBound = HasUserOffset ? Offset : UINT_MAX;
+ CB.Constants.emplace_back(std::make_pair(GV, LowerBound));
+}
+
+void CGHLSLRuntime::addBufferDecls(const DeclContext *DC, Buffer &CB) {
+ for (Decl *it : DC->decls()) {
+ if (auto *ConstDecl = dyn_cast<VarDecl>(it)) {
+ addConstant(ConstDecl, CB);
+ } else if (isa<CXXRecordDecl, EmptyDecl>(it)) {
+ // Nothing to do for this declaration.
+ } else if (isa<FunctionDecl>(it)) {
+ // A function within a cbuffer is effectively a top-level function,
+ // as it only refers to globally scoped declarations.
+ CGM.EmitTopLevelDecl(it);
+ }
+ }
+}
+
+void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *D) {
+ Buffers.emplace_back(Buffer(D));
+ addBufferDecls(D, Buffers.back());
+}
+
void CGHLSLRuntime::finishCodeGen() {
auto &TargetOpts = CGM.getTarget().getTargetOpts();
+ llvm::Module &M = CGM.getModule();
+ Triple T(M.getTargetTriple());
+ if (T.getArch() == Triple::ArchType::dxil)
+ addDxilValVersion(TargetOpts.DxilValidatorVersion, M);
+
+ generateGlobalCtorDtorCalls();
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ addDisableOptimizations(M);
+
+ const DataLayout &DL = M.getDataLayout();
+
+ for (auto &Buf : Buffers) {
+ layoutBuffer(Buf, DL);
+ GlobalVariable *GV = replaceBuffer(Buf);
+ M.getGlobalList().push_back(GV);
+ llvm::hlsl::ResourceClass RC = Buf.IsCBuffer
+ ? llvm::hlsl::ResourceClass::CBuffer
+ : llvm::hlsl::ResourceClass::SRV;
+ llvm::hlsl::ResourceKind RK = Buf.IsCBuffer
+ ? llvm::hlsl::ResourceKind::CBuffer
+ : llvm::hlsl::ResourceKind::TBuffer;
+ std::string TyName =
+ Buf.Name.str() + (Buf.IsCBuffer ? ".cb." : ".tb.") + "ty";
+ addBufferResourceAnnotation(GV, TyName, RC, RK, Buf.Binding);
+ }
+}
+
+CGHLSLRuntime::Buffer::Buffer(const HLSLBufferDecl *D)
+ : Name(D->getName()), IsCBuffer(D->isCBuffer()),
+ Binding(D->getAttr<HLSLResourceBindingAttr>()) {}
+void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
+ llvm::StringRef TyName,
+ llvm::hlsl::ResourceClass RC,
+ llvm::hlsl::ResourceKind RK,
+ BufferResBinding &Binding) {
llvm::Module &M = CGM.getModule();
- addDxilValVersion(TargetOpts.DxilValidatorVersion, M);
+
+ NamedMDNode *ResourceMD = nullptr;
+ switch (RC) {
+ case llvm::hlsl::ResourceClass::UAV:
+ ResourceMD = M.getOrInsertNamedMetadata("hlsl.uavs");
+ break;
+ case llvm::hlsl::ResourceClass::SRV:
+ ResourceMD = M.getOrInsertNamedMetadata("hlsl.srvs");
+ break;
+ case llvm::hlsl::ResourceClass::CBuffer:
+ ResourceMD = M.getOrInsertNamedMetadata("hlsl.cbufs");
+ break;
+ default:
+ assert(false && "Unsupported buffer type!");
+ return;
+ }
+
+ assert(ResourceMD != nullptr &&
+ "ResourceMD must have been set by the switch above.");
+
+ llvm::hlsl::FrontendResource Res(
+ GV, TyName, RK, Binding.Reg.value_or(UINT_MAX), Binding.Space);
+ ResourceMD->addOperand(Res.getMetadata());
+}
+
+static llvm::hlsl::ResourceKind
+castResourceShapeToResourceKind(HLSLResourceAttr::ResourceKind RK) {
+ switch (RK) {
+ case HLSLResourceAttr::ResourceKind::Texture1D:
+ return llvm::hlsl::ResourceKind::Texture1D;
+ case HLSLResourceAttr::ResourceKind::Texture2D:
+ return llvm::hlsl::ResourceKind::Texture2D;
+ case HLSLResourceAttr::ResourceKind::Texture2DMS:
+ return llvm::hlsl::ResourceKind::Texture2DMS;
+ case HLSLResourceAttr::ResourceKind::Texture3D:
+ return llvm::hlsl::ResourceKind::Texture3D;
+ case HLSLResourceAttr::ResourceKind::TextureCube:
+ return llvm::hlsl::ResourceKind::TextureCube;
+ case HLSLResourceAttr::ResourceKind::Texture1DArray:
+ return llvm::hlsl::ResourceKind::Texture1DArray;
+ case HLSLResourceAttr::ResourceKind::Texture2DArray:
+ return llvm::hlsl::ResourceKind::Texture2DArray;
+ case HLSLResourceAttr::ResourceKind::Texture2DMSArray:
+ return llvm::hlsl::ResourceKind::Texture2DMSArray;
+ case HLSLResourceAttr::ResourceKind::TextureCubeArray:
+ return llvm::hlsl::ResourceKind::TextureCubeArray;
+ case HLSLResourceAttr::ResourceKind::TypedBuffer:
+ return llvm::hlsl::ResourceKind::TypedBuffer;
+ case HLSLResourceAttr::ResourceKind::RawBuffer:
+ return llvm::hlsl::ResourceKind::RawBuffer;
+ case HLSLResourceAttr::ResourceKind::StructuredBuffer:
+ return llvm::hlsl::ResourceKind::StructuredBuffer;
+ case HLSLResourceAttr::ResourceKind::CBufferKind:
+ return llvm::hlsl::ResourceKind::CBuffer;
+ case HLSLResourceAttr::ResourceKind::SamplerKind:
+ return llvm::hlsl::ResourceKind::Sampler;
+ case HLSLResourceAttr::ResourceKind::TBuffer:
+ return llvm::hlsl::ResourceKind::TBuffer;
+ case HLSLResourceAttr::ResourceKind::RTAccelerationStructure:
+ return llvm::hlsl::ResourceKind::RTAccelerationStructure;
+ case HLSLResourceAttr::ResourceKind::FeedbackTexture2D:
+ return llvm::hlsl::ResourceKind::FeedbackTexture2D;
+ case HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray:
+ return llvm::hlsl::ResourceKind::FeedbackTexture2DArray;
+ }
+ // Make sure to update HLSLResourceAttr::ResourceKind when adding a new Kind
+ // to hlsl::ResourceKind. Assume FeedbackTexture2DArray is the last enum for
+ // HLSLResourceAttr::ResourceKind.
+ static_assert(
+ static_cast<uint32_t>(
+ HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray) ==
+ (static_cast<uint32_t>(llvm::hlsl::ResourceKind::NumEntries) - 2));
+ llvm_unreachable("all switch cases should be covered");
+}
+
+void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) {
+ const Type *Ty = D->getType()->getPointeeOrArrayElementType();
+ if (!Ty)
+ return;
+ const auto *RD = Ty->getAsCXXRecordDecl();
+ if (!RD)
+ return;
+ const auto *Attr = RD->getAttr<HLSLResourceAttr>();
+ if (!Attr)
+ return;
+
+ HLSLResourceAttr::ResourceClass RC = Attr->getResourceType();
+ llvm::hlsl::ResourceKind RK =
+ castResourceShapeToResourceKind(Attr->getResourceShape());
+
+ QualType QT(Ty, 0);
+ BufferResBinding Binding(D->getAttr<HLSLResourceBindingAttr>());
+ addBufferResourceAnnotation(GV, QT.getAsString(),
+ static_cast<llvm::hlsl::ResourceClass>(RC), RK,
+ Binding);
+}
+
+CGHLSLRuntime::BufferResBinding::BufferResBinding(
+ HLSLResourceBindingAttr *Binding) {
+ if (Binding) {
+ llvm::APInt RegInt(64, 0);
+ Binding->getSlot().substr(1).getAsInteger(10, RegInt);
+ Reg = RegInt.getLimitedValue();
+ llvm::APInt SpaceInt(64, 0);
+ Binding->getSpace().substr(5).getAsInteger(10, SpaceInt);
+ Space = SpaceInt.getLimitedValue();
+ } else {
+ Space = 0;
+ }
+}
+
+void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
+ const FunctionDecl *FD, llvm::Function *Fn) {
+ const auto *ShaderAttr = FD->getAttr<HLSLShaderAttr>();
+ assert(ShaderAttr && "All entry functions must have a HLSLShaderAttr");
+ const StringRef ShaderAttrKindStr = "hlsl.shader";
+ Fn->addFnAttr(ShaderAttrKindStr,
+ ShaderAttr->ConvertShaderTypeToStr(ShaderAttr->getType()));
+ if (HLSLNumThreadsAttr *NumThreadsAttr = FD->getAttr<HLSLNumThreadsAttr>()) {
+ const StringRef NumThreadsKindStr = "hlsl.numthreads";
+ std::string NumThreadsStr =
+ formatv("{0},{1},{2}", NumThreadsAttr->getX(), NumThreadsAttr->getY(),
+ NumThreadsAttr->getZ());
+ Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
+ }
+}
+
+static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
+ if (const auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ Value *Result = PoisonValue::get(Ty);
+ for (unsigned I = 0; I < VT->getNumElements(); ++I) {
+ Value *Elt = B.CreateCall(F, {B.getInt32(I)});
+ Result = B.CreateInsertElement(Result, Elt, I);
+ }
+ return Result;
+ }
+ return B.CreateCall(F, {B.getInt32(0)});
+}
+
+llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B,
+ const ParmVarDecl &D,
+ llvm::Type *Ty) {
+ assert(D.hasAttrs() && "Entry parameter missing annotation attribute!");
+ if (D.hasAttr<HLSLSV_GroupIndexAttr>()) {
+ llvm::Function *DxGroupIndex =
+ CGM.getIntrinsic(Intrinsic::dx_flattened_thread_id_in_group);
+ return B.CreateCall(FunctionCallee(DxGroupIndex));
+ }
+ if (D.hasAttr<HLSLSV_DispatchThreadIDAttr>()) {
+ llvm::Function *DxThreadID = CGM.getIntrinsic(Intrinsic::dx_thread_id);
+ return buildVectorInput(B, DxThreadID, Ty);
+ }
+ assert(false && "Unhandled parameter attribute");
+ return nullptr;
+}
+
+void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
+ llvm::Function *Fn) {
+ llvm::Module &M = CGM.getModule();
+ llvm::LLVMContext &Ctx = M.getContext();
+ auto *EntryTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
+ Function *EntryFn =
+ Function::Create(EntryTy, Function::ExternalLinkage, FD->getName(), &M);
+
+ // Copy function attributes over; we have no argument or return attributes
+ // that can be valid on the real entry.
+ AttributeList NewAttrs = AttributeList::get(Ctx, AttributeList::FunctionIndex,
+ Fn->getAttributes().getFnAttrs());
+ EntryFn->setAttributes(NewAttrs);
+ setHLSLEntryAttributes(FD, EntryFn);
+
+ // Set the called function as internal linkage.
+ Fn->setLinkage(GlobalValue::InternalLinkage);
+
+ BasicBlock *BB = BasicBlock::Create(Ctx, "entry", EntryFn);
+ IRBuilder<> B(BB);
+ llvm::SmallVector<Value *> Args;
+ // FIXME: support struct parameters where semantics are on members.
+ // See: https://github.com/llvm/llvm-project/issues/57874
+ unsigned SRetOffset = 0;
+ for (const auto &Param : Fn->args()) {
+ if (Param.hasStructRetAttr()) {
+ // FIXME: support output.
+ // See: https://github.com/llvm/llvm-project/issues/57874
+ SRetOffset = 1;
+ Args.emplace_back(PoisonValue::get(Param.getType()));
+ continue;
+ }
+ const ParmVarDecl *PD = FD->getParamDecl(Param.getArgNo() - SRetOffset);
+ Args.push_back(emitInputSemantic(B, *PD, Param.getType()));
+ }
+
+ CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args);
+ (void)CI;
+ // FIXME: Handle codegen for return type semantics.
+ // See: https://github.com/llvm/llvm-project/issues/57875
+ B.CreateRetVoid();
+}
+
+static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
+ bool CtorOrDtor) {
+ const auto *GV =
+ M.getNamedGlobal(CtorOrDtor ? "llvm.global_ctors" : "llvm.global_dtors");
+ if (!GV)
+ return;
+ const auto *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA)
+ return;
+ // The global_ctor array elements are a struct [Priority, Fn *, COMDat].
+ // HLSL neither supports priorities or COMDat values, so we will check those
+ // in an assert but not handle them.
+
+ llvm::SmallVector<Function *> CtorFns;
+ for (const auto &Ctor : CA->operands()) {
+ if (isa<ConstantAggregateZero>(Ctor))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(Ctor);
+
+ assert(cast<ConstantInt>(CS->getOperand(0))->getValue() == 65535 &&
+ "HLSL doesn't support setting priority for global ctors.");
+ assert(isa<ConstantPointerNull>(CS->getOperand(2)) &&
+ "HLSL doesn't support COMDat for global ctors.");
+ Fns.push_back(cast<Function>(CS->getOperand(1)));
+ }
+}
+
+void CGHLSLRuntime::generateGlobalCtorDtorCalls() {
+ llvm::Module &M = CGM.getModule();
+ SmallVector<Function *> CtorFns;
+ SmallVector<Function *> DtorFns;
+ gatherFunctions(CtorFns, M, true);
+ gatherFunctions(DtorFns, M, false);
+
+ // Insert a call to the global constructor at the beginning of the entry block
+ // of externally exported functions. This is a bit of a hack: HLSL allows
+ // global constructors but doesn't support driver initialization of globals.
+ for (auto &F : M.functions()) {
+ if (!F.hasFnAttribute("hlsl.shader"))
+ continue;
+ IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
+ for (auto *Fn : CtorFns)
+ B.CreateCall(FunctionCallee(Fn));
+
+ // Insert global dtor calls before the terminator of the last basic block.
+ B.SetInsertPoint(F.back().getTerminator());
+ for (auto *Fn : DtorFns)
+ B.CreateCall(FunctionCallee(Fn));
+ }
+
+ // No need to keep global ctors/dtors for non-lib profile after call to
+ // ctors/dtors added for entry.
+ Triple T(M.getTargetTriple());
+ if (T.getEnvironment() != Triple::EnvironmentType::Library) {
+ if (auto *GV = M.getNamedGlobal("llvm.global_ctors"))
+ GV->eraseFromParent();
+ if (auto *GV = M.getNamedGlobal("llvm.global_dtors"))
+ GV->eraseFromParent();
+ }
}
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 268810f2ec9e..67413fbd4a78 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -15,21 +15,88 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
+#include "llvm/IR/IRBuilder.h"
+
+#include "clang/Basic/HLSLRuntime.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
+
+#include <optional>
+#include <vector>
+
+namespace llvm {
+class GlobalVariable;
+class Function;
+class StructType;
+} // namespace llvm
+
namespace clang {
+class VarDecl;
+class ParmVarDecl;
+class HLSLBufferDecl;
+class HLSLResourceBindingAttr;
+class Type;
+class DeclContext;
+
+class FunctionDecl;
namespace CodeGen {
class CodeGenModule;
class CGHLSLRuntime {
+public:
+ struct BufferResBinding {
+ // The ID like 2 in register(b2, space1).
+ std::optional<unsigned> Reg;
+ // The Space like 1 in register(b2, space1).
+ // Default value is 0.
+ unsigned Space;
+ BufferResBinding(HLSLResourceBindingAttr *Attr);
+ };
+ struct Buffer {
+ Buffer(const HLSLBufferDecl *D);
+ llvm::StringRef Name;
+ // IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer).
+ bool IsCBuffer;
+ BufferResBinding Binding;
+ // Global variable and offset for each constant.
+ std::vector<std::pair<llvm::GlobalVariable *, unsigned>> Constants;
+ llvm::StructType *LayoutStruct = nullptr;
+ };
+
protected:
CodeGenModule &CGM;
+ llvm::Value *emitInputSemantic(llvm::IRBuilder<> &B, const ParmVarDecl &D,
+ llvm::Type *Ty);
+
public:
CGHLSLRuntime(CodeGenModule &CGM) : CGM(CGM) {}
virtual ~CGHLSLRuntime() {}
+ void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV);
+ void generateGlobalCtorDtorCalls();
+
+ void addBuffer(const HLSLBufferDecl *D);
void finishCodeGen();
+
+ void setHLSLEntryAttributes(const FunctionDecl *FD, llvm::Function *Fn);
+
+ void emitEntryFunction(const FunctionDecl *FD, llvm::Function *Fn);
+ void setHLSLFunctionAttributes(llvm::Function *, const FunctionDecl *);
+
+private:
+ void addBufferResourceAnnotation(llvm::GlobalVariable *GV,
+ llvm::StringRef TyName,
+ llvm::hlsl::ResourceClass RC,
+ llvm::hlsl::ResourceKind RK,
+ BufferResBinding &Binding);
+ void addConstant(VarDecl *D, Buffer &CB);
+ void addBufferDecls(const DeclContext *DC, Buffer &CB);
+ llvm::SmallVector<Buffer> Buffers;
};
} // namespace CodeGen
diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp
index 12a6cd8da603..e5d9db273c2d 100644
--- a/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
+#include <optional>
using namespace clang::CodeGen;
using namespace llvm;
@@ -37,7 +38,7 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
- Optional<bool> Enabled;
+ std::optional<bool> Enabled;
if (Attrs.PipelineDisabled)
Enabled = false;
else if (Attrs.PipelineInitiationInterval != 0)
@@ -82,11 +83,11 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
- Optional<bool> Enabled;
+ std::optional<bool> Enabled;
if (Attrs.UnrollEnable == LoopAttributes::Disable)
Enabled = false;
else if (Attrs.UnrollEnable == LoopAttributes::Full)
- Enabled = None;
+ Enabled = std::nullopt;
else if (Attrs.UnrollEnable != LoopAttributes::Unspecified ||
Attrs.UnrollCount != 0)
Enabled = true;
@@ -144,7 +145,7 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
- Optional<bool> Enabled;
+ std::optional<bool> Enabled;
if (Attrs.UnrollAndJamEnable == LoopAttributes::Disable)
Enabled = false;
else if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable ||
@@ -212,7 +213,7 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
- Optional<bool> Enabled;
+ std::optional<bool> Enabled;
if (Attrs.VectorizeEnable == LoopAttributes::Disable)
Enabled = false;
else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified ||
@@ -330,7 +331,7 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
- Optional<bool> Enabled;
+ std::optional<bool> Enabled;
if (Attrs.DistributeEnable == LoopAttributes::Disable)
Enabled = false;
if (Attrs.DistributeEnable == LoopAttributes::Enable)
@@ -380,7 +381,7 @@ MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
- Optional<bool> Enabled;
+ std::optional<bool> Enabled;
if (Attrs.UnrollEnable == LoopAttributes::Disable)
Enabled = false;
else if (Attrs.UnrollEnable == LoopAttributes::Full)
@@ -496,7 +497,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
!EndLoc && !Attrs.MustProgress)
return;
- TempLoopID = MDNode::getTemporary(Header->getContext(), None);
+ TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt);
}
void LoopInfo::finish() {
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 1b6acb2b7212..7df2088a81d7 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -22,11 +22,14 @@
#include "clang/AST/StmtObjC.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/CodeGen/CodeGenABITypes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -138,7 +141,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc());
cast<llvm::LoadInst>(Ptr)->setMetadata(
CGM.getModule().getMDKindID("invariant.load"),
- llvm::MDNode::get(getLLVMContext(), None));
+ llvm::MDNode::get(getLLVMContext(), std::nullopt));
return Builder.CreateBitCast(Ptr, ConvertType(E->getType()));
}
@@ -370,16 +373,14 @@ static const Expr *findWeakLValue(const Expr *E) {
///
/// If the runtime does support a required entrypoint, then this method will
/// generate a call and return the resulting value. Otherwise it will return
-/// None and the caller can generate a msgSend instead.
-static Optional<llvm::Value *>
-tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
- llvm::Value *Receiver,
- const CallArgList& Args, Selector Sel,
- const ObjCMethodDecl *method,
- bool isClassMessage) {
+/// std::nullopt and the caller can generate a msgSend instead.
+static std::optional<llvm::Value *> tryGenerateSpecializedMessageSend(
+ CodeGenFunction &CGF, QualType ResultType, llvm::Value *Receiver,
+ const CallArgList &Args, Selector Sel, const ObjCMethodDecl *method,
+ bool isClassMessage) {
auto &CGM = CGF.CGM;
if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls)
- return None;
+ return std::nullopt;
auto &Runtime = CGM.getLangOpts().ObjCRuntime;
switch (Sel.getMethodFamily()) {
@@ -400,7 +401,7 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
if (isa<llvm::ConstantPointerNull>(arg))
return CGF.EmitObjCAllocWithZone(Receiver,
CGF.ConvertType(ResultType));
- return None;
+ return std::nullopt;
}
}
break;
@@ -431,7 +432,7 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
default:
break;
}
- return None;
+ return std::nullopt;
}
CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend(
@@ -439,7 +440,7 @@ CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend(
Selector Sel, llvm::Value *Receiver, const CallArgList &Args,
const ObjCInterfaceDecl *OID, const ObjCMethodDecl *Method,
bool isClassMessage) {
- if (Optional<llvm::Value *> SpecializedResult =
+ if (std::optional<llvm::Value *> SpecializedResult =
tryGenerateSpecializedMessageSend(CGF, ResultType, Receiver, Args,
Sel, Method, isClassMessage)) {
return RValue::get(*SpecializedResult);
@@ -520,36 +521,36 @@ CGObjCRuntime::GetRuntimeProtocolList(ObjCProtocolDecl::protocol_iterator begin,
/// Instead of '[[MyClass alloc] init]', try to generate
/// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the
/// caller side, as well as the optimized objc_alloc.
-static Optional<llvm::Value *>
+static std::optional<llvm::Value *>
tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) {
auto &Runtime = CGF.getLangOpts().ObjCRuntime;
if (!Runtime.shouldUseRuntimeFunctionForCombinedAllocInit())
- return None;
+ return std::nullopt;
// Match the exact pattern '[[MyClass alloc] init]'.
Selector Sel = OME->getSelector();
if (OME->getReceiverKind() != ObjCMessageExpr::Instance ||
!OME->getType()->isObjCObjectPointerType() || !Sel.isUnarySelector() ||
Sel.getNameForSlot(0) != "init")
- return None;
+ return std::nullopt;
// Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]'
// with 'cls' a Class.
auto *SubOME =
dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParenCasts());
if (!SubOME)
- return None;
+ return std::nullopt;
Selector SubSel = SubOME->getSelector();
if (!SubOME->getType()->isObjCObjectPointerType() ||
!SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc")
- return None;
+ return std::nullopt;
llvm::Value *Receiver = nullptr;
switch (SubOME->getReceiverKind()) {
case ObjCMessageExpr::Instance:
if (!SubOME->getInstanceReceiver()->getType()->isObjCClassType())
- return None;
+ return std::nullopt;
Receiver = CGF.EmitScalarExpr(SubOME->getInstanceReceiver());
break;
@@ -563,7 +564,7 @@ tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) {
}
case ObjCMessageExpr::SuperInstance:
case ObjCMessageExpr::SuperClass:
- return None;
+ return std::nullopt;
}
return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType()));
@@ -590,7 +591,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
}
}
- if (Optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E))
+ if (std::optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E))
return AdjustObjCObjectType(*this, E->getType(), RValue::get(*Val));
// We don't retain the receiver in delegate init calls, and this is
@@ -768,7 +769,8 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
}
args.push_back(OMD->getSelfDecl());
- args.push_back(OMD->getCmdDecl());
+ if (!OMD->isDirectMethod())
+ args.push_back(OMD->getCmdDecl());
args.append(OMD->param_begin(), OMD->param_end());
@@ -1110,11 +1112,47 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF,
callee, ReturnValueSlot(), args);
}
+// emitCmdValueForGetterSetterBody - Handle emitting the load necessary for
+// the `_cmd` selector argument for getter/setter bodies. For direct methods,
+// this returns an undefined/poison value; this matches behavior prior to `_cmd`
+// being removed from the direct method ABI as the getter/setter caller would
+// never load one. For non-direct methods, this emits a load of the implicit
+// `_cmd` storage.
+static llvm::Value *emitCmdValueForGetterSetterBody(CodeGenFunction &CGF,
+ ObjCMethodDecl *MD) {
+ if (MD->isDirectMethod()) {
+ // Direct methods do not have a `_cmd` argument. Emit an undefined/poison
+ // value. This will be passed to objc_getProperty/objc_setProperty, which
+ // has not appeared bothered by the `_cmd` argument being undefined before.
+ llvm::Type *selType = CGF.ConvertType(CGF.getContext().getObjCSelType());
+ return llvm::PoisonValue::get(selType);
+ }
+
+ return CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(MD->getCmdDecl()), "cmd");
+}
+
void
CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
const ObjCPropertyImplDecl *propImpl,
const ObjCMethodDecl *GetterMethodDecl,
llvm::Constant *AtomicHelperFn) {
+
+ ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
+
+ if (ivar->getType().isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+ if (!AtomicHelperFn) {
+ LValue Src =
+ EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar, 0);
+ LValue Dst = MakeAddrLValue(ReturnValue, ivar->getType());
+ callCStructCopyConstructor(Dst, Src);
+ } else {
+ ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
+ emitCPPObjectAtomicGetterCall(*this, ReturnValue.getPointer(), ivar,
+ AtomicHelperFn);
+ }
+ return;
+ }
+
// If there's a non-trivial 'get' expression, we just have to emit that.
if (!hasTrivialGetExpr(propImpl)) {
if (!AtomicHelperFn) {
@@ -1135,8 +1173,6 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
QualType propType = prop->getType();
ObjCMethodDecl *getterMethod = propImpl->getGetterMethodDecl();
- ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
-
// Pick an implementation strategy.
PropertyImplStrategy strategy(CGM, propImpl);
switch (strategy.getKind()) {
@@ -1188,11 +1224,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
// Return (ivar-type) objc_getProperty((id) self, _cmd, offset, true).
// FIXME: Can't this be simpler? This might even be worse than the
// corresponding gcc code.
- llvm::Value *cmd =
- Builder.CreateLoad(GetAddrOfLocalVar(getterMethod->getCmdDecl()), "cmd");
+ llvm::Value *cmd = emitCmdValueForGetterSetterBody(*this, getterMethod);
llvm::Value *self = Builder.CreateBitCast(LoadObjCSelf(), VoidPtrTy);
llvm::Value *ivarOffset =
- EmitIvarOffset(classImpl->getClassInterface(), ivar);
+ EmitIvarOffsetAsPointerDiff(classImpl->getClassInterface(), ivar);
CallArgList args;
args.add(RValue::get(self), getContext().getObjCIdType());
@@ -1404,6 +1439,24 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
ObjCMethodDecl *setterMethod = propImpl->getSetterMethodDecl();
+ if (ivar->getType().isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+ ParmVarDecl *PVD = *setterMethod->param_begin();
+ if (!AtomicHelperFn) {
+ // Call the move assignment operator instead of calling the copy
+ // assignment operator and destructor.
+ LValue Dst = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar,
+ /*quals*/ 0);
+ LValue Src = MakeAddrLValue(GetAddrOfLocalVar(PVD), ivar->getType());
+ callCStructMoveAssignmentOperator(Dst, Src);
+ } else {
+ // If atomic, assignment is called via a locking api.
+ emitCPPObjectAtomicSetterCall(*this, setterMethod, ivar, AtomicHelperFn);
+ }
+ // Decativate the destructor for the setter parameter.
+ DeactivateCleanupBlock(CalleeDestructedParamCleanups[PVD], AllocaInsertPt);
+ return;
+ }
+
// Just use the setter expression if Sema gave us one and it's
// non-trivial.
if (!hasTrivialSetExpr(propImpl)) {
@@ -1474,12 +1527,11 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
// Emit objc_setProperty((id) self, _cmd, offset, arg,
// <is-atomic>, <is-copy>).
- llvm::Value *cmd =
- Builder.CreateLoad(GetAddrOfLocalVar(setterMethod->getCmdDecl()));
+ llvm::Value *cmd = emitCmdValueForGetterSetterBody(*this, setterMethod);
llvm::Value *self =
Builder.CreateBitCast(LoadObjCSelf(), VoidPtrTy);
llvm::Value *ivarOffset =
- EmitIvarOffset(classImpl->getClassInterface(), ivar);
+ EmitIvarOffsetAsPointerDiff(classImpl->getClassInterface(), ivar);
Address argAddr = GetAddrOfLocalVar(*setterMethod->param_begin());
llvm::Value *arg = Builder.CreateLoad(argAddr, "arg");
arg = Builder.CreateBitCast(arg, VoidPtrTy);
@@ -1749,7 +1801,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
&CGM.getContext().Idents.get("count")
};
Selector FastEnumSel =
- CGM.getContext().Selectors.getSelector(llvm::array_lengthof(II), &II[0]);
+ CGM.getContext().Selectors.getSelector(std::size(II), &II[0]);
QualType ItemsTy =
getContext().getConstantArrayType(getContext().getObjCIdType(),
@@ -2290,7 +2342,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value,
CGM.getObjCEntrypoints().objc_retainBlock);
call->setMetadata("clang.arc.copy_on_escape",
- llvm::MDNode::get(Builder.getContext(), None));
+ llvm::MDNode::get(Builder.getContext(), std::nullopt));
}
return result;
@@ -2332,7 +2384,8 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
// Call the marker asm if we made one, which we do only at -O0.
if (marker)
- CGF.Builder.CreateCall(marker, None, CGF.getBundlesForFunclet(marker));
+ CGF.Builder.CreateCall(marker, std::nullopt,
+ CGF.getBundlesForFunclet(marker));
}
static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value,
@@ -2418,7 +2471,7 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value,
if (precise == ARCImpreciseLifetime) {
call->setMetadata("clang.imprecise_release",
- llvm::MDNode::get(Builder.getContext(), None));
+ llvm::MDNode::get(Builder.getContext(), std::nullopt));
}
}
@@ -2816,7 +2869,7 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value,
if (precise == ARCImpreciseLifetime) {
call->setMetadata("clang.imprecise_release",
- llvm::MDNode::get(Builder.getContext(), None));
+ llvm::MDNode::get(Builder.getContext(), std::nullopt));
}
}
@@ -3661,15 +3714,27 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) {
llvm::Constant *
CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
const ObjCPropertyImplDecl *PID) {
+ const ObjCPropertyDecl *PD = PID->getPropertyDecl();
+ if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic)))
+ return nullptr;
+
+ QualType Ty = PID->getPropertyIvarDecl()->getType();
+ ASTContext &C = getContext();
+
+ if (Ty.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+ // Call the move assignment operator instead of calling the copy assignment
+ // operator and destructor.
+ CharUnits Alignment = C.getTypeAlignInChars(Ty);
+ llvm::Constant *Fn = getNonTrivialCStructMoveAssignmentOperator(
+ CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty);
+ return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ }
+
if (!getLangOpts().CPlusPlus ||
!getLangOpts().ObjCRuntime.hasAtomicCopyHelper())
return nullptr;
- QualType Ty = PID->getPropertyIvarDecl()->getType();
if (!Ty->isRecordType())
return nullptr;
- const ObjCPropertyDecl *PD = PID->getPropertyDecl();
- if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic)))
- return nullptr;
llvm::Constant *HelperFn = nullptr;
if (hasTrivialSetExpr(PID))
return nullptr;
@@ -3677,7 +3742,6 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
if ((HelperFn = CGM.getAtomicSetterHelperFnMap(Ty)))
return HelperFn;
- ASTContext &C = getContext();
IdentifierInfo *II
= &CGM.getContext().Idents.get("__assign_helper_atomic_property_");
@@ -3748,18 +3812,27 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
return HelperFn;
}
-llvm::Constant *
-CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
- const ObjCPropertyImplDecl *PID) {
+llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
+ const ObjCPropertyImplDecl *PID) {
+ const ObjCPropertyDecl *PD = PID->getPropertyDecl();
+ if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic)))
+ return nullptr;
+
+ QualType Ty = PD->getType();
+ ASTContext &C = getContext();
+
+ if (Ty.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+ CharUnits Alignment = C.getTypeAlignInChars(Ty);
+ llvm::Constant *Fn = getNonTrivialCStructCopyConstructor(
+ CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty);
+ return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ }
+
if (!getLangOpts().CPlusPlus ||
!getLangOpts().ObjCRuntime.hasAtomicCopyHelper())
return nullptr;
- const ObjCPropertyDecl *PD = PID->getPropertyDecl();
- QualType Ty = PD->getType();
if (!Ty->isRecordType())
return nullptr;
- if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic)))
- return nullptr;
llvm::Constant *HelperFn = nullptr;
if (hasTrivialGetExpr(PID))
return nullptr;
@@ -3767,7 +3840,6 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty)))
return HelperFn;
- ASTContext &C = getContext();
IdentifierInfo *II =
&CGM.getContext().Idents.get("__copy_helper_atomic_property_");
@@ -3911,7 +3983,8 @@ static llvm::Value *emitIsPlatformVersionAtLeast(CodeGenFunction &CGF,
llvm::SmallVector<llvm::Value *, 8> Args;
auto EmitArgs = [&](const VersionTuple &Version, const llvm::Triple &TT) {
- Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor();
+ std::optional<unsigned> Min = Version.getMinor(),
+ SMin = Version.getSubminor();
Args.push_back(
llvm::ConstantInt::get(CGM.Int32Ty, getBaseMachOPlatformID(TT)));
Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor()));
@@ -3949,7 +4022,8 @@ CodeGenFunction::EmitBuiltinAvailable(const VersionTuple &Version) {
CGM.CreateRuntimeFunction(FTy, "__isOSVersionAtLeast");
}
- Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor();
+ std::optional<unsigned> Min = Version.getMinor(),
+ SMin = Version.getSubminor();
llvm::Value *Args[] = {
llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor()),
llvm::ConstantInt::get(CGM.Int32Ty, Min.value_or(0)),
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp
index 7bbe9af7ed59..c7b193e34ea0 100644
--- a/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -71,7 +71,7 @@ public:
FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
}
else {
- FTy = llvm::FunctionType::get(RetTy, None, false);
+ FTy = llvm::FunctionType::get(RetTy, std::nullopt, false);
}
}
@@ -985,7 +985,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto LiteralLength = SL->getLength();
- if ((CGM.getTarget().getPointerWidth(0) == 64) &&
+ if ((CGM.getTarget().getPointerWidth(LangAS::Default) == 64) &&
(LiteralLength < 9) && !isNonASCII) {
// Tiny strings are only used on 64-bit platforms. They store 8 7-bit
// ASCII characters in the high 56 bits, followed by a 4-bit length and a
@@ -1064,7 +1064,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
// Hash. Not currently initialised by the compiler.
Fields.addInt(Int32Ty, 0);
// pointer to the data string.
- auto Arr = llvm::makeArrayRef(&ToBuf[0], ToPtr+1);
+ auto Arr = llvm::ArrayRef(&ToBuf[0], ToPtr + 1);
auto *C = llvm::ConstantDataArray::get(VMContext, Arr);
auto *Buffer = new llvm::GlobalVariable(TheModule, C->getType(),
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str");
@@ -3316,7 +3316,7 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) {
auto fields = builder.beginStruct();
fields.addInt(Int32Ty, values.size());
auto array = fields.beginArray();
- for (auto v : values) array.add(v);
+ for (auto *v : values) array.add(v);
array.finishAndAddTo(fields);
llvm::Constant *GS =
@@ -3851,9 +3851,9 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
llvm::Type *moduleEltTys[] = {
LongTy, LongTy, PtrToInt8Ty, symtab->getType(), IntTy
};
- llvm::StructType *moduleTy =
- llvm::StructType::get(CGM.getLLVMContext(),
- makeArrayRef(moduleEltTys).drop_back(unsigned(RuntimeVersion < 10)));
+ llvm::StructType *moduleTy = llvm::StructType::get(
+ CGM.getLLVMContext(),
+ ArrayRef(moduleEltTys).drop_back(unsigned(RuntimeVersion < 10)));
ConstantInitBuilder builder(CGM);
auto module = builder.beginStruct(moduleTy);
@@ -3864,8 +3864,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
// The path to the source file where this module was declared
SourceManager &SM = CGM.getContext().getSourceManager();
- Optional<FileEntryRef> mainFile =
- SM.getFileEntryRefForID(SM.getMainFileID());
+ OptionalFileEntryRef mainFile = SM.getFileEntryRefForID(SM.getMainFileID());
std::string path =
(mainFile->getDir().getName() + "/" + mainFile->getName()).str();
module.add(MakeConstantString(path, ".objc_source_file_name"));
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 46e65eb1ed43..c739d3742f80 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -174,6 +174,7 @@ protected:
public:
llvm::IntegerType *ShortTy, *IntTy, *LongTy;
llvm::PointerType *Int8PtrTy, *Int8PtrPtrTy;
+ llvm::PointerType *Int8PtrProgramASTy;
llvm::Type *IvarOffsetVarTy;
/// ObjectPtrTy - LLVM type for object handles (typeof(id))
@@ -736,14 +737,17 @@ public:
// Also it is safe to make it readnone, since we never load or store the
// classref except by calling this function.
llvm::Type *params[] = { Int8PtrPtrTy };
+ llvm::LLVMContext &C = CGM.getLLVMContext();
+ llvm::AttributeSet AS = llvm::AttributeSet::get(C, {
+ llvm::Attribute::get(C, llvm::Attribute::NonLazyBind),
+ llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none()),
+ llvm::Attribute::get(C, llvm::Attribute::NoUnwind),
+ });
llvm::FunctionCallee F = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(ClassnfABIPtrTy, params, false),
"objc_loadClassref",
llvm::AttributeList::get(CGM.getLLVMContext(),
- llvm::AttributeList::FunctionIndex,
- {llvm::Attribute::NonLazyBind,
- llvm::Attribute::ReadNone,
- llvm::Attribute::NoUnwind}));
+ llvm::AttributeList::FunctionIndex, AS));
if (!CGM.getTriple().isOSBinFormatCOFF())
cast<llvm::Function>(F.getCallee())->setLinkage(
llvm::Function::ExternalWeakLinkage);
@@ -1170,7 +1174,7 @@ public:
static ProtocolMethodLists get(const ObjCProtocolDecl *PD) {
ProtocolMethodLists result;
- for (auto MD : PD->methods()) {
+ for (auto *MD : PD->methods()) {
size_t index = (2 * size_t(MD->isOptional()))
+ (size_t(MD->isClassMethod()));
result.Methods[index].push_back(MD);
@@ -2144,7 +2148,8 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
if (!IsSuper)
Arg0 = CGF.Builder.CreateBitCast(Arg0, ObjCTypes.ObjectPtrTy);
ActualArgs.add(RValue::get(Arg0), Arg0Ty);
- ActualArgs.add(RValue::get(SelValue), selTy);
+ if (!Method || !Method->isDirectMethod())
+ ActualArgs.add(RValue::get(SelValue), selTy);
ActualArgs.addFrom(CallArgs);
// If we're calling a method, use the formal signature.
@@ -2402,8 +2407,8 @@ void IvarLayoutBuilder::visitBlock(const CGBlockInfo &blockInfo) {
Qualifiers::GC GCAttr = GetGCAttrTypeForType(CGM.getContext(), type);
if (GCAttr == Qualifiers::Strong) {
- assert(CGM.getContext().getTypeSize(type)
- == CGM.getTarget().getPointerWidth(0));
+ assert(CGM.getContext().getTypeSize(type) ==
+ CGM.getTarget().getPointerWidth(LangAS::Default));
IvarsInfo.push_back(IvarInfo(fieldOffset, /*size in words*/ 1));
}
}
@@ -2696,7 +2701,7 @@ llvm::Constant *CGObjCCommonMac::getBitmapBlockLayout(bool ComputeByrefLayout) {
llvm::Constant *nullPtr = llvm::Constant::getNullValue(CGM.Int8PtrTy);
if (RunSkipBlockVars.empty())
return nullPtr;
- unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(0);
+ unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(LangAS::Default);
unsigned ByteSizeInBits = CGM.getTarget().getCharWidth();
unsigned WordSizeInBytes = WordSizeInBits/ByteSizeInBits;
@@ -2882,7 +2887,7 @@ void CGObjCCommonMac::fillRunSkipBlockVars(CodeGenModule &CGM,
RunSkipBlockVars.clear();
bool hasUnion = false;
- unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(0);
+ unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(LangAS::Default);
unsigned ByteSizeInBits = CGM.getTarget().getCharWidth();
unsigned WordSizeInBytes = WordSizeInBits/ByteSizeInBits;
@@ -3453,7 +3458,7 @@ static bool hasWeakMember(QualType type) {
}
if (auto recType = type->getAs<RecordType>()) {
- for (auto field : recType->getDecl()->fields()) {
+ for (auto *field : recType->getDecl()->fields()) {
if (hasWeakMember(field->getType()))
return true;
}
@@ -4102,6 +4107,9 @@ void CGObjCCommonMac::GenerateDirectMethodPrologue(
// only synthesize _cmd if it's referenced
if (OMD->getCmdDecl()->isUsed()) {
+ // `_cmd` is not a parameter to direct methods, so storage must be
+ // explicitly declared for it.
+ CGF.EmitVarDecl(*OMD->getCmdDecl());
Builder.CreateStore(GetSelector(CGF, OMD),
CGF.GetAddrOfLocalVar(OMD->getCmdDecl()));
}
@@ -5739,11 +5747,13 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
{
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
+ unsigned ProgramAS = CGM.getDataLayout().getProgramAddressSpace();
ShortTy = cast<llvm::IntegerType>(Types.ConvertType(Ctx.ShortTy));
IntTy = CGM.IntTy;
LongTy = cast<llvm::IntegerType>(Types.ConvertType(Ctx.LongTy));
Int8PtrTy = CGM.Int8PtrTy;
+ Int8PtrProgramASTy = llvm::PointerType::get(CGM.Int8Ty, ProgramAS);
Int8PtrPtrTy = CGM.Int8PtrPtrTy;
// arm64 targets use "int" ivar offset variables. All others,
@@ -5812,7 +5822,7 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
// char *_imp;
// }
MethodTy = llvm::StructType::create("struct._objc_method", SelectorPtrTy,
- Int8PtrTy, Int8PtrTy);
+ Int8PtrTy, Int8PtrProgramASTy);
// struct _objc_cache *
CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache");
@@ -6198,8 +6208,7 @@ void CGObjCNonFragileABIMac::AddModuleClassList(
llvm::GlobalVariable *GV = new llvm::GlobalVariable(
CGM.getModule(), Init->getType(), false,
llvm::GlobalValue::PrivateLinkage, Init, SymbolName);
- GV->setAlignment(
- llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType())));
+ GV->setAlignment(CGM.getDataLayout().getABITypeAlign(Init->getType()));
GV->setSection(SectionName);
CGM.addCompilerUsedGlobal(GV);
}
@@ -6431,8 +6440,7 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI,
if (CGM.getTriple().isOSBinFormatMachO())
GV->setSection("__DATA, __objc_data");
- GV->setAlignment(llvm::Align(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)));
+ GV->setAlignment(CGM.getDataLayout().getABITypeAlign(ObjCTypes.ClassnfABITy));
if (!CGM.getTriple().isOSBinFormatCOFF())
if (HiddenVisibility)
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -6771,11 +6779,11 @@ void CGObjCNonFragileABIMac::emitMethodConstant(ConstantArrayBuilder &builder,
if (forProtocol) {
// Protocol methods have no implementation. So, this entry is always NULL.
- method.addNullPointer(ObjCTypes.Int8PtrTy);
+ method.addNullPointer(ObjCTypes.Int8PtrProgramASTy);
} else {
llvm::Function *fn = GetMethodDefinition(MD);
assert(fn && "no definition for method?");
- method.addBitCast(fn, ObjCTypes.Int8PtrTy);
+ method.addBitCast(fn, ObjCTypes.Int8PtrProgramASTy);
}
method.finishAndAddTo(builder);
@@ -6893,8 +6901,8 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID,
llvm::GlobalVariable *IvarOffsetGV = ObjCIvarOffsetVariable(ID, Ivar);
IvarOffsetGV->setInitializer(
llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset));
- IvarOffsetGV->setAlignment(llvm::Align(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)));
+ IvarOffsetGV->setAlignment(
+ CGM.getDataLayout().getABITypeAlign(ObjCTypes.IvarOffsetVarTy));
if (!CGM.getTriple().isOSBinFormatCOFF()) {
// FIXME: This matches gcc, but shouldn't the visibility be set on the use
@@ -7122,8 +7130,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
ProtocolRef);
if (!CGM.getTriple().isOSBinFormatMachO())
PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef));
- PTGV->setAlignment(llvm::Align(
- CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)));
+ PTGV->setAlignment(
+ CGM.getDataLayout().getABITypeAlign(ObjCTypes.ProtocolnfABIPtrTy));
PTGV->setSection(GetSectionName("__objc_protolist",
"coalesced,no_dead_strip"));
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -7222,7 +7230,7 @@ CGObjCNonFragileABIMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
if (IsIvarOffsetKnownIdempotent(CGF, Ivar))
cast<llvm::LoadInst>(IvarOffsetValue)
->setMetadata(CGM.getModule().getMDKindID("invariant.load"),
- llvm::MDNode::get(VMContext, None));
+ llvm::MDNode::get(VMContext, std::nullopt));
}
// This could be 32bit int or 64bit integer depending on the architecture.
@@ -7622,7 +7630,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF,
llvm::LoadInst* LI = CGF.Builder.CreateLoad(Addr);
LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"),
- llvm::MDNode::get(VMContext, None));
+ llvm::MDNode::get(VMContext, std::nullopt));
return LI;
}
diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp
index 550fd3d70bdc..9097a8cf7009 100644
--- a/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -22,6 +22,7 @@
#include "clang/AST/StmtObjC.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/CodeGenABITypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
@@ -227,13 +228,18 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CatchHandler &Handler = Handlers[I];
CGF.EmitBlock(Handler.Block);
- llvm::CatchPadInst *CPI = nullptr;
- SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(CGF.CurrentFuncletPad);
- if (useFunclets)
- if ((CPI = dyn_cast_or_null<llvm::CatchPadInst>(Handler.Block->getFirstNonPHI()))) {
+
+ CodeGenFunction::LexicalScope Cleanups(CGF, Handler.Body->getSourceRange());
+ SaveAndRestore RevertAfterScope(CGF.CurrentFuncletPad);
+ if (useFunclets) {
+ llvm::Instruction *CPICandidate = Handler.Block->getFirstNonPHI();
+ if (auto *CPI = dyn_cast_or_null<llvm::CatchPadInst>(CPICandidate)) {
CGF.CurrentFuncletPad = CPI;
CPI->setOperand(2, CGF.getExceptionSlot().getPointer());
+ CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
}
+ }
+
llvm::Value *RawExn = CGF.getExceptionFromSlot();
// Enter the catch.
@@ -241,8 +247,6 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
if (beginCatchFn)
Exn = CGF.EmitNounwindRuntimeCall(beginCatchFn, RawExn, "exn.adjusted");
- CodeGenFunction::LexicalScope cleanups(CGF, Handler.Body->getSourceRange());
-
if (endCatchFn) {
// Add a cleanup to leave the catch.
bool EndCatchMightThrow = (Handler.Variable == nullptr);
@@ -260,15 +264,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CGF.EmitAutoVarDecl(*CatchParam);
EmitInitOfCatchParam(CGF, CastExn, CatchParam);
}
- if (CPI)
- CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
CGF.ObjCEHValueStack.push_back(Exn);
CGF.EmitStmt(Handler.Body);
CGF.ObjCEHValueStack.pop_back();
// Leave any cleanups associated with the catch.
- cleanups.ForceCleanup();
+ Cleanups.ForceCleanup();
CGF.EmitBranchThroughCleanup(Cont);
}
@@ -293,7 +295,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF,
switch (paramDecl->getType().getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
exn = CGF.EmitARCRetainNonBlock(exn);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
@@ -360,13 +362,15 @@ CGObjCRuntime::MessageSendInfo
CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method,
QualType resultType,
CallArgList &callArgs) {
+ unsigned ProgramAS = CGM.getDataLayout().getProgramAddressSpace();
+
// If there's a method, use information from that.
if (method) {
const CGFunctionInfo &signature =
CGM.getTypes().arrangeObjCMessageSendSignature(method, callArgs[0].Ty);
llvm::PointerType *signatureType =
- CGM.getTypes().GetFunctionType(signature)->getPointerTo();
+ CGM.getTypes().GetFunctionType(signature)->getPointerTo(ProgramAS);
const CGFunctionInfo &signatureForCall =
CGM.getTypes().arrangeCall(signature, callArgs);
@@ -380,7 +384,7 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method,
// Derive the signature to call from that.
llvm::PointerType *signatureType =
- CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo();
+ CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo(ProgramAS);
return MessageSendInfo(argsInfo, signatureType);
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 091eb9da5af4..2284aa1d1eb6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -409,7 +410,7 @@ private:
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
- llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
+ llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
const CodeGen::CGBlockInfo *BlockInfo = nullptr;
bool NoInheritance = false;
@@ -1057,14 +1058,16 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
return Field;
}
-CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
- StringRef Separator)
- : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
- OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
+CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
+ : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
-
+ llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
+ hasRequiresUnifiedSharedMemory(),
+ CGM.getLangOpts().OpenMPOffloadMandatory);
// Initialize Types used in OpenMPIRBuilder from OMPKinds.def
OMPBuilder.initialize();
+ OMPBuilder.setConfig(Config);
+ OffloadEntriesInfoManager.setConfig(Config);
loadOffloadInfoMetadata();
}
@@ -1084,14 +1087,7 @@ void CGOpenMPRuntime::clear() {
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
- SmallString<128> Buffer;
- llvm::raw_svector_ostream OS(Buffer);
- StringRef Sep = FirstSeparator;
- for (StringRef Part : Parts) {
- OS << Sep << Part;
- Sep = Separator;
- }
- return std::string(OS.str());
+ return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
@@ -1369,10 +1365,11 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
- unsigned Flags) {
+ unsigned Flags, bool EmitLoc) {
uint32_t SrcLocStrSize;
llvm::Constant *SrcLocStr;
- if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
+ if ((!EmitLoc &&
+ CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) ||
Loc.isInvalid()) {
SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
} else {
@@ -1595,9 +1592,9 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
- unsigned &DeviceID, unsigned &FileID,
- unsigned &LineNum) {
+static llvm::TargetRegionEntryInfo
+getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
+ StringRef ParentName = "") {
SourceManager &SM = C.getSourceManager();
// The loc should be always valid and have a file ID (the user cannot use
@@ -1617,29 +1614,27 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
<< PLoc.getFilename() << EC.message();
}
- DeviceID = ID.getDevice();
- FileID = ID.getFile();
- LineNum = PLoc.getLine();
+ return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
+ PLoc.getLine());
}
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
HasRequiresUnifiedSharedMemory))) {
SmallString<64> PtrName;
{
llvm::raw_svector_ostream OS(PtrName);
OS << CGM.getMangledName(GlobalDecl(VD));
if (!VD->isExternallyVisible()) {
- unsigned DeviceID, FileID, Line;
- getTargetEntryUniqueInfo(CGM.getContext(),
- VD->getCanonicalDecl()->getBeginLoc(),
- DeviceID, FileID, Line);
- OS << llvm::format("_%x", FileID);
+ auto EntryInfo = getTargetEntryUniqueInfo(
+ CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
+ OS << llvm::format("_%x", EntryInfo.FileID);
}
OS << "_decl_tgt_ref_ptr";
}
@@ -1647,7 +1642,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
if (!Ptr) {
- Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
+ Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName);
auto *GV = cast<llvm::GlobalVariable>(Ptr);
GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
@@ -1667,8 +1662,8 @@ CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
!CGM.getContext().getTargetInfo().isTLSSupported());
// Lookup the entry, lazily creating it if necessary.
std::string Suffix = getName({"cache", ""});
- return getOrCreateInternalVariable(
- CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
+ return OMPBuilder.getOrCreateInternalVariable(
+ CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
@@ -1840,10 +1835,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
if (CGM.getLangOpts().OMPTargetTriples.empty() &&
!CGM.getLangOpts().OpenMPIsDevice)
return false;
- Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
HasRequiresUnifiedSharedMemory))
return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
@@ -1858,16 +1854,10 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
// Produce the unique prefix to identify the new target regions. We use
// the source location of the variable declaration which we know to not
// conflict with any target region.
- unsigned DeviceID;
- unsigned FileID;
- unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
+ auto EntryInfo =
+ getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
SmallString<128> Buffer, Out;
- {
- llvm::raw_svector_ostream OS(Buffer);
- OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
- << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
- }
+ OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
const Expr *Init = VD->getAnyInitializer();
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
@@ -1883,6 +1873,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
llvm::GlobalValue::WeakODRLinkage);
+ Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
if (CGM.getTriple().isAMDGCN())
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
@@ -1912,9 +1903,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
// Register the information for the entry associated with the constructor.
Out.clear();
+ auto CtorEntryInfo = EntryInfo;
+ CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
- DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
- ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
+ CtorEntryInfo, Ctor, ID,
+ llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
}
if (VD->getType().isDestructedType() != QualType::DK_none) {
llvm::Constant *Dtor;
@@ -1929,6 +1922,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
llvm::GlobalValue::WeakODRLinkage);
+ Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
if (CGM.getTriple().isAMDGCN())
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
@@ -1958,9 +1952,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
}
// Register the information for the entry associated with the destructor.
Out.clear();
+ auto DtorEntryInfo = EntryInfo;
+ DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
- DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
- ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
+ DtorEntryInfo, Dtor, ID,
+ llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
}
return CGM.getLangOpts().OpenMPIsDevice;
}
@@ -1970,8 +1966,8 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
StringRef Name) {
std::string Suffix = getName({"artificial", ""});
llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
- llvm::GlobalVariable *GAddr =
- getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
+ llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
+ VarLVType, Twine(Name).concat(Suffix).str());
if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
CGM.getTarget().isTLSSupported()) {
GAddr->setThreadLocal(/*Val=*/true);
@@ -1985,8 +1981,9 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
/*isSigned=*/false),
- getOrCreateInternalVariable(
- CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
+ OMPBuilder.getOrCreateInternalVariable(
+ CGM.VoidPtrPtrTy,
+ Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
return Address(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitRuntimeCall(
@@ -2131,30 +2128,10 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
return ThreadIDTemp;
}
-llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
- llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
- SmallString<256> Buffer;
- llvm::raw_svector_ostream Out(Buffer);
- Out << Name;
- StringRef RuntimeName = Out.str();
- auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
- if (Elem.second) {
- assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
- "OMP internal variable has different type than requested");
- return &*Elem.second;
- }
-
- return Elem.second = new llvm::GlobalVariable(
- CGM.getModule(), Ty, /*IsConstant*/ false,
- llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
- Elem.first(), /*InsertBefore=*/nullptr,
- llvm::GlobalValue::NotThreadLocal, AddressSpace);
-}
-
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
std::string Name = getName({Prefix, "var"});
- return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
+ return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
namespace {
@@ -2583,6 +2560,22 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
Args);
}
+void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
+ Expr *ME, bool IsFatal) {
+ llvm::Value *MVL =
+ ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ // Build call void __kmpc_error(ident_t *loc, int severity, const char
+ // *message)
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
+ llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
+ CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_error),
+ Args);
+}
+
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked, bool Ordered) {
@@ -2951,328 +2944,55 @@ enum KmpTaskTFields {
};
} // anonymous namespace
-bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
- return OffloadEntriesTargetRegion.empty() &&
- OffloadEntriesDeviceGlobalVar.empty();
-}
-
-/// Initialize target region entry.
-void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
- initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
- StringRef ParentName, unsigned LineNum,
- unsigned Order) {
- assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
- "only required for the device "
- "code generation.");
- OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
- OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
- OMPTargetRegionEntryTargetRegion);
- ++OffloadingEntriesNum;
-}
-
-void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
- registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
- StringRef ParentName, unsigned LineNum,
- llvm::Constant *Addr, llvm::Constant *ID,
- OMPTargetRegionEntryKind Flags) {
- // If we are emitting code for a target, the entry is already initialized,
- // only has to be registered.
- if (CGM.getLangOpts().OpenMPIsDevice) {
- // This could happen if the device compilation is invoked standalone.
- if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
- return;
- auto &Entry =
- OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
- Entry.setAddress(Addr);
- Entry.setID(ID);
- Entry.setFlags(Flags);
- } else {
- if (Flags ==
- OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
- hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
- /*IgnoreAddressId*/ true))
- return;
- assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
- "Target region entry already registered!");
- OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
- OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
- ++OffloadingEntriesNum;
- }
-}
-
-bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
- unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
- bool IgnoreAddressId) const {
- auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
- if (PerDevice == OffloadEntriesTargetRegion.end())
- return false;
- auto PerFile = PerDevice->second.find(FileID);
- if (PerFile == PerDevice->second.end())
- return false;
- auto PerParentName = PerFile->second.find(ParentName);
- if (PerParentName == PerFile->second.end())
- return false;
- auto PerLine = PerParentName->second.find(LineNum);
- if (PerLine == PerParentName->second.end())
- return false;
- // Fail if this entry is already registered.
- if (!IgnoreAddressId &&
- (PerLine->second.getAddress() || PerLine->second.getID()))
- return false;
- return true;
-}
-
-void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
- const OffloadTargetRegionEntryInfoActTy &Action) {
- // Scan all target region entries and perform the provided action.
- for (const auto &D : OffloadEntriesTargetRegion)
- for (const auto &F : D.second)
- for (const auto &P : F.second)
- for (const auto &L : P.second)
- Action(D.first, F.first, P.first(), L.first, L.second);
-}
-
-void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
- initializeDeviceGlobalVarEntryInfo(StringRef Name,
- OMPTargetGlobalVarEntryKind Flags,
- unsigned Order) {
- assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
- "only required for the device "
- "code generation.");
- OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
- ++OffloadingEntriesNum;
-}
-
-void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
- registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
- CharUnits VarSize,
- OMPTargetGlobalVarEntryKind Flags,
- llvm::GlobalValue::LinkageTypes Linkage) {
- if (CGM.getLangOpts().OpenMPIsDevice) {
- // This could happen if the device compilation is invoked standalone.
- if (!hasDeviceGlobalVarEntryInfo(VarName))
- return;
- auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
- if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
- if (Entry.getVarSize().isZero()) {
- Entry.setVarSize(VarSize);
- Entry.setLinkage(Linkage);
- }
- return;
- }
- Entry.setVarSize(VarSize);
- Entry.setLinkage(Linkage);
- Entry.setAddress(Addr);
- } else {
- if (hasDeviceGlobalVarEntryInfo(VarName)) {
- auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
- assert(Entry.isValid() && Entry.getFlags() == Flags &&
- "Entry not initialized!");
- if (Entry.getVarSize().isZero()) {
- Entry.setVarSize(VarSize);
- Entry.setLinkage(Linkage);
- }
- return;
- }
- OffloadEntriesDeviceGlobalVar.try_emplace(
- VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
- ++OffloadingEntriesNum;
- }
-}
-
-void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
- actOnDeviceGlobalVarEntriesInfo(
- const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
- // Scan all target region entries and perform the provided action.
- for (const auto &E : OffloadEntriesDeviceGlobalVar)
- Action(E.getKey(), E.getValue());
-}
-
-void CGOpenMPRuntime::createOffloadEntry(
- llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
- llvm::GlobalValue::LinkageTypes Linkage) {
- OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
-}
-
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
- // Emit the offloading entries and metadata so that the device codegen side
- // can easily figure out what to emit. The produced metadata looks like
- // this:
- //
- // !omp_offload.info = !{!1, ...}
- //
- // Right now we only generate metadata for function that contain target
- // regions.
-
// If we are in simd mode or there are no entries, we don't need to do
// anything.
if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
return;
- llvm::Module &M = CGM.getModule();
- llvm::LLVMContext &C = M.getContext();
- SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
- SourceLocation, StringRef>,
- 16>
- OrderedEntries(OffloadEntriesInfoManager.size());
- llvm::SmallVector<StringRef, 16> ParentFunctions(
- OffloadEntriesInfoManager.size());
-
- // Auxiliary methods to create metadata values and strings.
- auto &&GetMDInt = [this](unsigned V) {
- return llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(CGM.Int32Ty, V));
- };
-
- auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
-
- // Create the offloading info metadata node.
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
-
- // Create function that emits metadata for each target region entry;
- auto &&TargetRegionMetadataEmitter =
- [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
- &GetMDString](
- unsigned DeviceID, unsigned FileID, StringRef ParentName,
- unsigned Line,
- const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
- // Generate metadata for target regions. Each entry of this metadata
- // contains:
- // - Entry 0 -> Kind of this type of metadata (0).
- // - Entry 1 -> Device ID of the file where the entry was identified.
- // - Entry 2 -> File ID of the file where the entry was identified.
- // - Entry 3 -> Mangled name of the function where the entry was
- // identified.
- // - Entry 4 -> Line in the file where the entry was identified.
- // - Entry 5 -> Order the entry was created.
- // The first element of the metadata node is the kind.
- llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
- GetMDInt(FileID), GetMDString(ParentName),
- GetMDInt(Line), GetMDInt(E.getOrder())};
-
- SourceLocation Loc;
- for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
- E = CGM.getContext().getSourceManager().fileinfo_end();
- I != E; ++I) {
- if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
- I->getFirst()->getUniqueID().getFile() == FileID) {
- Loc = CGM.getContext().getSourceManager().translateFileLineCol(
- I->getFirst(), Line, 1);
- break;
- }
- }
- // Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
- ParentFunctions[E.getOrder()] = ParentName;
-
- // Add metadata to the named metadata node.
- MD->addOperand(llvm::MDNode::get(C, Ops));
- };
-
- OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
- TargetRegionMetadataEmitter);
-
- // Create function that emits metadata for each device global variable entry;
- auto &&DeviceGlobalVarMetadataEmitter =
- [&C, &OrderedEntries, &GetMDInt, &GetMDString,
- MD](StringRef MangledName,
- const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
- &E) {
- // Generate metadata for global variables. Each entry of this metadata
- // contains:
- // - Entry 0 -> Kind of this type of metadata (1).
- // - Entry 1 -> Mangled name of the variable.
- // - Entry 2 -> Declare target kind.
- // - Entry 3 -> Order the entry was created.
- // The first element of the metadata node is the kind.
- llvm::Metadata *Ops[] = {
- GetMDInt(E.getKind()), GetMDString(MangledName),
- GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
-
- // Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] =
- std::make_tuple(&E, SourceLocation(), MangledName);
-
- // Add metadata to the named metadata node.
- MD->addOperand(llvm::MDNode::get(C, Ops));
- };
-
- OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
- DeviceGlobalVarMetadataEmitter);
-
- for (const auto &E : OrderedEntries) {
- assert(std::get<0>(E) && "All ordered entries must exist!");
- if (const auto *CE =
- dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
- std::get<0>(E))) {
- if (!CE->getID() || !CE->getAddress()) {
- // Do not blame the entry if the parent funtion is not emitted.
- StringRef FnName = ParentFunctions[CE->getOrder()];
- if (!CGM.GetGlobalValue(FnName))
- continue;
- unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "Offloading entry for target region in %0 is incorrect: either the "
- "address or the ID is invalid.");
- CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
- continue;
- }
- createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
- CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
- } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
- OffloadEntryInfoDeviceGlobalVar>(
- std::get<0>(E))) {
- OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
- static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
- CE->getFlags());
- switch (Flags) {
- case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
- if (CGM.getLangOpts().OpenMPIsDevice &&
- CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
- continue;
- if (!CE->getAddress()) {
- unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error, "Offloading entry for declare target "
- "variable %0 is incorrect: the "
- "address is invalid.");
- CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
- continue;
- }
- // The vaiable has no definition - no need to add the entry.
- if (CE->getVarSize().isZero())
- continue;
- break;
- }
- case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
- assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
- (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
- "Declaret target link address is set.");
- if (CGM.getLangOpts().OpenMPIsDevice)
- continue;
- if (!CE->getAddress()) {
- unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "Offloading entry for declare target variable is incorrect: the "
- "address is invalid.");
- CGM.getDiags().Report(DiagID);
- continue;
+ llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
+ [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
+ const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
+ SourceLocation Loc;
+ if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
+ for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
+ E = CGM.getContext().getSourceManager().fileinfo_end();
+ I != E; ++I) {
+ if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
+ I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
+ Loc = CGM.getContext().getSourceManager().translateFileLineCol(
+ I->getFirst(), EntryInfo.Line, 1);
+ break;
}
- break;
}
-
- // Hidden or internal symbols on the device are not externally visible. We
- // should not attempt to register them by creating an offloading entry.
- if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- continue;
-
- createOffloadEntry(CE->getAddress(), CE->getAddress(),
- CE->getVarSize().getQuantity(), Flags,
- CE->getLinkage());
- } else {
- llvm_unreachable("Unsupported entry kind.");
}
- }
+ switch (Kind) {
+ case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error, "Offloading entry for target region in "
+ "%0 is incorrect: either the "
+ "address or the ID is invalid.");
+ CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
+ } break;
+ case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error, "Offloading entry for declare target "
+ "variable %0 is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
+ } break;
+ case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "Offloading entry for declare target variable is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(DiagID);
+ } break;
+ }
+ };
+
+ OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager,
+ ErrorReportFn);
}
/// Loads all the offload entries information from the host IR
@@ -3306,42 +3026,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() {
return;
}
- llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
- if (!MD)
- return;
-
- for (llvm::MDNode *MN : MD->operands()) {
- auto &&GetMDInt = [MN](unsigned Idx) {
- auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
- return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
- };
-
- auto &&GetMDString = [MN](unsigned Idx) {
- auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
- return V->getString();
- };
-
- switch (GetMDInt(0)) {
- default:
- llvm_unreachable("Unexpected metadata!");
- break;
- case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
- OffloadingEntryInfoTargetRegion:
- OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
- /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
- /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
- /*Order=*/GetMDInt(5));
- break;
- case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
- OffloadingEntryInfoDeviceGlobalVar:
- OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
- /*MangledName=*/GetMDString(1),
- static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
- /*Flags=*/GetMDInt(2)),
- /*Order=*/GetMDInt(3));
- break;
- }
- }
+ OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
}
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
@@ -4501,39 +4186,26 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
return Result;
}
-namespace {
-/// Dependence kind for RTL.
-enum RTLDependenceKindTy {
- DepIn = 0x01,
- DepInOut = 0x3,
- DepMutexInOutSet = 0x4,
- DepInOutSet = 0x8,
- DepOmpAllMem = 0x80,
-};
-/// Fields ids in kmp_depend_info record.
-enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
-} // namespace
-
/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
RTLDependenceKindTy DepKind;
switch (K) {
case OMPC_DEPEND_in:
- DepKind = DepIn;
+ DepKind = RTLDependenceKindTy::DepIn;
break;
// Out and InOut dependencies must use the same code.
case OMPC_DEPEND_out:
case OMPC_DEPEND_inout:
- DepKind = DepInOut;
+ DepKind = RTLDependenceKindTy::DepInOut;
break;
case OMPC_DEPEND_mutexinoutset:
- DepKind = DepMutexInOutSet;
+ DepKind = RTLDependenceKindTy::DepMutexInOutSet;
break;
case OMPC_DEPEND_inoutset:
- DepKind = DepInOutSet;
+ DepKind = RTLDependenceKindTy::DepInOutSet;
break;
case OMPC_DEPEND_outallmemory:
- DepKind = DepOmpAllMem;
+ DepKind = RTLDependenceKindTy::DepOmpAllMem;
break;
case OMPC_DEPEND_source:
case OMPC_DEPEND_sink:
@@ -4581,7 +4253,9 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
// NumDeps = deps[i].base_addr;
LValue BaseAddrLVal = CGF.EmitLValueForField(
- NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ NumDepsBase,
+ *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
return std::make_pair(NumDeps, Base);
}
@@ -4627,18 +4301,24 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
}
// deps[i].base_addr = &<Dependencies[i].second>;
LValue BaseAddrLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ Base,
+ *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
// deps[i].len = sizeof(<Dependencies[i].second>);
LValue LenLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), Len));
+ Base, *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::Len)));
CGF.EmitStoreOfScalar(Size, LenLVal);
// deps[i].flags = <Dependencies[i].first>;
RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
LValue FlagsLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
- CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
- FlagsLVal);
+ Base,
+ *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::Flags)));
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
+ FlagsLVal);
if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
++(*P);
} else {
@@ -4655,7 +4335,7 @@ SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
CodeGenFunction &CGF, QualType &KmpDependInfoTy,
const OMPTaskDataTy::DependData &Data) {
assert(Data.DepKind == OMPC_DEPEND_depobj &&
- "Expected depobj dependecy kind.");
+ "Expected depobj dependency kind.");
SmallVector<llvm::Value *, 4> Sizes;
SmallVector<LValue, 4> SizeLVals;
ASTContext &C = CGF.getContext();
@@ -4695,7 +4375,7 @@ void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
const OMPTaskDataTy::DependData &Data,
Address DependenciesArray) {
assert(Data.DepKind == OMPC_DEPEND_depobj &&
- "Expected depobj dependecy kind.");
+ "Expected depobj dependency kind.");
llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
{
OMPIteratorGeneratorScope IteratorScope(
@@ -4751,7 +4431,8 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
llvm::Value *NumOfRegularWithIterators =
llvm::ConstantInt::get(CGF.IntPtrTy, 0);
- // Calculate number of depobj dependecies and regular deps with the iterators.
+ // Calculate number of depobj dependencies and regular deps with the
+ // iterators.
for (const OMPTaskDataTy::DependData &D : Dependencies) {
if (D.DepKind == OMPC_DEPEND_depobj) {
SmallVector<llvm::Value *, 4> Sizes =
@@ -4825,7 +4506,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
DependenciesArray);
}
- // Copy regular dependecies with iterators.
+ // Copy regular dependencies with iterators.
LValue PosLVal = CGF.MakeAddrLValue(
CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
@@ -4913,7 +4594,9 @@ Address CGOpenMPRuntime::emitDepobjDependClause(
LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
// deps[i].base_addr = NumDependencies;
LValue BaseAddrLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+ Base,
+ *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
llvm::PointerUnion<unsigned *, LValue *> Pos;
unsigned Idx = 1;
@@ -4993,9 +4676,11 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
// deps[i].flags = NewDepKind;
RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
LValue FlagsLVal = CGF.EmitLValueForField(
- Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
- CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
- FlagsLVal);
+ Base, *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::Flags)));
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
+ FlagsLVal);
// Shift the address forward by one element.
Address ElementNext =
@@ -5073,7 +4758,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
Region->emitUntiedSwitch(CGF);
};
- llvm::Value *DepWaitTaskArgs[6];
+ llvm::Value *DepWaitTaskArgs[7];
if (!Data.Dependences.empty()) {
DepWaitTaskArgs[0] = UpLoc;
DepWaitTaskArgs[1] = ThreadID;
@@ -5081,6 +4766,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepWaitTaskArgs[3] = DependenciesArray.getPointer();
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ DepWaitTaskArgs[6] =
+ llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
}
auto &M = CGM.getModule();
auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
@@ -5092,9 +4779,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
// is specified.
if (!Data.Dependences.empty())
- CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
- DepWaitTaskArgs);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_omp_taskwait_deps_51),
+ DepWaitTaskArgs);
// Call proxy_task_entry(gtid, new_task);
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -5595,7 +5282,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
};
RegionCodeGenTy RCG(CodeGen);
CommonActionTy Action(
- nullptr, llvm::None,
+ nullptr, std::nullopt,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
: OMPRTL___kmpc_end_reduce),
@@ -5717,7 +5404,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ThreadId, // i32 <gtid>
Lock // kmp_critical_name *&<lock>
};
- CommonActionTy Action(nullptr, llvm::None,
+ CommonActionTy Action(nullptr, std::nullopt,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_end_reduce),
EndArgs);
@@ -6142,24 +5829,26 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *NumOfElements;
std::tie(NumOfElements, DependenciesArray) =
emitDependClause(CGF, Data.Dependences, Loc);
- llvm::Value *DepWaitTaskArgs[6];
if (!Data.Dependences.empty()) {
+ llvm::Value *DepWaitTaskArgs[7];
DepWaitTaskArgs[0] = UpLoc;
DepWaitTaskArgs[1] = ThreadID;
DepWaitTaskArgs[2] = NumOfElements;
DepWaitTaskArgs[3] = DependenciesArray.getPointer();
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ DepWaitTaskArgs[6] =
+ llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
- // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
+ // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
- // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
- // is specified.
- CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
- DepWaitTaskArgs);
+ // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
+ // kmp_int32 has_no_wait); if dependence info is specified.
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_omp_taskwait_deps_51),
+ DepWaitTaskArgs);
} else {
@@ -6333,7 +6022,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
- assert(!ParentName.empty() && "Invalid target region parent name!");
+ assert(!ParentName.empty() && "Invalid target entry parent name!");
HasEmittedTargetRegion = true;
SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
@@ -6405,99 +6094,32 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
- // Create a unique name for the entry function using the source location
- // information of the current target region. The name will be something like:
- //
- // __omp_offloading_DD_FFFF_PP_lBB
- //
- // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
- // mangled name of the function that encloses the target region and BB is the
- // line number of the target region.
- const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
- !CGM.getLangOpts().OpenMPOffloadMandatory;
- unsigned DeviceID;
- unsigned FileID;
- unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
- Line);
- SmallString<64> EntryFnName;
- {
- llvm::raw_svector_ostream OS(EntryFnName);
- OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
- << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
- }
-
- const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
+ auto EntryInfo =
+ getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);
CodeGenFunction CGF(CGM, true);
- CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-
- if (BuildOutlinedFn)
- OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
-
- // If this target outline function is not an offload entry, we don't need to
- // register it.
- if (!IsOffloadEntry)
- return;
-
- // The target region ID is used by the runtime library to identify the current
- // target region, so it only has to be unique and not necessarily point to
- // anything. It could be the pointer to the outlined function that implements
- // the target region, but we aren't using that so that the compiler doesn't
- // need to keep that, and could therefore inline the host function if proven
- // worthwhile during optimization. In the other hand, if emitting code for the
- // device, the ID has to be the function address so that it can retrieved from
- // the offloading entry and launched by the runtime library. We also mark the
- // outlined function to have external linkage in case we are emitting code for
- // the device, because these functions will be entry points to the device.
-
- if (CGM.getLangOpts().OpenMPIsDevice) {
- OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
- OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
- OutlinedFn->setDSOLocal(false);
- if (CGM.getTriple().isAMDGCN())
- OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
- } else {
- std::string Name = getName({EntryFnName, "region_id"});
- OutlinedFnID = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::WeakAnyLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), Name);
- }
-
- // If we do not allow host fallback we still need a named address to use.
- llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
- if (!BuildOutlinedFn) {
- assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
- "Named kernel already exists?");
- TargetRegionEntryAddr = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::InternalLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
- }
+ llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
+ [&CGF, &D, &CodeGen](StringRef EntryFnName) {
+ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
- // Register the information for the entry associated with this target region.
- OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
- DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
- OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
+ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
+ };
- // Add NumTeams and ThreadLimit attributes to the outlined GPU function
+ // Get NumTeams and ThreadLimit attributes
int32_t DefaultValTeams = -1;
- getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
- if (DefaultValTeams > 0 && OutlinedFn) {
- OutlinedFn->addFnAttr("omp_target_num_teams",
- std::to_string(DefaultValTeams));
- }
int32_t DefaultValThreads = -1;
+ getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
- if (DefaultValThreads > 0 && OutlinedFn) {
- OutlinedFn->addFnAttr("omp_target_thread_limit",
- std::to_string(DefaultValThreads));
- }
- if (BuildOutlinedFn)
+ OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo,
+ GenerateOutlinedFunction, DefaultValTeams,
+ DefaultValThreads, IsOffloadEntry,
+ OutlinedFn, OutlinedFnID);
+
+ if (OutlinedFn != nullptr)
CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
@@ -6808,10 +6430,8 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
}
if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
return CGF.Builder.getInt32(1);
- return DefaultThreadLimitVal;
}
- return DefaultThreadLimitVal ? DefaultThreadLimitVal
- : CGF.Builder.getInt32(0);
+ return DefaultThreadLimitVal;
}
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
@@ -6954,12 +6574,14 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
return NumThreads;
const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
CGF.getContext(), CS->getCapturedStmt());
+ // TODO: The standard is not clear how to resolve two thread limit clauses,
+ // let's pick the teams one if it's present, otherwise the target one.
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
- if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
+ if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
+ ThreadLimitClause = TLC;
CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- const auto *ThreadLimitClause =
- Dir->getSingleClause<OMPThreadLimitClause>();
CodeGenFunction::LexicalScope Scope(
CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
if (const auto *PreInit =
@@ -6974,11 +6596,15 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
}
}
}
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
- ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
- ThreadLimitVal =
- Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
}
+ }
+ if (ThreadLimitClause) {
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+ ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+ ThreadLimitVal =
+ Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
+ }
+ if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
!isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
CS = Dir->getInnermostCapturedStmt();
@@ -7029,7 +6655,10 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
ThreadLimitVal =
Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
}
- return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
+ if (llvm::Value *NumThreads =
+ getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
+ return NumThreads;
+ return Bld.getInt32(0);
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
@@ -7164,67 +6793,13 @@ LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
// code for that information.
class MappableExprsHandler {
public:
- /// Values for bit flags used to specify the mapping type for
- /// offloading.
- enum OpenMPOffloadMappingFlags : uint64_t {
- /// No flags
- OMP_MAP_NONE = 0x0,
- /// Allocate memory on the device and move data from host to device.
- OMP_MAP_TO = 0x01,
- /// Allocate memory on the device and move data from device to host.
- OMP_MAP_FROM = 0x02,
- /// Always perform the requested mapping action on the element, even
- /// if it was already mapped before.
- OMP_MAP_ALWAYS = 0x04,
- /// Delete the element from the device environment, ignoring the
- /// current reference count associated with the element.
- OMP_MAP_DELETE = 0x08,
- /// The element being mapped is a pointer-pointee pair; both the
- /// pointer and the pointee should be mapped.
- OMP_MAP_PTR_AND_OBJ = 0x10,
- /// This flags signals that the base address of an entry should be
- /// passed to the target kernel as an argument.
- OMP_MAP_TARGET_PARAM = 0x20,
- /// Signal that the runtime library has to return the device pointer
- /// in the current position for the data being mapped. Used when we have the
- /// use_device_ptr or use_device_addr clause.
- OMP_MAP_RETURN_PARAM = 0x40,
- /// This flag signals that the reference being passed is a pointer to
- /// private data.
- OMP_MAP_PRIVATE = 0x80,
- /// Pass the element to the device by value.
- OMP_MAP_LITERAL = 0x100,
- /// Implicit map
- OMP_MAP_IMPLICIT = 0x200,
- /// Close is a hint to the runtime to allocate memory close to
- /// the target device.
- OMP_MAP_CLOSE = 0x400,
- /// 0x800 is reserved for compatibility with XLC.
- /// Produce a runtime error if the data is not already allocated.
- OMP_MAP_PRESENT = 0x1000,
- // Increment and decrement a separate reference counter so that the data
- // cannot be unmapped within the associated region. Thus, this flag is
- // intended to be used on 'target' and 'target data' directives because they
- // are inherently structured. It is not intended to be used on 'target
- // enter data' and 'target exit data' directives because they are inherently
- // dynamic.
- // This is an OpenMP extension for the sake of OpenACC support.
- OMP_MAP_OMPX_HOLD = 0x2000,
- /// Signal that the runtime library should use args as an array of
- /// descriptor_dim pointers and use args_size as dims. Used when we have
- /// non-contiguous list items in target update directive
- OMP_MAP_NON_CONTIG = 0x100000000000,
- /// The 16 MSBs of the flags indicate whether the entry is member of some
- /// struct/class.
- OMP_MAP_MEMBER_OF = 0xffff000000000000,
- LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
- };
-
/// Get the offset of the OMP_MAP_MEMBER_OF field.
static unsigned getFlagMemberOffset() {
unsigned Offset = 0;
- for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
- Remain = Remain >> 1)
+ for (uint64_t Remain =
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
+ !(Remain & 1); Remain = Remain >> 1)
Offset++;
return Offset;
}
@@ -7388,6 +6963,13 @@ private:
SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
DevPointersMap;
+ /// Map between device addr declarations and their expression components.
+ /// The key value for declarations in 'this' is null.
+ llvm::DenseMap<
+ const ValueDecl *,
+ SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
+ HasDevAddrsMap;
+
/// Map between lambda declarations and their map type.
llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
@@ -7475,7 +7057,8 @@ private:
ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
OpenMPOffloadMappingFlags Bits =
- IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
+ IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
+ : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
switch (MapType) {
case OMPC_MAP_alloc:
case OMPC_MAP_release:
@@ -7485,35 +7068,36 @@ private:
// type modifiers.
break;
case OMPC_MAP_to:
- Bits |= OMP_MAP_TO;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
break;
case OMPC_MAP_from:
- Bits |= OMP_MAP_FROM;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
break;
case OMPC_MAP_tofrom:
- Bits |= OMP_MAP_TO | OMP_MAP_FROM;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM;
break;
case OMPC_MAP_delete:
- Bits |= OMP_MAP_DELETE;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
break;
case OMPC_MAP_unknown:
llvm_unreachable("Unexpected map type!");
}
if (AddPtrFlag)
- Bits |= OMP_MAP_PTR_AND_OBJ;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
if (AddIsTargetParamFlag)
- Bits |= OMP_MAP_TARGET_PARAM;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
- Bits |= OMP_MAP_ALWAYS;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
- Bits |= OMP_MAP_CLOSE;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
- Bits |= OMP_MAP_PRESENT;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
- Bits |= OMP_MAP_OMPX_HOLD;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
if (IsNonContiguous)
- Bits |= OMP_MAP_NON_CONTIG;
+ Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
return Bits;
}
@@ -7570,7 +7154,7 @@ private:
const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
- OverlappedElements = llvm::None) const {
+ OverlappedElements = std::nullopt) const {
// The following summarizes what has to be generated for each map and the
// types below. The generated information is expressed in this order:
// base pointer, section pointer, size, flags
@@ -7780,10 +7364,11 @@ private:
BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
if (const auto *VD =
dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
- if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
RequiresReference = true;
BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
@@ -7998,7 +7583,7 @@ private:
std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
// Emit data for non-overlapped data.
OpenMPOffloadMappingFlags Flags =
- OMP_MAP_MEMBER_OF |
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
/*AddPtrFlag=*/false,
/*AddIsTargetParamFlag=*/false, IsNonContiguous);
@@ -8084,13 +7669,16 @@ private:
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
// then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
if (IsPointer || (IsMemberReference && Next != CE))
- Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
- OMP_MAP_DELETE | OMP_MAP_CLOSE);
+ Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM |
+ OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
+ OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
+ OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
if (ShouldBeMemberOf) {
// Set placeholder value MEMBER_OF=FFFF to indicate that the flag
// should be later updated with the correct value of MEMBER_OF.
- Flags |= OMP_MAP_MEMBER_OF;
+ Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
// From now on, all subsequent PTR_AND_OBJ entries should not be
// marked as MEMBER_OF.
ShouldBeMemberOf = false;
@@ -8337,7 +7925,7 @@ private:
/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
- MappableExprsHandler::OpenMPOffloadMappingFlags
+ OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
assert(Cap.capturesVariable() && "Expected capture by reference only!");
@@ -8346,22 +7934,22 @@ private:
// declaration is known as first-private in this handler.
if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
if (Cap.getCapturedVar()->getType()->isAnyPointerType())
- return MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
- return MappableExprsHandler::OMP_MAP_PRIVATE |
- MappableExprsHandler::OMP_MAP_TO;
+ return OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+ return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
+ OpenMPOffloadMappingFlags::OMP_MAP_TO;
}
auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
if (I != LambdasMap.end())
// for map(to: lambda): using user specified map type.
return getMapTypeBits(
I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
- /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
+ /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
/*AddPtrFlag=*/false,
/*AddIsTargetParamFlag=*/false,
/*isNonContiguous=*/false);
- return MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM;
+ return OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
@@ -8375,13 +7963,16 @@ private:
// If the entry is PTR_AND_OBJ but has not been marked with the special
// placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
// marked as MEMBER_OF.
- if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
- ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
+ if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
return;
// Reset the placeholder value to prepare the flag for the assignment of the
// proper MEMBER_OF value.
- Flags &= ~OMP_MAP_MEMBER_OF;
+ Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
Flags |= MemberOfFlag;
}
@@ -8500,7 +8091,7 @@ private:
for (const auto L : C->component_lists()) {
const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
- C->getMapTypeModifiers(), llvm::None,
+ C->getMapTypeModifiers(), std::nullopt,
/*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
E);
++EI;
@@ -8516,7 +8107,7 @@ private:
Kind = Present;
const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
- InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
+ InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
C->isImplicit(), std::get<2>(L), *EI);
++EI;
@@ -8532,56 +8123,101 @@ private:
Kind = Present;
const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
- InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
- C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
- C->isImplicit(), std::get<2>(L), *EI);
+ InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
+ std::nullopt, C->getMotionModifiers(),
+ /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
+ *EI);
++EI;
}
}
- // Look at the use_device_ptr clause information and mark the existing map
- // entries as such. If there is no map information for an entry in the
- // use_device_ptr list, we create one with map type 'alloc' and zero size
- // section. It is the user fault if that was not mapped before. If there is
- // no map information and the pointer is a struct member, then we defer the
- // emission of that entry until the whole struct has been processed.
+ // Look at the use_device_ptr and use_device_addr clauses information and
+ // mark the existing map entries as such. If there is no map information for
+ // an entry in the use_device_ptr and use_device_addr list, we create one
+ // with map type 'alloc' and zero size section. It is the user fault if that
+ // was not mapped before. If there is no map information and the pointer is
+ // a struct member, then we defer the emission of that entry until the whole
+ // struct has been processed.
llvm::MapVector<CanonicalDeclPtr<const Decl>,
SmallVector<DeferredDevicePtrEntryTy, 4>>
DeferredInfo;
- MapCombinedInfoTy UseDevicePtrCombinedInfo;
+ MapCombinedInfoTy UseDeviceDataCombinedInfo;
- for (const auto *Cl : Clauses) {
- const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
- if (!C)
- continue;
- for (const auto L : C->component_lists()) {
- OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
- std::get<1>(L);
- assert(!Components.empty() &&
- "Not expecting empty list of components!");
- const ValueDecl *VD = Components.back().getAssociatedDeclaration();
- VD = cast<ValueDecl>(VD->getCanonicalDecl());
- const Expr *IE = Components.back().getAssociatedExpression();
- // If the first component is a member expression, we have to look into
- // 'this', which maps to null in the map of map information. Otherwise
- // look directly for the information.
- auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
+ auto &&UseDeviceDataCombinedInfoGen =
+ [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
+ CodeGenFunction &CGF) {
+ UseDeviceDataCombinedInfo.Exprs.push_back(VD);
+ UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
+ UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
+ UseDeviceDataCombinedInfo.Sizes.push_back(
+ llvm::Constant::getNullValue(CGF.Int64Ty));
+ UseDeviceDataCombinedInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
+ UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
+ };
+
+ auto &&MapInfoGen =
+ [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
+ &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef
+ Components,
+ bool IsImplicit, bool IsDevAddr) {
+ // We didn't find any match in our map information - generate a zero
+ // size array section - if the pointer is a struct member we defer
+ // this action until the whole struct has been processed.
+ if (isa<MemberExpr>(IE)) {
+ // Insert the pointer into Info to be processed by
+ // generateInfoForComponentList. Because it is a member pointer
+ // without a pointee, no entry will be generated for it, therefore
+ // we need to generate one after the whole struct has been
+ // processed. Nonetheless, generateInfoForComponentList must be
+ // called to take the pointer into account for the calculation of
+ // the range of the partial struct.
+ InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
+ std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
+ nullptr, nullptr, IsDevAddr);
+ DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
+ } else {
+ llvm::Value *Ptr;
+ if (IsDevAddr) {
+ if (IE->isGLValue())
+ Ptr = CGF.EmitLValue(IE).getPointer(CGF);
+ else
+ Ptr = CGF.EmitScalarExpr(IE);
+ } else {
+ Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
+ }
+ UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
+ }
+ };
- // We potentially have map information for this declaration already.
- // Look for the first set of components that refer to it.
- if (It != Info.end()) {
- bool Found = false;
- for (auto &Data : It->second) {
- auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
- return MI.Components.back().getAssociatedDeclaration() == VD;
- });
- // If we found a map entry, signal that the pointer has to be
- // returned and move on to the next declaration. Exclude cases where
- // the base pointer is mapped as array subscript, array section or
- // array shaping. The base address is passed as a pointer to base in
- // this case and cannot be used as a base for use_device_ptr list
- // item.
- if (CI != Data.end()) {
+ auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
+ const Expr *IE, bool IsDevAddr) -> bool {
+ // We potentially have map information for this declaration already.
+ // Look for the first set of components that refer to it. If found,
+ // return true.
+ // If the first component is a member expression, we have to look into
+ // 'this', which maps to null in the map of map information. Otherwise
+ // look directly for the information.
+ auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
+ if (It != Info.end()) {
+ bool Found = false;
+ for (auto &Data : It->second) {
+ auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
+ return MI.Components.back().getAssociatedDeclaration() == VD;
+ });
+ // If we found a map entry, signal that the pointer has to be
+ // returned and move on to the next declaration. Exclude cases where
+ // the base pointer is mapped as array subscript, array section or
+ // array shaping. The base address is passed as a pointer to base in
+ // this case and cannot be used as a base for use_device_ptr list
+ // item.
+ if (CI != Data.end()) {
+ if (IsDevAddr) {
+ CI->ReturnDevicePointer = true;
+ Found = true;
+ break;
+ } else {
auto PrevCI = std::next(CI->Components.rbegin());
const auto *VarD = dyn_cast<VarDecl>(VD);
if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
@@ -8596,51 +8232,45 @@ private:
}
}
}
- if (Found)
- continue;
- }
-
- // We didn't find any match in our map information - generate a zero
- // size array section - if the pointer is a struct member we defer this
- // action until the whole struct has been processed.
- if (isa<MemberExpr>(IE)) {
- // Insert the pointer into Info to be processed by
- // generateInfoForComponentList. Because it is a member pointer
- // without a pointee, no entry will be generated for it, therefore
- // we need to generate one after the whole struct has been processed.
- // Nonetheless, generateInfoForComponentList must be called to take
- // the pointer into account for the calculation of the range of the
- // partial struct.
- InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
- llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
- nullptr);
- DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
- } else {
- llvm::Value *Ptr =
- CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
- UseDevicePtrCombinedInfo.Exprs.push_back(VD);
- UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
- UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
- UseDevicePtrCombinedInfo.Sizes.push_back(
- llvm::Constant::getNullValue(CGF.Int64Ty));
- UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
- UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
}
+ return Found;
}
- }
+ return false;
+ };
- // Look at the use_device_addr clause information and mark the existing map
+ // Look at the use_device_ptr clause information and mark the existing map
// entries as such. If there is no map information for an entry in the
- // use_device_addr list, we create one with map type 'alloc' and zero size
+ // use_device_ptr list, we create one with map type 'alloc' and zero size
// section. It is the user fault if that was not mapped before. If there is
// no map information and the pointer is a struct member, then we defer the
// emission of that entry until the whole struct has been processed.
+ for (const auto *Cl : Clauses) {
+ const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
+ if (!C)
+ continue;
+ for (const auto L : C->component_lists()) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
+ std::get<1>(L);
+ assert(!Components.empty() &&
+ "Not expecting empty list of components!");
+ const ValueDecl *VD = Components.back().getAssociatedDeclaration();
+ VD = cast<ValueDecl>(VD->getCanonicalDecl());
+ const Expr *IE = Components.back().getAssociatedExpression();
+ if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
+ continue;
+ MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
+ /*IsDevAddr=*/false);
+ }
+ }
+
llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
for (const auto *Cl : Clauses) {
const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
if (!C)
continue;
for (const auto L : C->component_lists()) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
+ std::get<1>(L);
assert(!std::get<1>(L).empty() &&
"Not expecting empty list of components!");
const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
@@ -8648,60 +8278,10 @@ private:
continue;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
- // If the first component is a member expression, we have to look into
- // 'this', which maps to null in the map of map information. Otherwise
- // look directly for the information.
- auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
-
- // We potentially have map information for this declaration already.
- // Look for the first set of components that refer to it.
- if (It != Info.end()) {
- bool Found = false;
- for (auto &Data : It->second) {
- auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
- return MI.Components.back().getAssociatedDeclaration() == VD;
- });
- // If we found a map entry, signal that the pointer has to be
- // returned and move on to the next declaration.
- if (CI != Data.end()) {
- CI->ReturnDevicePointer = true;
- Found = true;
- break;
- }
- }
- if (Found)
- continue;
- }
-
- // We didn't find any match in our map information - generate a zero
- // size array section - if the pointer is a struct member we defer this
- // action until the whole struct has been processed.
- if (isa<MemberExpr>(IE)) {
- // Insert the pointer into Info to be processed by
- // generateInfoForComponentList. Because it is a member pointer
- // without a pointee, no entry will be generated for it, therefore
- // we need to generate one after the whole struct has been processed.
- // Nonetheless, generateInfoForComponentList must be called to take
- // the pointer into account for the calculation of the range of the
- // partial struct.
- InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
- llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
- nullptr, nullptr, /*ForDeviceAddr=*/true);
- DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
- } else {
- llvm::Value *Ptr;
- if (IE->isGLValue())
- Ptr = CGF.EmitLValue(IE).getPointer(CGF);
- else
- Ptr = CGF.EmitScalarExpr(IE);
- CombinedInfo.Exprs.push_back(VD);
- CombinedInfo.BasePointers.emplace_back(Ptr, VD);
- CombinedInfo.Pointers.push_back(Ptr);
- CombinedInfo.Sizes.push_back(
- llvm::Constant::getNullValue(CGF.Int64Ty));
- CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
- CombinedInfo.Mappers.push_back(nullptr);
- }
+ if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
+ continue;
+ MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
+ /*IsDevAddr=*/true);
}
}
@@ -8738,7 +8318,8 @@ private:
CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
RelevantVD);
- CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
+ CurInfo.Types[CurrentBasePointersIdx] |=
+ OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
}
}
}
@@ -8759,7 +8340,9 @@ private:
// Entry is RETURN_PARAM. Also, set the placeholder value
// MEMBER_OF=FFFF so that the entry is later updated with the
// correct value of MEMBER_OF.
- CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
+ CurInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
} else {
BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
@@ -8767,8 +8350,10 @@ private:
// Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
// placeholder value MEMBER_OF=FFFF so that the entry is later
// updated with the correct value of MEMBER_OF.
- CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
- OMP_MAP_MEMBER_OF);
+ CurInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
+ OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
}
CurInfo.Exprs.push_back(L.VD);
CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
@@ -8790,7 +8375,7 @@ private:
CombinedInfo.append(CurInfo);
}
// Append data for use_device_ptr clauses.
- CombinedInfo.append(UseDevicePtrCombinedInfo);
+ CombinedInfo.append(UseDeviceDataCombinedInfo);
}
public:
@@ -8818,6 +8403,10 @@ public:
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
for (auto L : C->component_lists())
DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
+ // Extract device addr clause information.
+ for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
+ for (auto L : C->component_lists())
+ HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
// Extract map information.
for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
if (C->getMapType() != OMPC_MAP_to)
@@ -8848,7 +8437,8 @@ public:
const ValueDecl *VD = nullptr,
bool NotTargetParams = true) const {
if (CurTypes.size() == 1 &&
- ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
+ ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
!PartialStruct.IsArraySection)
return;
Address LBAddr = PartialStruct.LowestElem.second;
@@ -8862,31 +8452,53 @@ public:
CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
// Pointer is the address of the lowest element
llvm::Value *LB = LBAddr.getPointer();
- CombinedInfo.Pointers.push_back(LB);
+ const CXXMethodDecl *MD =
+ CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
+ const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
+ bool HasBaseClass = RD ? RD->getNumBases() > 0 : false;
// There should not be a mapper for a combined entry.
+ if (HasBaseClass) {
+ // OpenMP 5.2 148:21:
+ // If the target construct is within a class non-static member function,
+ // and a variable is an accessible data member of the object for which the
+ // non-static data member function is invoked, the variable is treated as
+ // if the this[:1] expression had appeared in a map clause with a map-type
+ // of tofrom.
+ // Emit this[:1]
+ CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
+ QualType Ty = MD->getThisType()->getPointeeType();
+ llvm::Value *Size =
+ CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
+ /*isSigned=*/true);
+ CombinedInfo.Sizes.push_back(Size);
+ } else {
+ CombinedInfo.Pointers.push_back(LB);
+ // Size is (addr of {highest+1} element) - (addr of lowest element)
+ llvm::Value *HB = HBAddr.getPointer();
+ llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
+ HBAddr.getElementType(), HB, /*Idx0=*/1);
+ llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
+ llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
+ llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
+ llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
+ /*isSigned=*/false);
+ CombinedInfo.Sizes.push_back(Size);
+ }
CombinedInfo.Mappers.push_back(nullptr);
- // Size is (addr of {highest+1} element) - (addr of lowest element)
- llvm::Value *HB = HBAddr.getPointer();
- llvm::Value *HAddr =
- CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
- llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
- llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
- llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
- llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
- /*isSigned=*/false);
- CombinedInfo.Sizes.push_back(Size);
// Map type is always TARGET_PARAM, if generate info for captures.
- CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
- : OMP_MAP_TARGET_PARAM);
+ CombinedInfo.Types.push_back(
+ NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
+ : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
// If any element has the present modifier, then make sure the runtime
// doesn't attempt to allocate the struct.
if (CurTypes.end() !=
llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
- return Type & OMP_MAP_PRESENT;
+ return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
}))
- CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
+ CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
// Remove TARGET_PARAM flag from the first element
- (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
+ (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
// If any element has the ompx_hold modifier, then make sure the runtime
// uses the hold reference count for the struct as a whole so that it won't
// be unmapped by an extra dynamic reference count decrement. Add it to all
@@ -8895,11 +8507,12 @@ public:
// individual elements.
if (CurTypes.end() !=
llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
- return Type & OMP_MAP_OMPX_HOLD;
+ return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
})) {
- CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
+ CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
for (auto &M : CurTypes)
- M |= OMP_MAP_OMPX_HOLD;
+ M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
}
// All other current entries will be MEMBER_OF the combined entry
@@ -8947,7 +8560,7 @@ public:
Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
CGF.getContext().getDeclAlign(VD));
LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
- llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
FieldDecl *ThisCapture = nullptr;
RD->getCaptureFields(Captures, ThisCapture);
if (ThisCapture) {
@@ -8962,14 +8575,17 @@ public:
CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
CGF.Int64Ty, /*isSigned=*/true));
- CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
- OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ CombinedInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
+ OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
CombinedInfo.Mappers.push_back(nullptr);
}
for (const LambdaCapture &LC : RD->captures()) {
if (!LC.capturesVariable())
continue;
- const VarDecl *VD = LC.getCapturedVar();
+ const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
continue;
auto It = Captures.find(VD);
@@ -8995,8 +8611,11 @@ public:
CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
}
- CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
- OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ CombinedInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
+ OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
CombinedInfo.Mappers.push_back(nullptr);
}
}
@@ -9008,8 +8627,10 @@ public:
MapFlagsArrayTy &Types) const {
for (unsigned I = 0, E = Types.size(); I < E; ++I) {
// Set correct member_of idx for all implicit lambda captures.
- if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
- OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
+ if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
+ OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+ OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
continue;
llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
assert(BasePtr && "Unable to find base lambda address.");
@@ -9051,7 +8672,7 @@ public:
// If this declaration appears in a is_device_ptr clause we just have to
// pass the pointer by value. If it is a reference to a declaration, we just
// pass its value.
- if (DevPointersMap.count(VD)) {
+ if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
CombinedInfo.Exprs.push_back(VD);
CombinedInfo.BasePointers.emplace_back(Arg, VD);
CombinedInfo.Pointers.push_back(Arg);
@@ -9059,8 +8680,10 @@ public:
CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
/*isSigned=*/true));
CombinedInfo.Types.push_back(
- (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
- OMP_MAP_TARGET_PARAM);
+ (Cap->capturesVariable()
+ ? OpenMPOffloadMappingFlags::OMP_MAP_TO
+ : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) |
+ OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
CombinedInfo.Mappers.push_back(nullptr);
return;
}
@@ -9070,6 +8693,21 @@ public:
OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
const ValueDecl *, const Expr *>;
SmallVector<MapData, 4> DeclComponentLists;
+ // For member fields listed in is_device_ptr clauses, store them in
+ // DeclComponentLists so that component info is generated for them.
+ static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
+ auto It = DevPointersMap.find(VD);
+ if (It != DevPointersMap.end())
+ for (const auto &MCL : It->second)
+ DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
+ /*IsImplicit = */ true, nullptr,
+ nullptr);
+ auto I = HasDevAddrsMap.find(VD);
+ if (I != HasDevAddrsMap.end())
+ for (const auto &MCL : I->second)
+ DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
+ /*IsImplicit = */ true, nullptr,
+ nullptr);
assert(CurDir.is<const OMPExecutableDirective *>() &&
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
@@ -9123,7 +8761,7 @@ public:
std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
L;
++Count;
- for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
+ for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
VarRef) = L1;
@@ -9243,7 +8881,7 @@ public:
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
OverlappedComponents = Pair.getSecond();
generateInfoForComponentList(
- MapType, MapModifiers, llvm::None, Components, CombinedInfo,
+ MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
/*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
IsFirstComponentList = false;
@@ -9260,7 +8898,7 @@ public:
L;
auto It = OverlappedData.find(&L);
if (It == OverlappedData.end())
- generateInfoForComponentList(MapType, MapModifiers, llvm::None,
+ generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
Components, CombinedInfo, PartialStruct,
IsFirstComponentList, IsImplicit, Mapper,
/*ForDeviceAddr=*/false, VD, VarRef);
@@ -9284,7 +8922,8 @@ public:
CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
CGF.Int64Ty, /*isSigned=*/true));
// Default map type.
- CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
+ CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM);
} else if (CI.capturesVariableByCopy()) {
const VarDecl *VD = CI.getCapturedVar();
CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
@@ -9293,13 +8932,14 @@ public:
if (!RI.getType()->isAnyPointerType()) {
// We have to signal to the runtime captures passed by value that are
// not pointers.
- CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
+ CombinedInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
} else {
// Pointers are implicitly mapped with a zero size and no flags
// (other than first map that is added for all implicit maps).
- CombinedInfo.Types.push_back(OMP_MAP_NONE);
+ CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
}
auto I = FirstPrivateDecls.find(VD);
@@ -9331,11 +8971,12 @@ public:
IsImplicit = I->getSecond();
}
// Every default map produces a single argument which is a target parameter.
- CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
+ CombinedInfo.Types.back() |=
+ OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
// Add flag stating this is an implicit map.
if (IsImplicit)
- CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
+ CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
// No user-defined mapper for default mapping.
CombinedInfo.Mappers.push_back(nullptr);
@@ -9404,7 +9045,7 @@ static void emitNonContiguousDescriptor(
DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.PointersArray, 0, I);
+ Info.RTArgs.PointersArray, 0, I);
Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
++L;
@@ -9482,13 +9123,13 @@ static void emitOffloadingArrays(
Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- Info.BasePointersArray =
+ Info.RTArgs.BasePointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
- Info.PointersArray =
+ Info.RTArgs.PointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
Address MappersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
- Info.MappersArray = MappersArray.getPointer();
+ Info.RTArgs.MappersArray = MappersArray.getPointer();
// If we don't have any VLA types or other types that require runtime
// evaluation, we can use a constant array for the map sizes, otherwise we
@@ -9501,8 +9142,10 @@ static void emitOffloadingArrays(
for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
- if (IsNonContiguous && (CombinedInfo.Types[I] &
- MappableExprsHandler::OMP_MAP_NON_CONTIG))
+ if (IsNonContiguous &&
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ CombinedInfo.Types[I] &
+ OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
ConstSizes[I] = llvm::ConstantInt::get(
CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
else
@@ -9517,7 +9160,7 @@ static void emitOffloadingArrays(
QualType SizeArrayType = Ctx.getConstantArrayType(
Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- Info.SizesArray =
+ Info.RTArgs.SizesArray =
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
} else {
auto *SizesArrayInit = llvm::ConstantArray::get(
@@ -9541,26 +9184,29 @@ static void emitOffloadingArrays(
CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
/*DestWidth=*/64, /*Signed=*/false))),
CGF.getTypeSize(SizeArrayType));
- Info.SizesArray = Buffer.getPointer();
+ Info.RTArgs.SizesArray = Buffer.getPointer();
} else {
- Info.SizesArray = SizesArrayGbl;
+ Info.RTArgs.SizesArray = SizesArrayGbl;
}
}
// The map types are always constant so we don't need to generate code to
// fill arrays. Instead, we create an array constant.
- SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
- llvm::copy(CombinedInfo.Types, Mapping.begin());
+ SmallVector<uint64_t, 4> Mapping;
+ for (auto mapFlag : CombinedInfo.Types)
+ Mapping.push_back(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ mapFlag));
std::string MaptypesName =
CGM.getOpenMPRuntime().getName({"offload_maptypes"});
auto *MapTypesArrayGbl =
OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
- Info.MapTypesArray = MapTypesArrayGbl;
+ Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
// The information types are only built if there is debug information
// requested.
if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
- Info.MapNamesArray = llvm::Constant::getNullValue(
+ Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
} else {
auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
@@ -9572,7 +9218,7 @@ static void emitOffloadingArrays(
CGM.getOpenMPRuntime().getName({"offload_mapnames"});
auto *MapNamesArrayGbl =
OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
- Info.MapNamesArray = MapNamesArrayGbl;
+ Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
}
// If there's a present map type modifier, it must not be applied to the end
@@ -9580,15 +9226,19 @@ static void emitOffloadingArrays(
if (Info.separateBeginEndCalls()) {
bool EndMapTypesDiffer = false;
for (uint64_t &Type : Mapping) {
- if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
- Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
+ if (Type &
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
+ Type &=
+ ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
EndMapTypesDiffer = true;
}
}
if (EndMapTypesDiffer) {
MapTypesArrayGbl =
OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
- Info.MapTypesArrayEnd = MapTypesArrayGbl;
+ Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
}
}
@@ -9596,7 +9246,7 @@ static void emitOffloadingArrays(
llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.BasePointersArray, 0, I);
+ Info.RTArgs.BasePointersArray, 0, I);
BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address BPAddr(BP, BPVal->getType(),
@@ -9611,7 +9261,7 @@ static void emitOffloadingArrays(
llvm::Value *PVal = CombinedInfo.Pointers[I];
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.PointersArray, 0, I);
+ Info.RTArgs.PointersArray, 0, I);
P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
@@ -9620,7 +9270,7 @@ static void emitOffloadingArrays(
if (RuntimeSizes.test(I)) {
llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
- Info.SizesArray,
+ Info.RTArgs.SizesArray,
/*Idx0=*/0,
/*Idx1=*/I);
Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
@@ -9650,76 +9300,6 @@ static void emitOffloadingArrays(
emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
-namespace {
-/// Additional arguments for emitOffloadingArraysArgument function.
-struct ArgumentsOptions {
- bool ForEndCall = false;
- ArgumentsOptions() = default;
- ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
-};
-} // namespace
-
-/// Emit the arguments to be passed to the runtime library based on the
-/// arrays of base pointers, pointers, sizes, map types, and mappers. If
-/// ForEndCall, emit map types to be passed for the end of the region instead of
-/// the beginning.
-static void emitOffloadingArraysArgument(
- CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
- llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
- llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
- llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
- const ArgumentsOptions &Options = ArgumentsOptions()) {
- assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
- "expected region end call to runtime only when end call is separate");
- CodeGenModule &CGM = CGF.CGM;
- if (Info.NumberOfPtrs) {
- BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.BasePointersArray,
- /*Idx0=*/0, /*Idx1=*/0);
- PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.PointersArray,
- /*Idx0=*/0,
- /*Idx1=*/0);
- SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
- /*Idx0=*/0, /*Idx1=*/0);
- MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
- Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
- : Info.MapTypesArray,
- /*Idx0=*/0,
- /*Idx1=*/0);
-
- // Only emit the mapper information arrays if debug information is
- // requested.
- if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
- MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- else
- MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
- Info.MapNamesArray,
- /*Idx0=*/0,
- /*Idx1=*/0);
- // If there is no user-defined mapper, set the mapper array to nullptr to
- // avoid an unnecessary data privatization
- if (!Info.HasMapper)
- MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- else
- MappersArrayArg =
- CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
- } else {
- BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
- MapTypesArrayArg =
- llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
- MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- }
-}
-
/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
@@ -9999,7 +9579,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
: emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
// Extract the MEMBER_OF field from the map type.
- llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
+ llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ Info.Types[I]));
llvm::Value *MemberMapType =
MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
@@ -10017,8 +9599,10 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
// tofrom | alloc | to | from | tofrom | release | delete
llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
MapType,
- MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
llvm::BasicBlock *AllocElseBB =
MapperCGF.createBasicBlock("omp.type.alloc.else");
@@ -10032,30 +9616,40 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
MapperCGF.EmitBlock(AllocBB);
llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
MemberMapType,
- MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM)));
+ MapperCGF.Builder.getInt64(
+ ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
MapperCGF.Builder.CreateBr(EndBB);
MapperCGF.EmitBlock(AllocElseBB);
llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
LeftToFrom,
- MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
+ MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_TO)));
MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
// In case of to, clear OMP_MAP_FROM.
MapperCGF.EmitBlock(ToBB);
llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
MemberMapType,
- MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.getInt64(
+ ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
MapperCGF.Builder.CreateBr(EndBB);
MapperCGF.EmitBlock(ToElseBB);
llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
LeftToFrom,
- MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
// In case of from, clear OMP_MAP_TO.
MapperCGF.EmitBlock(FromBB);
llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
MemberMapType,
- MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
+ MapperCGF.Builder.getInt64(
+ ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_TO)));
// In case of tofrom, do nothing.
MapperCGF.EmitBlock(EndBB);
LastBB = EndBB;
@@ -10130,7 +9724,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
MapType,
- MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
+ MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
llvm::Value *DeleteCond;
llvm::Value *Cond;
if (IsInit) {
@@ -10139,7 +9735,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
// IsPtrAndObj?
llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
MapType,
- MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
+ MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
@@ -10162,11 +9760,15 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
// memory allocation/deletion purpose only.
llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
MapType,
- MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM)));
+ MapperCGF.Builder.getInt64(
+ ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
MapTypeArg = MapperCGF.Builder.CreateOr(
MapTypeArg,
- MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
+ MapperCGF.Builder.getInt64(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
// Call the runtime API __tgt_push_mapper_component to fill up the runtime
// data structure.
@@ -10301,9 +9903,29 @@ void CGOpenMPRuntime::emitTargetCall(
llvm::Value *NumIterations =
emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+ llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
+ if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
+ CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
+ llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
+ DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
+ DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
+ /*isSigned=*/false);
+ }
+
+ llvm::Value *ZeroArray =
+ llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3));
+
+ bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+ llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait);
+
+ llvm::Value *NumTeams3D =
+ CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0});
+ llvm::Value *NumThreads3D =
+ CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0});
+
// Arguments for the target kernel.
SmallVector<llvm::Value *> KernelArgs{
- CGF.Builder.getInt32(/* Version */ 1),
+ CGF.Builder.getInt32(/* Version */ 2),
PointerNum,
InputInfo.BasePointersArray.getPointer(),
InputInfo.PointersArray.getPointer(),
@@ -10311,18 +9933,13 @@ void CGOpenMPRuntime::emitTargetCall(
MapTypesArray,
MapNamesArray,
InputInfo.MappersArray.getPointer(),
- NumIterations};
-
- // Arguments passed to the 'nowait' variant.
- SmallVector<llvm::Value *> NoWaitKernelArgs{
- CGF.Builder.getInt32(0),
- llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
- CGF.Builder.getInt32(0),
- llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ NumIterations,
+ Flags,
+ NumTeams3D,
+ NumThreads3D,
+ DynCGroupMem,
};
- bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
-
// The target region is an outlined function launched by the runtime
// via calls to __tgt_target_kernel().
//
@@ -10336,13 +9953,9 @@ void CGOpenMPRuntime::emitTargetCall(
// __tgt_target_teams() launches a GPU kernel with the requested number
// of teams and threads so no additional calls to the runtime are required.
// Check the error code and execute the host version if required.
- CGF.Builder.restoreIP(
- HasNoWait ? OMPBuilder.emitTargetKernel(
- CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
- NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
- : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
- DeviceID, NumTeams, NumThreads,
- OutlinedFnID, KernelArgs));
+ CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
+ CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads,
+ OutlinedFnID, KernelArgs));
llvm::BasicBlock *OffloadFailedBlock =
CGF.createBasicBlock("omp_offload.failed");
@@ -10392,9 +10005,10 @@ void CGOpenMPRuntime::emitTargetCall(
CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
// Copy to the device as an argument. No need to retrieve it.
- CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
- MappableExprsHandler::OMP_MAP_TARGET_PARAM |
- MappableExprsHandler::OMP_MAP_IMPLICIT);
+ CurInfo.Types.push_back(
+ OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+ OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
CurInfo.Mappers.push_back(nullptr);
} else {
// If we have any information in the map clause, we use it, otherwise we
@@ -10441,25 +10055,26 @@ void CGOpenMPRuntime::emitTargetCall(
// weren't referenced within the construct.
MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
- TargetDataInfo Info;
+ CGOpenMPRuntime::TargetDataInfo Info;
// Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
- emitOffloadingArraysArgument(
- CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
- {/*ForEndCall=*/false});
+ bool EmitDebug =
+ CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
+ OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+ EmitDebug,
+ /*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
- InputInfo.BasePointersArray =
- Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- InputInfo.PointersArray =
- Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+ CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
+ CGM.getPointerAlign());
InputInfo.SizesArray =
- Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+ Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
InputInfo.MappersArray =
- Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- MapTypesArray = Info.MapTypesArray;
- MapNamesArray = Info.MapNamesArray;
+ Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ MapTypesArray = Info.RTArgs.MapTypesArray;
+ MapNamesArray = Info.RTArgs.MapNamesArray;
if (RequiresOuterTask)
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
@@ -10506,16 +10121,12 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (RequiresDeviceCodegen) {
const auto &E = *cast<OMPExecutableDirective>(S);
- unsigned DeviceID;
- unsigned FileID;
- unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
- FileID, Line);
+ auto EntryInfo =
+ getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName);
// Is this a target region that should not be emitted as an entry point? If
// so just signal we are done with this target region.
- if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
- ParentName, Line))
+ if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo))
return;
switch (E.getDirectiveKind()) {
@@ -10645,7 +10256,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
}
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
- Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
OMPDeclareTargetDeclAttr::getDeviceType(VD);
if (!DevTy)
return false;
@@ -10710,11 +10321,12 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
}
// Do not to emit variable if it is not marked as declare target.
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
cast<VarDecl>(GD.getDecl()));
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
HasRequiresUnifiedSharedMemory)) {
DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
return true;
@@ -10729,12 +10341,12 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
return;
// If we have host/nohost variables, they do not need to be registered.
- Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
OMPDeclareTargetDeclAttr::getDeviceType(VD);
if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
return;
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res) {
if (CGM.getLangOpts().OpenMPIsDevice) {
@@ -10746,20 +10358,22 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
return;
}
// Register declare target variables.
- OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
+ llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
StringRef VarName;
- CharUnits VarSize;
+ int64_t VarSize;
llvm::GlobalValue::LinkageTypes Linkage;
- if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
!HasRequiresUnifiedSharedMemory) {
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+ Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
VarName = CGM.getMangledName(VD);
if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
- VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
- assert(!VarSize.isZero() && "Expected non-zero size of the variable");
+ VarSize =
+ CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
+ assert(VarSize != 0 && "Expected non-zero size of the variable");
} else {
- VarSize = CharUnits::Zero();
+ VarSize = 0;
}
Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
// Temp solution to prevent optimizations of the internal variables.
@@ -10771,7 +10385,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
std::string RefName = getName({VarName, "ref"});
if (!CGM.GetGlobalValue(RefName)) {
llvm::Constant *AddrRef =
- getOrCreateInternalVariable(Addr->getType(), RefName);
+ OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName);
auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
GVAddrRef->setConstant(/*Val=*/true);
GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
@@ -10781,13 +10395,14 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
}
} else {
assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
HasRequiresUnifiedSharedMemory)) &&
"Declare target attribute must link or to with unified memory.");
if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
else
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+ Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
if (CGM.getLangOpts().OpenMPIsDevice) {
VarName = Addr->getName();
@@ -10796,7 +10411,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
VarName = getAddrOfDeclareTargetVar(VD).getName();
Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
}
- VarSize = CGM.getPointerSize();
+ VarSize = CGM.getPointerSize().getQuantity();
Linkage = llvm::GlobalValue::WeakAnyLinkage;
}
@@ -10814,16 +10429,18 @@ bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
for (const VarDecl *VD : DeferredGlobalVariables) {
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res)
continue;
- if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
!HasRequiresUnifiedSharedMemory) {
CGM.EmitGlobal(VD);
} else {
assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
HasRequiresUnifiedSharedMemory)) &&
"Expected link clause or to clause with unified memory.");
(void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
@@ -10841,6 +10458,7 @@ void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
for (const OMPClause *Clause : D->clauselists()) {
if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
HasRequiresUnifiedSharedMemory = true;
+ OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
} else if (const auto *AC =
dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
switch (AC->getAtomicDefaultMemOrderKind()) {
@@ -11025,7 +10643,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
void CGOpenMPRuntime::emitTargetDataCalls(
CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
- const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
+ const Expr *Device, const RegionCodeGenTy &CodeGen,
+ CGOpenMPRuntime::TargetDataInfo &Info) {
if (!CGF.HaveInsertPoint())
return;
@@ -11049,15 +10668,11 @@ void CGOpenMPRuntime::emitTargetDataCalls(
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
/*IsNonContiguous=*/true);
- llvm::Value *BasePointersArrayArg = nullptr;
- llvm::Value *PointersArrayArg = nullptr;
- llvm::Value *SizesArrayArg = nullptr;
- llvm::Value *MapTypesArrayArg = nullptr;
- llvm::Value *MapNamesArrayArg = nullptr;
- llvm::Value *MappersArrayArg = nullptr;
- emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg,
- MapNamesArrayArg, MappersArrayArg, Info);
+ llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
+ bool EmitDebug =
+ CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
+ OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
+ EmitDebug);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -11077,12 +10692,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::Value *OffloadingArgs[] = {RTLoc,
DeviceID,
PointerNum,
- BasePointersArrayArg,
- PointersArrayArg,
- SizesArrayArg,
- MapTypesArrayArg,
- MapNamesArrayArg,
- MappersArrayArg};
+ RTArgs.BasePointersArray,
+ RTArgs.PointersArray,
+ RTArgs.SizesArray,
+ RTArgs.MapTypesArray,
+ RTArgs.MapNamesArray,
+ RTArgs.MappersArray};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
@@ -11099,16 +10714,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
PrePostActionTy &) {
assert(Info.isValid() && "Invalid data environment closing arguments.");
- llvm::Value *BasePointersArrayArg = nullptr;
- llvm::Value *PointersArrayArg = nullptr;
- llvm::Value *SizesArrayArg = nullptr;
- llvm::Value *MapTypesArrayArg = nullptr;
- llvm::Value *MapNamesArrayArg = nullptr;
- llvm::Value *MappersArrayArg = nullptr;
- emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg,
- MapNamesArrayArg, MappersArrayArg, Info,
- {/*ForEndCall=*/true});
+ llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
+ bool EmitDebug =
+ CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
+ OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
+ EmitDebug,
+ /*ForEndCall=*/true);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -11128,12 +10739,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::Value *OffloadingArgs[] = {RTLoc,
DeviceID,
PointerNum,
- BasePointersArrayArg,
- PointersArrayArg,
- SizesArrayArg,
- MapTypesArrayArg,
- MapNamesArrayArg,
- MappersArrayArg};
+ RTArgs.BasePointersArray,
+ RTArgs.PointersArray,
+ RTArgs.SizesArray,
+ RTArgs.MapTypesArray,
+ RTArgs.MapNamesArray,
+ RTArgs.MappersArray};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
@@ -11322,27 +10933,28 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
MappableExprsHandler MEHandler(D, CGF);
MEHandler.generateAllInfo(CombinedInfo);
- TargetDataInfo Info;
+ CGOpenMPRuntime::TargetDataInfo Info;
// Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
/*IsNonContiguous=*/true);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
- emitOffloadingArraysArgument(
- CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
- {/*ForEndCall=*/false});
+ bool EmitDebug =
+ CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
+ OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+ EmitDebug,
+ /*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
- InputInfo.BasePointersArray =
- Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- InputInfo.PointersArray =
- Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+ CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
+ CGM.getPointerAlign());
InputInfo.SizesArray =
- Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+ Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
InputInfo.MappersArray =
- Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- MapTypesArray = Info.MapTypesArray;
- MapNamesArray = Info.MapNamesArray;
+ Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ MapTypesArray = Info.RTArgs.MapTypesArray;
+ MapNamesArray = Info.RTArgs.MapNamesArray;
if (RequiresOuterTask)
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
@@ -11937,7 +11549,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
if (CGM.getTarget().hasFeature("sve"))
emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
MangledName, 's', 128, Fn, ExprLoc);
- if (CGM.getTarget().hasFeature("neon"))
+ else if (CGM.getTarget().hasFeature("neon"))
emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
MangledName, 'n', 128, Fn, ExprLoc);
}
@@ -12039,7 +11651,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_doacross_fini);
CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
- llvm::makeArrayRef(FiniArgs));
+ llvm::ArrayRef(FiniArgs));
}
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
@@ -12130,7 +11742,7 @@ static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
- llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
+ std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
if (!AllocateAlignment)
return nullptr;
@@ -12570,15 +12182,15 @@ void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
// Last updated loop counter for the lastprivate conditional var.
// int<xx> last_iv = 0;
llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
- llvm::Constant *LastIV =
- getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
+ llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
+ LLIVTy, getName({UniqueDeclName, "iv"}));
cast<llvm::GlobalVariable>(LastIV)->setAlignment(
IVLVal.getAlignment().getAsAlign());
LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
// Last value of the lastprivate conditional.
// decltype(priv_a) last_a;
- llvm::GlobalVariable *Last = getOrCreateInternalVariable(
+ llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
Last->setAlignment(LVal.getAlignment().getAsAlign());
LValue LastLVal = CGF.MakeAddrLValue(
@@ -13037,7 +12649,8 @@ void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
- const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
+ const Expr *Device, const RegionCodeGenTy &CodeGen,
+ CGOpenMPRuntime::TargetDataInfo &Info) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index b95aef68335e..e7c1a098c768 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -121,6 +121,7 @@ struct OMPTaskDataTy final {
bool Nogroup = false;
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
+ bool HasNowaitClause = false;
};
/// Class intended to support codegen of all kind of the reduction clauses.
@@ -306,21 +307,10 @@ public:
protected:
CodeGenModule &CGM;
- StringRef FirstSeparator, Separator;
/// An OpenMP-IR-Builder instance.
llvm::OpenMPIRBuilder OMPBuilder;
- /// Constructor allowing to redefine the name separator for the variables.
- explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
- StringRef Separator);
-
- /// Creates offloading entry for the provided entry ID \a ID,
- /// address \a Addr, size \a Size, and flags \a Flags.
- virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
- uint64_t Size, int32_t Flags,
- llvm::GlobalValue::LinkageTypes Linkage);
-
/// Helper to emit outlined function for 'target' directive.
/// \param D Directive to emit.
/// \param ParentName Name of the function that encloses the target region.
@@ -339,9 +329,10 @@ protected:
/// Emits object of ident_t type with info for source location.
/// \param Flags Flags for OpenMP location.
+ /// \param EmitLoc Emit the source location even when debug info is disabled.
///
llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
- unsigned Flags = 0);
+ unsigned Flags = 0, bool EmitLoc = false);
/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
@@ -387,7 +378,7 @@ protected:
/// Emits \p Callee function call with arguments \p Args with location \p Loc.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::FunctionCallee Callee,
- ArrayRef<llvm::Value *> Args = llvm::None) const;
+ ArrayRef<llvm::Value *> Args = std::nullopt) const;
/// Emits address of the word in a memory where current thread id is
/// stored.
@@ -419,8 +410,7 @@ protected:
///
llvm::Value *getCriticalRegionLock(StringRef CriticalName);
-private:
-
+protected:
/// Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<SourceLocation, llvm::Value *> OpenMPDebugLocMapTy;
OpenMPDebugLocMapTy OpenMPDebugLocMap;
@@ -520,214 +510,7 @@ private:
QualType KmpDimTy;
/// Entity that registers the offloading constants that were emitted so
/// far.
- class OffloadEntriesInfoManagerTy {
- CodeGenModule &CGM;
-
- /// Number of entries registered so far.
- unsigned OffloadingEntriesNum = 0;
-
- public:
- /// Base class of the entries info.
- class OffloadEntryInfo {
- public:
- /// Kind of a given entry.
- enum OffloadingEntryInfoKinds : unsigned {
- /// Entry is a target region.
- OffloadingEntryInfoTargetRegion = 0,
- /// Entry is a declare target variable.
- OffloadingEntryInfoDeviceGlobalVar = 1,
- /// Invalid entry info.
- OffloadingEntryInfoInvalid = ~0u
- };
-
- protected:
- OffloadEntryInfo() = delete;
- explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
- explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
- uint32_t Flags)
- : Flags(Flags), Order(Order), Kind(Kind) {}
- ~OffloadEntryInfo() = default;
-
- public:
- bool isValid() const { return Order != ~0u; }
- unsigned getOrder() const { return Order; }
- OffloadingEntryInfoKinds getKind() const { return Kind; }
- uint32_t getFlags() const { return Flags; }
- void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
- llvm::Constant *getAddress() const {
- return cast_or_null<llvm::Constant>(Addr);
- }
- void setAddress(llvm::Constant *V) {
- assert(!Addr.pointsToAliveValue() && "Address has been set before!");
- Addr = V;
- }
- static bool classof(const OffloadEntryInfo *Info) { return true; }
-
- private:
- /// Address of the entity that has to be mapped for offloading.
- llvm::WeakTrackingVH Addr;
-
- /// Flags associated with the device global.
- uint32_t Flags = 0u;
-
- /// Order this entry was emitted.
- unsigned Order = ~0u;
-
- OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
- };
-
- /// Return true if a there are no entries defined.
- bool empty() const;
- /// Return number of entries defined so far.
- unsigned size() const { return OffloadingEntriesNum; }
- OffloadEntriesInfoManagerTy(CodeGenModule &CGM) : CGM(CGM) {}
-
- //
- // Target region entries related.
- //
-
- /// Kind of the target registry entry.
- enum OMPTargetRegionEntryKind : uint32_t {
- /// Mark the entry as target region.
- OMPTargetRegionEntryTargetRegion = 0x0,
- /// Mark the entry as a global constructor.
- OMPTargetRegionEntryCtor = 0x02,
- /// Mark the entry as a global destructor.
- OMPTargetRegionEntryDtor = 0x04,
- };
-
- /// Target region entries info.
- class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
- /// Address that can be used as the ID of the entry.
- llvm::Constant *ID = nullptr;
-
- public:
- OffloadEntryInfoTargetRegion()
- : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
- explicit OffloadEntryInfoTargetRegion(unsigned Order,
- llvm::Constant *Addr,
- llvm::Constant *ID,
- OMPTargetRegionEntryKind Flags)
- : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
- ID(ID) {
- setAddress(Addr);
- }
-
- llvm::Constant *getID() const { return ID; }
- void setID(llvm::Constant *V) {
- assert(!ID && "ID has been set before!");
- ID = V;
- }
- static bool classof(const OffloadEntryInfo *Info) {
- return Info->getKind() == OffloadingEntryInfoTargetRegion;
- }
- };
-
- /// Initialize target region entry.
- void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
- StringRef ParentName, unsigned LineNum,
- unsigned Order);
- /// Register target region entry.
- void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
- StringRef ParentName, unsigned LineNum,
- llvm::Constant *Addr, llvm::Constant *ID,
- OMPTargetRegionEntryKind Flags);
- /// Return true if a target region entry with the provided information
- /// exists.
- bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
- StringRef ParentName, unsigned LineNum,
- bool IgnoreAddressId = false) const;
- /// brief Applies action \a Action on all registered entries.
- typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned,
- const OffloadEntryInfoTargetRegion &)>
- OffloadTargetRegionEntryInfoActTy;
- void actOnTargetRegionEntriesInfo(
- const OffloadTargetRegionEntryInfoActTy &Action);
-
- //
- // Device global variable entries related.
- //
-
- /// Kind of the global variable entry..
- enum OMPTargetGlobalVarEntryKind : uint32_t {
- /// Mark the entry as a to declare target.
- OMPTargetGlobalVarEntryTo = 0x0,
- /// Mark the entry as a to declare target link.
- OMPTargetGlobalVarEntryLink = 0x1,
- };
-
- /// Device global variable entries info.
- class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
- /// Type of the global variable.
- CharUnits VarSize;
- llvm::GlobalValue::LinkageTypes Linkage;
-
- public:
- OffloadEntryInfoDeviceGlobalVar()
- : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
- explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
- OMPTargetGlobalVarEntryKind Flags)
- : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
- explicit OffloadEntryInfoDeviceGlobalVar(
- unsigned Order, llvm::Constant *Addr, CharUnits VarSize,
- OMPTargetGlobalVarEntryKind Flags,
- llvm::GlobalValue::LinkageTypes Linkage)
- : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
- VarSize(VarSize), Linkage(Linkage) {
- setAddress(Addr);
- }
-
- CharUnits getVarSize() const { return VarSize; }
- void setVarSize(CharUnits Size) { VarSize = Size; }
- llvm::GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
- void setLinkage(llvm::GlobalValue::LinkageTypes LT) { Linkage = LT; }
- static bool classof(const OffloadEntryInfo *Info) {
- return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
- }
- };
-
- /// Initialize device global variable entry.
- void initializeDeviceGlobalVarEntryInfo(StringRef Name,
- OMPTargetGlobalVarEntryKind Flags,
- unsigned Order);
-
- /// Register device global variable entry.
- void
- registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
- CharUnits VarSize,
- OMPTargetGlobalVarEntryKind Flags,
- llvm::GlobalValue::LinkageTypes Linkage);
- /// Checks if the variable with the given name has been registered already.
- bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
- return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
- }
- /// Applies action \a Action on all registered entries.
- typedef llvm::function_ref<void(StringRef,
- const OffloadEntryInfoDeviceGlobalVar &)>
- OffloadDeviceGlobalVarEntryInfoActTy;
- void actOnDeviceGlobalVarEntriesInfo(
- const OffloadDeviceGlobalVarEntryInfoActTy &Action);
-
- private:
- // Storage for target region entries kind. The storage is to be indexed by
- // file ID, device ID, parent function name and line number.
- typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion>
- OffloadEntriesTargetRegionPerLine;
- typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine>
- OffloadEntriesTargetRegionPerParentName;
- typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerParentName>
- OffloadEntriesTargetRegionPerFile;
- typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerFile>
- OffloadEntriesTargetRegionPerDevice;
- typedef OffloadEntriesTargetRegionPerDevice OffloadEntriesTargetRegionTy;
- OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
- /// Storage for device global variable entries kind. The storage is to be
- /// indexed by mangled name.
- typedef llvm::StringMap<OffloadEntryInfoDeviceGlobalVar>
- OffloadEntriesDeviceGlobalVarTy;
- OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
- };
- OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
+ llvm::OffloadEntriesInfoManager OffloadEntriesInfoManager;
bool ShouldMarkAsGlobal = true;
/// List of the emitted declarations.
@@ -773,7 +556,7 @@ private:
/// metadata.
void loadOffloadInfoMetadata();
- /// Start scanning from statement \a S and and emit all target regions
+ /// Start scanning from statement \a S and emit all target regions
/// found along the way.
/// \param S Starting statement.
/// \param ParentName Name of the function declaration that is being scanned.
@@ -814,16 +597,6 @@ private:
/// \return Cache variable for the specified threadprivate.
llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
- /// Gets (if variable with the given name already exist) or creates
- /// internal global variable with the specified Name. The created variable has
- /// linkage CommonLinkage by default and is initialized by null value.
- /// \param Ty Type of the global variable. If it is exist already the type
- /// must be the same.
- /// \param Name Name of the variable.
- llvm::GlobalVariable *getOrCreateInternalVariable(llvm::Type *Ty,
- const llvm::Twine &Name,
- unsigned AddressSpace = 0);
-
/// Set of threadprivate variables with the generated initializer.
llvm::StringSet<> ThreadPrivateWithDefinition;
@@ -915,11 +688,13 @@ private:
Address DependenciesArray);
public:
- explicit CGOpenMPRuntime(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM, ".", ".") {}
+ explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
virtual void clear();
+ /// Returns true if the current target is a GPU.
+ virtual bool isTargetCodegen() const { return false; }
+
/// Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
@@ -1048,6 +823,11 @@ public:
/// Emits code for a taskyield directive.
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc);
+ /// Emit __kmpc_error call for error directive
+ /// extern void __kmpc_error(ident_t *loc, int severity, const char *message);
+ virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME,
+ bool IsFatal);
+
/// Emit a taskgroup region.
/// \param TaskgroupOpGen Generator for the statement associated with the
/// given taskgroup region.
@@ -1654,65 +1434,16 @@ public:
/// Struct that keeps all the relevant information that should be kept
/// throughout a 'target data' region.
- class TargetDataInfo {
- /// Set to true if device pointer information have to be obtained.
- bool RequiresDevicePointerInfo = false;
- /// Set to true if Clang emits separate runtime calls for the beginning and
- /// end of the region. These calls might have separate map type arrays.
- bool SeparateBeginEndCalls = false;
-
+ class TargetDataInfo : public llvm::OpenMPIRBuilder::TargetDataInfo {
public:
- /// The array of base pointer passed to the runtime library.
- llvm::Value *BasePointersArray = nullptr;
- /// The array of section pointers passed to the runtime library.
- llvm::Value *PointersArray = nullptr;
- /// The array of sizes passed to the runtime library.
- llvm::Value *SizesArray = nullptr;
- /// The array of map types passed to the runtime library for the beginning
- /// of the region or for the entire region if there are no separate map
- /// types for the region end.
- llvm::Value *MapTypesArray = nullptr;
- /// The array of map types passed to the runtime library for the end of the
- /// region, or nullptr if there are no separate map types for the region
- /// end.
- llvm::Value *MapTypesArrayEnd = nullptr;
- /// The array of user-defined mappers passed to the runtime library.
- llvm::Value *MappersArray = nullptr;
- /// The array of original declaration names of mapped pointers sent to the
- /// runtime library for debugging
- llvm::Value *MapNamesArray = nullptr;
- /// Indicate whether any user-defined mapper exists.
- bool HasMapper = false;
- /// The total number of pointers passed to the runtime library.
- unsigned NumberOfPtrs = 0u;
+ explicit TargetDataInfo() : llvm::OpenMPIRBuilder::TargetDataInfo() {}
+ explicit TargetDataInfo(bool RequiresDevicePointerInfo,
+ bool SeparateBeginEndCalls)
+ : llvm::OpenMPIRBuilder::TargetDataInfo(RequiresDevicePointerInfo,
+ SeparateBeginEndCalls) {}
/// Map between the a declaration of a capture and the corresponding base
/// pointer address where the runtime returns the device pointers.
llvm::DenseMap<const ValueDecl *, Address> CaptureDeviceAddrMap;
-
- explicit TargetDataInfo() {}
- explicit TargetDataInfo(bool RequiresDevicePointerInfo,
- bool SeparateBeginEndCalls)
- : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
- SeparateBeginEndCalls(SeparateBeginEndCalls) {}
- /// Clear information about the data arrays.
- void clearArrayInfo() {
- BasePointersArray = nullptr;
- PointersArray = nullptr;
- SizesArray = nullptr;
- MapTypesArray = nullptr;
- MapTypesArrayEnd = nullptr;
- MapNamesArray = nullptr;
- MappersArray = nullptr;
- HasMapper = false;
- NumberOfPtrs = 0u;
- }
- /// Return true if the current target data information has valid arrays.
- bool isValid() {
- return BasePointersArray && PointersArray && SizesArray &&
- MapTypesArray && (!HasMapper || MappersArray) && NumberOfPtrs;
- }
- bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
- bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
};
/// Emit the target data mapping code associated with \a D.
@@ -1727,7 +1458,7 @@ public:
const OMPExecutableDirective &D,
const Expr *IfCond, const Expr *Device,
const RegionCodeGenTy &CodeGen,
- TargetDataInfo &Info);
+ CGOpenMPRuntime::TargetDataInfo &Info);
/// Emit the data mapping/movement code associated with the directive
/// \a D that should be of the form 'target [{enter|exit} data | update]'.
@@ -1792,7 +1523,7 @@ public:
virtual void
emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::FunctionCallee OutlinedFn,
- ArrayRef<llvm::Value *> Args = llvm::None) const;
+ ArrayRef<llvm::Value *> Args = std::nullopt) const;
/// Emits OpenMP-specific function prolog.
/// Required for device constructs.
@@ -2487,7 +2218,7 @@ public:
void emitTargetDataCalls(CodeGenFunction &CGF,
const OMPExecutableDirective &D, const Expr *IfCond,
const Expr *Device, const RegionCodeGenTy &CodeGen,
- TargetDataInfo &Info) override;
+ CGOpenMPRuntime::TargetDataInfo &Info) override;
/// Emit the data mapping/movement code associated with the directive
/// \a D that should be of the form 'target [{enter|exit} data | update]'.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 6dea846f486f..e8c5f04db49f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -15,6 +15,7 @@
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
@@ -73,30 +74,15 @@ private:
CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode =
CGOpenMPRuntimeGPU::EM_Unknown;
CGOpenMPRuntimeGPU::ExecutionMode &ExecMode;
- bool SavedRuntimeMode = false;
- bool *RuntimeMode = nullptr;
public:
- /// Constructor for Non-SPMD mode.
- ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode)
- : ExecMode(ExecMode) {
- SavedExecMode = ExecMode;
- ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD;
- }
- /// Constructor for SPMD mode.
ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode,
- bool &RuntimeMode, bool FullRuntimeMode)
- : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+ CGOpenMPRuntimeGPU::ExecutionMode EntryMode)
+ : ExecMode(ExecMode) {
SavedExecMode = ExecMode;
- SavedRuntimeMode = RuntimeMode;
- ExecMode = CGOpenMPRuntimeGPU::EM_SPMD;
- RuntimeMode = FullRuntimeMode;
- }
- ~ExecutionRuntimeModesRAII() {
- ExecMode = SavedExecMode;
- if (RuntimeMode)
- *RuntimeMode = SavedRuntimeMode;
+ ExecMode = EntryMode;
}
+ ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
};
/// GPU Configuration: This information can be derived from cuda registers,
@@ -109,9 +95,6 @@ enum MachineConfiguration : unsigned {
/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
-
- /// Maximal size of the shared memory buffer.
- SharedMemorySize = 128,
};
static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
@@ -444,9 +427,8 @@ public:
markAsEscaped(VD);
if (isa<OMPCapturedExprDecl>(VD))
VisitValueDecl(VD);
- else if (const auto *VarD = dyn_cast<VarDecl>(VD))
- if (VarD->isInitCapture())
- VisitValueDecl(VD);
+ else if (VD->isInitCapture())
+ VisitValueDecl(VD);
}
void VisitUnaryOperator(const UnaryOperator *E) {
if (!E)
@@ -746,274 +728,13 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
"Unknown programming model for OpenMP directive on NVPTX target.");
}
-/// Check if the directive is loops based and has schedule clause at all or has
-/// static scheduling.
-static bool hasStaticScheduling(const OMPExecutableDirective &D) {
- assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
- isOpenMPLoopDirective(D.getDirectiveKind()) &&
- "Expected loop-based directive.");
- return !D.hasClausesOfKind<OMPOrderedClause>() &&
- (!D.hasClausesOfKind<OMPScheduleClause>() ||
- llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
- [](const OMPScheduleClause *C) {
- return C->getScheduleKind() == OMPC_SCHEDULE_static;
- }));
-}
-
-/// Check for inner (nested) lightweight runtime construct, if any
-static bool hasNestedLightweightDirective(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
- assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
- const auto *CS = D.getInnermostCapturedStmt();
- const auto *Body =
- CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
- const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-
- if (const auto *NestedDir =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
- switch (D.getDirectiveKind()) {
- case OMPD_target:
- if (isOpenMPParallelDirective(DKind) &&
- isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
- hasStaticScheduling(*NestedDir))
- return true;
- if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
- return true;
- if (DKind == OMPD_parallel) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- }
- } else if (DKind == OMPD_teams) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPParallelDirective(DKind) &&
- isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- if (DKind == OMPD_parallel) {
- Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- }
- }
- }
- }
- return false;
- case OMPD_target_teams:
- if (isOpenMPParallelDirective(DKind) &&
- isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
- hasStaticScheduling(*NestedDir))
- return true;
- if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
- return true;
- if (DKind == OMPD_parallel) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- }
- }
- return false;
- case OMPD_target_parallel:
- if (DKind == OMPD_simd)
- return true;
- return isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
- case OMPD_target_teams_distribute:
- case OMPD_target_simd:
- case OMPD_target_parallel_for:
- case OMPD_target_parallel_for_simd:
- case OMPD_target_teams_distribute_simd:
- case OMPD_target_teams_distribute_parallel_for:
- case OMPD_target_teams_distribute_parallel_for_simd:
- case OMPD_parallel:
- case OMPD_for:
- case OMPD_parallel_for:
- case OMPD_parallel_master:
- case OMPD_parallel_sections:
- case OMPD_for_simd:
- case OMPD_parallel_for_simd:
- case OMPD_cancel:
- case OMPD_cancellation_point:
- case OMPD_ordered:
- case OMPD_threadprivate:
- case OMPD_allocate:
- case OMPD_task:
- case OMPD_simd:
- case OMPD_sections:
- case OMPD_section:
- case OMPD_single:
- case OMPD_master:
- case OMPD_critical:
- case OMPD_taskyield:
- case OMPD_barrier:
- case OMPD_taskwait:
- case OMPD_taskgroup:
- case OMPD_atomic:
- case OMPD_flush:
- case OMPD_depobj:
- case OMPD_scan:
- case OMPD_teams:
- case OMPD_target_data:
- case OMPD_target_exit_data:
- case OMPD_target_enter_data:
- case OMPD_distribute:
- case OMPD_distribute_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_distribute_parallel_for_simd:
- case OMPD_teams_distribute:
- case OMPD_teams_distribute_simd:
- case OMPD_teams_distribute_parallel_for:
- case OMPD_teams_distribute_parallel_for_simd:
- case OMPD_target_update:
- case OMPD_declare_simd:
- case OMPD_declare_variant:
- case OMPD_begin_declare_variant:
- case OMPD_end_declare_variant:
- case OMPD_declare_target:
- case OMPD_end_declare_target:
- case OMPD_declare_reduction:
- case OMPD_declare_mapper:
- case OMPD_taskloop:
- case OMPD_taskloop_simd:
- case OMPD_master_taskloop:
- case OMPD_master_taskloop_simd:
- case OMPD_parallel_master_taskloop:
- case OMPD_parallel_master_taskloop_simd:
- case OMPD_requires:
- case OMPD_unknown:
- default:
- llvm_unreachable("Unexpected directive.");
- }
- }
-
- return false;
-}
-
-/// Checks if the construct supports lightweight runtime. It must be SPMD
-/// construct + inner loop-based construct with static scheduling.
-static bool supportsLightweightRuntime(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
- if (!supportsSPMDExecutionMode(Ctx, D))
- return false;
- OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
- switch (DirectiveKind) {
- case OMPD_target:
- case OMPD_target_teams:
- case OMPD_target_parallel:
- return hasNestedLightweightDirective(Ctx, D);
- case OMPD_target_parallel_for:
- case OMPD_target_parallel_for_simd:
- case OMPD_target_teams_distribute_parallel_for:
- case OMPD_target_teams_distribute_parallel_for_simd:
- // (Last|First)-privates must be shared in parallel region.
- return hasStaticScheduling(D);
- case OMPD_target_simd:
- case OMPD_target_teams_distribute_simd:
- return true;
- case OMPD_target_teams_distribute:
- return false;
- case OMPD_parallel:
- case OMPD_for:
- case OMPD_parallel_for:
- case OMPD_parallel_master:
- case OMPD_parallel_sections:
- case OMPD_for_simd:
- case OMPD_parallel_for_simd:
- case OMPD_cancel:
- case OMPD_cancellation_point:
- case OMPD_ordered:
- case OMPD_threadprivate:
- case OMPD_allocate:
- case OMPD_task:
- case OMPD_simd:
- case OMPD_sections:
- case OMPD_section:
- case OMPD_single:
- case OMPD_master:
- case OMPD_critical:
- case OMPD_taskyield:
- case OMPD_barrier:
- case OMPD_taskwait:
- case OMPD_taskgroup:
- case OMPD_atomic:
- case OMPD_flush:
- case OMPD_depobj:
- case OMPD_scan:
- case OMPD_teams:
- case OMPD_target_data:
- case OMPD_target_exit_data:
- case OMPD_target_enter_data:
- case OMPD_distribute:
- case OMPD_distribute_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_distribute_parallel_for_simd:
- case OMPD_teams_distribute:
- case OMPD_teams_distribute_simd:
- case OMPD_teams_distribute_parallel_for:
- case OMPD_teams_distribute_parallel_for_simd:
- case OMPD_target_update:
- case OMPD_declare_simd:
- case OMPD_declare_variant:
- case OMPD_begin_declare_variant:
- case OMPD_end_declare_variant:
- case OMPD_declare_target:
- case OMPD_end_declare_target:
- case OMPD_declare_reduction:
- case OMPD_declare_mapper:
- case OMPD_taskloop:
- case OMPD_taskloop_simd:
- case OMPD_master_taskloop:
- case OMPD_master_taskloop_simd:
- case OMPD_parallel_master_taskloop:
- case OMPD_parallel_master_taskloop_simd:
- case OMPD_requires:
- case OMPD_unknown:
- default:
- break;
- }
- llvm_unreachable(
- "Unknown programming model for OpenMP directive on NVPTX target.");
-}
-
void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);
EntryFunctionState EST;
WrapperFunctionsMap.clear();
@@ -1048,8 +769,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
EntryFunctionState &EST, bool IsSPMD) {
CGBuilderTy &Bld = CGF.Builder;
- Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
- IsInTargetMasterThreadRegion = IsSPMD;
+ Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
if (!IsSPMD)
emitGenericVarsProlog(CGF, EST.Loc);
}
@@ -1061,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
emitGenericVarsEpilog(CGF);
CGBuilderTy &Bld = CGF.Builder;
- OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+ OMPBuilder.createTargetDeinit(Bld, IsSPMD);
}
void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1070,10 +790,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionRuntimeModesRAII ModeRAII(
- CurrentExecutionMode, RequiresFullRuntime,
- CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
- !supportsLightweightRuntime(CGM.getContext(), D));
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -1116,36 +833,10 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
: OMP_TGT_EXEC_MODE_GENERIC),
Twine(Name, "_exec_mode"));
+ GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
CGM.addCompilerUsedGlobal(GVMode);
}
-void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
- llvm::Constant *Addr,
- uint64_t Size, int32_t,
- llvm::GlobalValue::LinkageTypes) {
- // TODO: Add support for global variables on the device after declare target
- // support.
- llvm::Function *Fn = dyn_cast<llvm::Function>(Addr);
- if (!Fn)
- return;
-
- llvm::Module &M = CGM.getModule();
- llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
- // Get "nvvm.annotations" metadata node.
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
- llvm::Metadata *MDVals[] = {
- llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
- // Append metadata to nvvm.annotations.
- MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-
- // Add a function attribute for the kernel.
- Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel"));
-}
-
void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -1166,39 +857,14 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
-namespace {
-LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
-/// Enum for accesseing the reserved_2 field of the ident_t struct.
-enum ModeFlagsTy : unsigned {
- /// Bit set to 1 when in SPMD mode.
- KMP_IDENT_SPMD_MODE = 0x01,
- /// Bit set to 1 when a simplified runtime is used.
- KMP_IDENT_SIMPLE_RT_MODE = 0x02,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
-};
-
-/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime.
-static const ModeFlagsTy UndefinedMode =
- (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
-} // anonymous namespace
-
-unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
- switch (getExecutionMode()) {
- case EM_SPMD:
- if (requiresFullRuntime())
- return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
- return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
- case EM_NonSPMD:
- assert(requiresFullRuntime() && "Expected full runtime.");
- return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
- case EM_Unknown:
- return UndefinedMode;
- }
- llvm_unreachable("Unknown flags are requested.");
-}
-
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM, "_", "$") {
+ : CGOpenMPRuntime(CGM) {
+ llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true,
+ hasRequiresUnifiedSharedMemory(),
+ CGM.getLangOpts().OpenMPOffloadMandatory);
+ OMPBuilder.setConfig(Config);
+ OffloadEntriesInfoManager.setConfig(Config);
+
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP can only handle device code.");
@@ -1214,6 +880,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
"__omp_rtl_assume_threads_oversubscription");
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
"__omp_rtl_assume_no_thread_state");
+ OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism,
+ "__omp_rtl_assume_no_nested_parallelism");
}
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
@@ -1241,33 +909,13 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
// Emit target region as a standalone region.
- class NVPTXPrePostActionTy : public PrePostActionTy {
- bool &IsInParallelRegion;
- bool PrevIsInParallelRegion;
-
- public:
- NVPTXPrePostActionTy(bool &IsInParallelRegion)
- : IsInParallelRegion(IsInParallelRegion) {}
- void Enter(CodeGenFunction &CGF) override {
- PrevIsInParallelRegion = IsInParallelRegion;
- IsInParallelRegion = true;
- }
- void Exit(CodeGenFunction &CGF) override {
- IsInParallelRegion = PrevIsInParallelRegion;
- }
- } Action(IsInParallelRegion);
- CodeGen.setAction(Action);
bool PrevIsInTTDRegion = IsInTTDRegion;
IsInTTDRegion = false;
- bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
- IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
- IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
- if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
- !IsInParallelRegion) {
+ if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) {
llvm::Function *WrapperFun =
createParallelDataSharingWrapper(OutlinedFun, D);
WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1330,7 +978,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
if (!LastPrivatesReductions.empty()) {
GlobalizedRD = ::buildRecordForGlobalizedVars(
- CGM.getContext(), llvm::None, LastPrivatesReductions,
+ CGM.getContext(), std::nullopt, LastPrivatesReductions,
MappedDeclsFields, WarpSize);
}
} else if (!LastPrivatesReductions.empty()) {
@@ -3307,7 +2955,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
++Cnt;
}
const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
- CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap,
+ CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap,
C.getLangOpts().OpenMPCUDAReductionBufNum);
TeamsReductions.push_back(TeamReductionRec);
if (!KernelTeamsReductionPtr) {
@@ -3379,7 +3027,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
llvm::Value *EndArgs[] = {ThreadId};
RegionCodeGenTy RCG(CodeGen);
NVPTXActionTy Action(
- nullptr, llvm::None,
+ nullptr, std::nullopt,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
EndArgs);
@@ -3435,7 +3083,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
const Type *NonQualTy = QC.strip(NativeParamType);
QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
unsigned NativePointeeAddrSpace =
- CGF.getContext().getTargetAddressSpace(NativePointeeTy);
+ CGF.getTypes().getTargetAddressSpace(NativePointeeTy);
QualType TargetTy = TargetParam->getType();
llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
@@ -3659,16 +3307,6 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
Data.insert(std::make_pair(VD, MappedVarData()));
}
- if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
- CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
- VarChecker.Visit(Body);
- I->getSecond().SecondaryLocalVarData.emplace();
- DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData;
- for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
- assert(VD->isCanonicalDecl() && "Expected canonical declaration");
- Data.insert(std::make_pair(VD, MappedVarData()));
- }
- }
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
struct GlobalizationScope final : EHScopeStack::Cleanup {
@@ -3810,7 +3448,7 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
else
VDLVal = CGF.MakeAddrLValue(
VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
- llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
FieldDecl *ThisCapture = nullptr;
RD->getCaptureFields(Captures, ThisCapture);
if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
@@ -3822,13 +3460,15 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
for (const LambdaCapture &LC : RD->captures()) {
if (LC.getCaptureKind() != LCK_ByRef)
continue;
- const VarDecl *VD = LC.getCapturedVar();
- if (!CS->capturesVariable(VD))
+ const ValueDecl *VD = LC.getCapturedVar();
+ // FIXME: For now VD is always a VarDecl because OpenMP does not support
+ // capturing structured bindings in lambdas yet.
+ if (!CS->capturesVariable(cast<VarDecl>(VD)))
continue;
auto It = Captures.find(VD);
assert(It != Captures.end() && "Found lambda capture without field.");
LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
- Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));
if (VD->getType().getCanonicalType()->isReferenceType())
VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
VD->getType().getCanonicalType())
@@ -3913,6 +3553,9 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::SM_75:
case CudaArch::SM_80:
case CudaArch::SM_86:
+ case CudaArch::SM_87:
+ case CudaArch::SM_89:
+ case CudaArch::SM_90:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
@@ -4006,10 +3649,10 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
llvm::Function *F = M->getFunction(LocSize);
if (!F) {
F = llvm::Function::Create(
- llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
+ llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
}
- return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
+ return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads");
}
llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index ff585efa3fce..75d140205773 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -42,8 +42,6 @@ private:
ExecutionMode getExecutionMode() const;
- bool requiresFullRuntime() const { return RequiresFullRuntime; }
-
/// Get barrier to synchronize all threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);
@@ -66,12 +64,6 @@ private:
// Base class overrides.
//
- /// Creates offloading entry for the provided entry ID \a ID,
- /// address \a Addr, size \a Size, and flags \a Flags.
- void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
- uint64_t Size, int32_t Flags,
- llvm::GlobalValue::LinkageTypes Linkage) override;
-
/// Emit outlined function specialized for the Fork-Join
/// programming model for applicable target directives on the NVPTX device.
/// \param D Directive to emit.
@@ -161,16 +153,12 @@ protected:
/// Constant for NVPTX for better optimization.
bool isDefaultLocationConstant() const override { return true; }
- /// Returns additional flags that can be stored in reserved_2 field of the
- /// default location.
- /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
- /// Full/Lightweight runtime mode. Used for better optimization.
- unsigned getDefaultLocationReserved2Flags() const override;
-
public:
explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
void clear() override;
+ bool isTargetCodegen() const override { return true; };
+
/// Declare generalized virtual functions which need to be defined
/// by all specializations of OpenMPGPURuntime Targets like AMDGCN
/// and NVPTX.
@@ -330,7 +318,7 @@ public:
/// translating these arguments to correct target-specific arguments.
void emitOutlinedFunctionCall(
CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
- ArrayRef<llvm::Value *> Args = llvm::None) const override;
+ ArrayRef<llvm::Value *> Args = std::nullopt) const override;
/// Emits OpenMP-specific function prolog.
/// Required for device constructs.
@@ -386,17 +374,9 @@ private:
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;
- /// Check if the full runtime is required (default - yes).
- bool RequiresFullRuntime = true;
-
- /// true if we're emitting the code for the target region and next parallel
- /// region is L0 for sure.
- bool IsInTargetMasterThreadRegion = false;
/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
- /// true if we're definitely in the parallel region.
- bool IsInParallelRegion = false;
/// Map between an outlined function and its wrapper.
llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
@@ -421,12 +401,10 @@ private:
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
DeclToAddrMapTy LocalVarData;
- llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
EscapedParamsTy EscapedParameters;
llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
EscapedVariableLengthDeclsAddrs;
- llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
};
/// Maps the function to the list of the globalized variables with their
@@ -438,9 +416,6 @@ private:
/// reductions.
/// All the records are gathered into a union `union.type` is created.
llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
- /// Shared pointer for the global memory in the global memory buffer used for
- /// the given kernel.
- llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
/// Pair of the Non-SPMD team and all reductions variables in this team
/// region.
std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 6f85bca8a201..596f0bd33204 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -162,7 +162,7 @@ struct CGRecordLowering {
return CharUnits::fromQuantity(DataLayout.getTypeAllocSize(Type));
}
CharUnits getAlignment(llvm::Type *Type) {
- return CharUnits::fromQuantity(DataLayout.getABITypeAlignment(Type));
+ return CharUnits::fromQuantity(DataLayout.getABITypeAlign(Type));
}
bool isZeroInitializable(const FieldDecl *FD) {
return Types.isZeroInitializable(FD->getType());
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 05ab16668743..248ffb544014 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/SaveAndRestore.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -254,6 +255,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPTaskyieldDirectiveClass:
EmitOMPTaskyieldDirective(cast<OMPTaskyieldDirective>(*S));
break;
+ case Stmt::OMPErrorDirectiveClass:
+ EmitOMPErrorDirective(cast<OMPErrorDirective>(*S));
+ break;
case Stmt::OMPBarrierDirectiveClass:
EmitOMPBarrierDirective(cast<OMPBarrierDirective>(*S));
break;
@@ -571,9 +575,9 @@ void CodeGenFunction::EmitBlock(llvm::BasicBlock *BB, bool IsFinished) {
// Place the block after the current block, if possible, or else at
// the end of the function.
if (CurBB && CurBB->getParent())
- CurFn->getBasicBlockList().insertAfter(CurBB->getIterator(), BB);
+ CurFn->insert(std::next(CurBB->getIterator()), BB);
else
- CurFn->getBasicBlockList().push_back(BB);
+ CurFn->insert(CurFn->end(), BB);
Builder.SetInsertPoint(BB);
}
@@ -598,15 +602,14 @@ void CodeGenFunction::EmitBlockAfterUses(llvm::BasicBlock *block) {
bool inserted = false;
for (llvm::User *u : block->users()) {
if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(u)) {
- CurFn->getBasicBlockList().insertAfter(insn->getParent()->getIterator(),
- block);
+ CurFn->insert(std::next(insn->getParent()->getIterator()), block);
inserted = true;
break;
}
}
if (!inserted)
- CurFn->getBasicBlockList().push_back(block);
+ CurFn->insert(CurFn->end(), block);
Builder.SetInsertPoint(block);
}
@@ -718,11 +721,10 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
break;
}
}
- SaveAndRestore<bool> save_nomerge(InNoMergeAttributedStmt, nomerge);
- SaveAndRestore<bool> save_noinline(InNoInlineAttributedStmt, noinline);
- SaveAndRestore<bool> save_alwaysinline(InAlwaysInlineAttributedStmt,
- alwaysinline);
- SaveAndRestore<const CallExpr *> save_musttail(MustTailCall, musttail);
+ SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge);
+ SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
+ SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
+ SaveAndRestore save_musttail(MustTailCall, musttail);
EmitStmt(S.getSubStmt(), S.getAttrs());
}
@@ -815,11 +817,20 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// Prefer the PGO based weights over the likelihood attribute.
// When the build isn't optimized the metadata isn't used, so don't generate
// it.
+ // Also, differentiate between disabled PGO and a never executed branch with
+ // PGO. Assuming PGO is in use:
+ // - we want to ignore the [[likely]] attribute if the branch is never
+ // executed,
+ // - assuming the profile is poor, preserving the attribute may still be
+ // beneficial.
+ // As an approximation, preserve the attribute only if both the branch and the
+ // parent context were not executed.
Stmt::Likelihood LH = Stmt::LH_None;
- uint64_t Count = getProfileCount(S.getThen());
- if (!Count && CGM.getCodeGenOpts().OptimizationLevel)
+ uint64_t ThenCount = getProfileCount(S.getThen());
+ if (!ThenCount && !getCurrentProfileCount() &&
+ CGM.getCodeGenOpts().OptimizationLevel)
LH = Stmt::getLikelihood(S.getThen(), S.getElse());
- EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, LH);
+ EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, ThenCount, LH);
// Emit the 'then' code.
EmitBlock(ThenBlock);
@@ -1458,7 +1469,7 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S,
llvm::BasicBlock *FalseDest = CaseRangeBlock;
CaseRangeBlock = createBasicBlock("sw.caserange");
- CurFn->getBasicBlockList().push_back(CaseRangeBlock);
+ CurFn->insert(CurFn->end(), CaseRangeBlock);
Builder.SetInsertPoint(CaseRangeBlock);
// Emit range check.
@@ -1509,6 +1520,21 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S,
llvm::ConstantInt *CaseVal =
Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
+
+ // Emit debuginfo for the case value if it is an enum value.
+ const ConstantExpr *CE;
+ if (auto ICE = dyn_cast<ImplicitCastExpr>(S.getLHS()))
+ CE = dyn_cast<ConstantExpr>(ICE->getSubExpr());
+ else
+ CE = dyn_cast<ConstantExpr>(S.getLHS());
+ if (CE) {
+ if (auto DE = dyn_cast<DeclRefExpr>(CE->getSubExpr()))
+ if (CGDebugInfo *Dbg = getDebugInfo())
+ if (CGM.getCodeGenOpts().hasReducedDebugInfo())
+ Dbg->EmitGlobalVariable(DE->getDecl(),
+ APValue(llvm::APSInt(CaseVal->getValue())));
+ }
+
if (SwitchLikelihood)
SwitchLikelihood->push_back(Stmt::getLikelihood(Attrs));
@@ -1843,11 +1869,11 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S,
FoundCase;
}
-static Optional<SmallVector<uint64_t, 16>>
+static std::optional<SmallVector<uint64_t, 16>>
getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) {
// Are there enough branches to weight them?
if (Likelihoods.size() <= 1)
- return None;
+ return std::nullopt;
uint64_t NumUnlikely = 0;
uint64_t NumNone = 0;
@@ -1868,7 +1894,7 @@ getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) {
// Is there a likelihood attribute used?
if (NumUnlikely == 0 && NumLikely == 0)
- return None;
+ return std::nullopt;
// When multiple cases share the same code they can be combined during
// optimization. In that case the weights of the branch will be the sum of
@@ -2050,7 +2076,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
} else if (SwitchLikelihood) {
assert(SwitchLikelihood->size() == 1 + SwitchInsn->getNumCases() &&
"switch likelihoods do not match switch cases");
- Optional<SmallVector<uint64_t, 16>> LHW =
+ std::optional<SmallVector<uint64_t, 16>> LHW =
getLikelihoodWeights(*SwitchLikelihood);
if (LHW) {
llvm::MDBuilder MDHelper(CGM.getLLVMContext());
@@ -2256,9 +2282,9 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
// Attach readnone and readonly attributes.
if (!HasSideEffect) {
if (ReadNone)
- Result.addFnAttr(llvm::Attribute::ReadNone);
+ Result.setDoesNotAccessMemory();
else if (ReadOnly)
- Result.addFnAttr(llvm::Attribute::ReadOnly);
+ Result.setOnlyReadsMemory();
}
// Add elementtype attribute for indirect constraints.
@@ -2343,6 +2369,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
std::vector<llvm::Type *> ArgElemTypes;
std::vector<llvm::Value*> Args;
llvm::BitVector ResultTypeRequiresCast;
+ llvm::BitVector ResultRegIsFlagReg;
// Keep track of inout constraints.
std::string InOutConstraints;
@@ -2400,6 +2427,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
ResultRegQualTys.push_back(QTy);
ResultRegDests.push_back(Dest);
+ bool IsFlagReg = llvm::StringRef(OutputConstraint).startswith("{@cc");
+ ResultRegIsFlagReg.push_back(IsFlagReg);
+
llvm::Type *Ty = ConvertTypeForMem(QTy);
const bool RequiresCast = Info.allowsRegister() &&
(getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
@@ -2448,7 +2478,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
+ VT->getPrimitiveSizeInBits().getKnownMinValue());
} else {
Address DestAddr = Dest.getAddress(*this);
// Matrix types in memory are represented by arrays, but accessed through
@@ -2487,7 +2517,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
+ VT->getPrimitiveSizeInBits().getKnownMinValue());
// Only tie earlyclobber physregs.
if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber()))
InOutConstraints += llvm::utostr(i);
@@ -2577,7 +2607,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
+ VT->getPrimitiveSizeInBits().getKnownMinValue());
ArgTypes.push_back(Arg->getType());
ArgElemTypes.push_back(ArgElemType);
@@ -2717,10 +2747,21 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// ResultRegDests can be also populated by addReturnRegisterOutputs() above,
// in which case its size may grow.
assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
+ assert(ResultRegIsFlagReg.size() <= ResultRegDests.size());
for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
llvm::Value *Tmp = RegResults[i];
llvm::Type *TruncTy = ResultTruncRegTypes[i];
+ if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
+ // Target must guarantee the Value `Tmp` here is lowered to a boolean
+ // value.
+ llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
+ llvm::Value *IsBooleanValue =
+ Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
+ llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
+ Builder.CreateCall(FnAssume, IsBooleanValue);
+ }
+
// If the result type of the LLVM IR asm doesn't match the result type of
// the expression, do the conversion.
if (ResultRegTypes[i] != ResultTruncRegTypes[i]) {
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index aa55cdaca5dc..6bc30ad0302e 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
@@ -74,7 +75,7 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
public:
OMPLexicalScope(
CodeGenFunction &CGF, const OMPExecutableDirective &S,
- const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
+ const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
const bool EmitPreInitStmt = true)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
@@ -114,7 +115,7 @@ class OMPParallelScope final : public OMPLexicalScope {
public:
OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
- : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
+ : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
EmitPreInitStmt(S)) {}
};
@@ -129,7 +130,7 @@ class OMPTeamsScope final : public OMPLexicalScope {
public:
OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
- : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
+ : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
EmitPreInitStmt(S)) {}
};
@@ -446,7 +447,7 @@ static llvm::Function *emitOutlinedFunctionPrologue(
FunctionDecl *DebugFunctionDecl = nullptr;
if (!FO.UIntPtrCastRequired) {
FunctionProtoType::ExtProtoInfo EPI;
- QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
+ QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
DebugFunctionDecl = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
SourceLocation(), DeclarationName(), FunctionTy,
@@ -708,8 +709,9 @@ void CodeGenFunction::EmitOMPAggregateAssign(
llvm::Value *SrcBegin = SrcAddr.getPointer();
llvm::Value *DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
- llvm::Value *DestEnd =
- Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
+ llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
+ DestBegin, NumElements);
+
// The basic structure here is a while-do loop.
llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
@@ -1347,6 +1349,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
case OMPD_parallel_for_simd:
case OMPD_task:
case OMPD_taskyield:
+ case OMPD_error:
case OMPD_barrier:
case OMPD_taskwait:
case OMPD_taskgroup:
@@ -1593,6 +1596,19 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
const OMPExecutableDirective &,
llvm::SmallVectorImpl<llvm::Value *> &) {}
+static void emitOMPCopyinClause(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ bool Copyins = CGF.EmitOMPCopyinClause(S);
+ if (Copyins) {
+ // Emit implicit barrier to synchronize threads and avoid data races on
+ // propagation master's thread values of threadprivate variables to local
+ // instances of that variables of all other implicit threads.
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(
+ CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ }
+}
+
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
CodeGenFunction &CGF, const VarDecl *VD) {
CodeGenModule &CGM = CGF.CGM;
@@ -1774,16 +1790,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
- bool Copyins = CGF.EmitOMPCopyinClause(S);
+ emitOMPCopyinClause(CGF, S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- if (Copyins) {
- // Emit implicit barrier to synchronize threads and avoid data races on
- // propagation master's thread values of threadprivate variables to local
- // instances of that variables of all other implicit threads.
- CGF.CGM.getOpenMPRuntime().emitBarrierCall(
- CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
- /*ForceSimpleCall=*/true);
- }
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -2582,8 +2590,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
emitPostUpdateForReductionClause(CGF, S,
[](CodeGenFunction &) { return nullptr; });
+ LoopScope.restoreMap();
+ CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
}
- CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
// Emit: if (PreCond) - end.
if (ContBlock) {
CGF.EmitBranch(ContBlock);
@@ -2594,8 +2603,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
// Check for unsupported clauses
for (OMPClause *C : S.clauses()) {
- // Currently only simdlen clause is supported
- if (!isa<OMPSimdlenClause>(C))
+ // Currently only order, simdlen and safelen clauses are supported
+ if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
+ isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
return false;
}
@@ -2621,6 +2631,36 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
}
return true;
}
+static llvm::MapVector<llvm::Value *, llvm::Value *>
+GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
+ llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
+ for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
+ llvm::APInt ClauseAlignment(64, 0);
+ if (const Expr *AlignmentExpr = Clause->getAlignment()) {
+ auto *AlignmentCI =
+ cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
+ ClauseAlignment = AlignmentCI->getValue();
+ }
+ for (const Expr *E : Clause->varlists()) {
+ llvm::APInt Alignment(ClauseAlignment);
+ if (Alignment == 0) {
+ // OpenMP [2.8.1, Description]
+ // If no optional parameter is specified, implementation-defined default
+ // alignments for SIMD instructions on the target platforms are assumed.
+ Alignment =
+ CGF.getContext()
+ .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+ E->getType()->getPointeeType()))
+ .getQuantity();
+ }
+ assert((Alignment == 0 || Alignment.isPowerOf2()) &&
+ "alignment is not power of 2");
+ llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
+ AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
+ }
+ }
+ return AlignedVars;
+}
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
bool UseOMPIRBuilder =
@@ -2630,6 +2670,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
PrePostActionTy &) {
// Use the OpenMPIRBuilder if enabled.
if (UseOMPIRBuilder) {
+ llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
+ GetAlignedMapping(S, CGF);
// Emit the associated statement and get its loop representation.
const Stmt *Inner = S.getRawStmt();
llvm::CanonicalLoopInfo *CLI =
@@ -2646,7 +2688,24 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
Simdlen = Val;
}
- OMPBuilder.applySimd(CLI, Simdlen);
+ llvm::ConstantInt *Safelen = nullptr;
+ if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
+ RValue Len =
+ this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
+ /*ignoreResult=*/true);
+ auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+ Safelen = Val;
+ }
+ llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
+ if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
+ if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
+ Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
+ }
+ }
+ // Add simd metadata to the collapsed loop. Do not generate
+ // another loop for if clause. Support for if clause is done earlier.
+ OMPBuilder.applySimd(CLI, AlignedVars,
+ /*IfCond*/ nullptr, Order, Simdlen, Safelen);
return;
}
};
@@ -3426,11 +3485,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
EmitOMPLastprivateClauseFinal(
S, isOpenMPSimdDirective(S.getDirectiveKind()),
Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
+ LoopScope.restoreMap();
+ EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
+ });
}
- EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
- return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
- });
DoacrossCleanupScope.ForceCleanup();
// We're now done with the loop, so jump to the continuation block.
if (ContBlock) {
@@ -4340,6 +4400,7 @@ void CodeGenFunction::EmitOMPParallelForDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ emitOMPCopyinClause(CGF, S);
(void)emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
@@ -4373,6 +4434,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ emitOMPCopyinClause(CGF, S);
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
@@ -4407,16 +4469,8 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
- bool Copyins = CGF.EmitOMPCopyinClause(S);
+ emitOMPCopyinClause(CGF, S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- if (Copyins) {
- // Emit implicit barrier to synchronize threads and avoid data races on
- // propagation master's thread values of threadprivate variables to local
- // instances of that variables of all other implicit threads.
- CGF.CGM.getOpenMPRuntime().emitBarrierCall(
- CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
- /*ForceSimpleCall=*/true);
- }
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
@@ -4441,6 +4495,7 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
// directives: 'parallel' with 'sections' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ emitOMPCopyinClause(CGF, S);
CGF.EmitSections(S);
};
{
@@ -4892,7 +4947,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
Data.NumberOfParts);
- OMPLexicalScope Scope(*this, S, llvm::None,
+ OMPLexicalScope Scope(*this, S, std::nullopt,
!isOpenMPParallelDirective(S.getDirectiveKind()) &&
!isOpenMPSimdDirective(S.getDirectiveKind()));
TaskGen(*this, OutlinedFn, Data);
@@ -5192,6 +5247,16 @@ void CodeGenFunction::EmitOMPTaskyieldDirective(
CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}
+void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
+ const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
+ Expr *ME = MC ? MC->getMessageString() : nullptr;
+ const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
+ bool IsFatal = false;
+ if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
+ IsFatal = true;
+ CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
+}
+
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}
@@ -5200,6 +5265,7 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
OMPTaskDataTy Data;
// Build list of dependences
buildDependences(S, Data);
+ Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
}
@@ -5263,9 +5329,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
*this,
[&S]() -> ArrayRef<const Expr *> {
if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
- return llvm::makeArrayRef(FlushClause->varlist_begin(),
- FlushClause->varlist_end());
- return llvm::None;
+ return llvm::ArrayRef(FlushClause->varlist_begin(),
+ FlushClause->varlist_end());
+ return std::nullopt;
}(),
S.getBeginLoc(), AO);
}
@@ -5934,7 +6000,7 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
case llvm::AtomicOrdering::Acquire:
case llvm::AtomicOrdering::AcquireRelease:
case llvm::AtomicOrdering::SequentiallyConsistent:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
llvm::AtomicOrdering::Acquire);
break;
case llvm::AtomicOrdering::Monotonic:
@@ -5963,7 +6029,7 @@ static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
case llvm::AtomicOrdering::Release:
case llvm::AtomicOrdering::AcquireRelease:
case llvm::AtomicOrdering::SequentiallyConsistent:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
llvm::AtomicOrdering::Release);
break;
case llvm::AtomicOrdering::Acquire:
@@ -6154,7 +6220,7 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
case llvm::AtomicOrdering::Release:
case llvm::AtomicOrdering::AcquireRelease:
case llvm::AtomicOrdering::SequentiallyConsistent:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
llvm::AtomicOrdering::Release);
break;
case llvm::AtomicOrdering::Acquire:
@@ -6269,17 +6335,17 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
// operation is also an acquire flush.
switch (AO) {
case llvm::AtomicOrdering::Release:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
llvm::AtomicOrdering::Release);
break;
case llvm::AtomicOrdering::Acquire:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
llvm::AtomicOrdering::Acquire);
break;
case llvm::AtomicOrdering::AcquireRelease:
case llvm::AtomicOrdering::SequentiallyConsistent:
CGF.CGM.getOpenMPRuntime().emitFlush(
- CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
+ CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
break;
case llvm::AtomicOrdering::Monotonic:
break;
@@ -6392,95 +6458,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
IsPostfixUpdate, IsFailOnly, Loc);
break;
}
- case OMPC_if:
- case OMPC_final:
- case OMPC_num_threads:
- case OMPC_private:
- case OMPC_firstprivate:
- case OMPC_lastprivate:
- case OMPC_reduction:
- case OMPC_task_reduction:
- case OMPC_in_reduction:
- case OMPC_safelen:
- case OMPC_simdlen:
- case OMPC_sizes:
- case OMPC_full:
- case OMPC_partial:
- case OMPC_allocator:
- case OMPC_allocate:
- case OMPC_collapse:
- case OMPC_default:
- case OMPC_seq_cst:
- case OMPC_acq_rel:
- case OMPC_acquire:
- case OMPC_release:
- case OMPC_relaxed:
- case OMPC_shared:
- case OMPC_linear:
- case OMPC_aligned:
- case OMPC_copyin:
- case OMPC_copyprivate:
- case OMPC_flush:
- case OMPC_depobj:
- case OMPC_proc_bind:
- case OMPC_schedule:
- case OMPC_ordered:
- case OMPC_nowait:
- case OMPC_untied:
- case OMPC_threadprivate:
- case OMPC_depend:
- case OMPC_mergeable:
- case OMPC_device:
- case OMPC_threads:
- case OMPC_simd:
- case OMPC_map:
- case OMPC_num_teams:
- case OMPC_thread_limit:
- case OMPC_priority:
- case OMPC_grainsize:
- case OMPC_nogroup:
- case OMPC_num_tasks:
- case OMPC_hint:
- case OMPC_dist_schedule:
- case OMPC_defaultmap:
- case OMPC_uniform:
- case OMPC_to:
- case OMPC_from:
- case OMPC_use_device_ptr:
- case OMPC_use_device_addr:
- case OMPC_is_device_ptr:
- case OMPC_has_device_addr:
- case OMPC_unified_address:
- case OMPC_unified_shared_memory:
- case OMPC_reverse_offload:
- case OMPC_dynamic_allocators:
- case OMPC_atomic_default_mem_order:
- case OMPC_device_type:
- case OMPC_match:
- case OMPC_nontemporal:
- case OMPC_order:
- case OMPC_destroy:
- case OMPC_detach:
- case OMPC_inclusive:
- case OMPC_exclusive:
- case OMPC_uses_allocators:
- case OMPC_affinity:
- case OMPC_init:
- case OMPC_inbranch:
- case OMPC_notinbranch:
- case OMPC_link:
- case OMPC_indirect:
- case OMPC_use:
- case OMPC_novariants:
- case OMPC_nocontext:
- case OMPC_filter:
- case OMPC_when:
- case OMPC_adjust_args:
- case OMPC_append_args:
- case OMPC_memory_order:
- case OMPC_bind:
- case OMPC_align:
- case OMPC_cancellation_construct_type:
+ default:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -7658,6 +7636,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
(*LIP)->getType(), S.getBeginLoc())));
}
+ LoopScope.restoreMap();
CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
@@ -7714,7 +7693,7 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
};
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
- OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
+ OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
@@ -7792,7 +7771,19 @@ void CodeGenFunction::EmitOMPGenericLoopDirective(
const OMPGenericLoopDirective &S) {
// Unimplemented, just inline the underlying statement for now.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ // Emit the loop iteration variable.
+ const Stmt *CS =
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
+ const auto *ForS = dyn_cast<ForStmt>(CS);
+ if (ForS && !isa<DeclStmt>(ForS->getInit())) {
+ OMPPrivateScope LoopScope(CGF);
+ CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+ (void)LoopScope.Privatize();
+ CGF.EmitStmt(CS);
+ LoopScope.restoreMap();
+ } else {
+ CGF.EmitStmt(CS);
+ }
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
diff --git a/clang/lib/CodeGen/CGVTT.cpp b/clang/lib/CodeGen/CGVTT.cpp
index ebac9196df02..d0c8e351626b 100644
--- a/clang/lib/CodeGen/CGVTT.cpp
+++ b/clang/lib/CodeGen/CGVTT.cpp
@@ -114,7 +114,7 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
llvm::ArrayType *ArrayType =
llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size());
- unsigned Align = CGM.getDataLayout().getABITypeAlignment(CGM.Int8PtrTy);
+ llvm::Align Align = CGM.getDataLayout().getABITypeAlign(CGM.Int8PtrTy);
llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align);
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index cdd40d2a6a2e..a0b5d9e4b096 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -128,7 +128,7 @@ static void resolveTopLevelMetadata(llvm::Function *Fn,
// Find all llvm.dbg.declare intrinsics and resolve the DILocalVariable nodes
// they are referencing.
- for (auto &BB : Fn->getBasicBlockList()) {
+ for (auto &BB : *Fn) {
for (auto &I : BB) {
if (auto *DII = dyn_cast<llvm::DbgVariableIntrinsic>(&I)) {
auto *DILocal = DII->getVariable();
@@ -664,6 +664,12 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,
proxy->setVisibility(llvm::GlobalValue::HiddenVisibility);
proxy->setComdat(module.getOrInsertComdat(rttiProxyName));
}
+ // Do not instrument the rtti proxies with hwasan to avoid a duplicate
+ // symbol error. Aliases generated by hwasan will retain the same name but
+ // the addresses they are set to may have different tags from different
+ // compilation units. We don't run into this without hwasan because the
+ // proxies are in comdat groups, but those aren't propagated to the alias.
+ RemoveHwasanMetadata(proxy);
}
target = proxy;
}
@@ -672,15 +678,23 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,
/*position=*/vtableAddressPoint);
}
-bool CodeGenVTables::useRelativeLayout() const {
+static bool UseRelativeLayout(const CodeGenModule &CGM) {
return CGM.getTarget().getCXXABI().isItaniumFamily() &&
CGM.getItaniumVTableContext().isRelativeLayout();
}
+bool CodeGenVTables::useRelativeLayout() const {
+ return UseRelativeLayout(CGM);
+}
+
+llvm::Type *CodeGenModule::getVTableComponentType() const {
+ if (UseRelativeLayout(*this))
+ return Int32Ty;
+ return Int8PtrTy;
+}
+
llvm::Type *CodeGenVTables::getVTableComponentType() const {
- if (useRelativeLayout())
- return CGM.Int32Ty;
- return CGM.Int8PtrTy;
+ return CGM.getVTableComponentType();
}
static void AddPointerLayoutOffset(const CodeGenModule &CGM,
@@ -895,7 +909,7 @@ llvm::GlobalVariable *CodeGenVTables::GenerateConstructionVTable(
if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage)
Linkage = llvm::GlobalVariable::InternalLinkage;
- unsigned Align = CGM.getDataLayout().getABITypeAlignment(VTType);
+ llvm::Align Align = CGM.getDataLayout().getABITypeAlign(VTType);
// Create the variable that will hold the construction vtable.
llvm::GlobalVariable *VTable =
@@ -921,12 +935,33 @@ llvm::GlobalVariable *CodeGenVTables::GenerateConstructionVTable(
CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get());
- if (UsingRelativeLayout && !VTable->isDSOLocal())
- GenerateRelativeVTableAlias(VTable, OutName);
+ if (UsingRelativeLayout) {
+ RemoveHwasanMetadata(VTable);
+ if (!VTable->isDSOLocal())
+ GenerateRelativeVTableAlias(VTable, OutName);
+ }
return VTable;
}
+// Ensure this vtable is not instrumented by hwasan. That is, a global alias is
+// not generated for it. This is mainly used by the relative-vtables ABI where
+// vtables instead contain 32-bit offsets between the vtable and function
+// pointers. Hwasan is disabled for these vtables for now because the tag in a
+// vtable pointer may fail the overflow check when resolving 32-bit PLT
+// relocations. A future alternative for this would be finding which usages of
+// the vtable can continue to use the untagged hwasan value without any loss of
+// value in hwasan.
+void CodeGenVTables::RemoveHwasanMetadata(llvm::GlobalValue *GV) const {
+ if (CGM.getLangOpts().Sanitize.has(SanitizerKind::HWAddress)) {
+ llvm::GlobalValue::SanitizerMetadata Meta;
+ if (GV->hasSanitizerMetadata())
+ Meta = GV->getSanitizerMetadata();
+ Meta.NoHWAddress = true;
+ GV->setSanitizerMetadata(Meta);
+ }
+}
+
// If the VTable is not dso_local, then we will not be able to indicate that
// the VTable does not need a relocation and move into rodata. A frequent
// time this can occur is for classes that should be made public from a DSO
@@ -1254,8 +1289,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
if (!getCodeGenOpts().LTOUnit)
return;
- CharUnits PointerWidth =
- Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
+ CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());
typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint;
std::vector<AddressPoint> AddressPoints;
@@ -1293,7 +1327,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
ArrayRef<VTableComponent> Comps = VTLayout.vtable_components();
for (auto AP : AddressPoints) {
// Create type metadata for the address point.
- AddVTableTypeMetadata(VTable, PointerWidth * AP.second, AP.first);
+ AddVTableTypeMetadata(VTable, ComponentWidth * AP.second, AP.first);
// The class associated with each address point could also potentially be
// used for indirect calls via a member function pointer, so we need to
@@ -1306,7 +1340,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
Context.getMemberPointerType(
Comps[I].getFunctionDecl()->getType(),
Context.getRecordType(AP.first).getTypePtr()));
- VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD);
+ VTable->addTypeMetadata((ComponentWidth * I).getQuantity(), MD);
}
}
diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h
index bdfc075ee305..e7b59d94f257 100644
--- a/clang/lib/CodeGen/CGVTables.h
+++ b/clang/lib/CodeGen/CGVTables.h
@@ -102,6 +102,10 @@ public:
return *cast<ItaniumVTableContext>(VTContext);
}
+ const ItaniumVTableContext &getItaniumVTableContext() const {
+ return *cast<ItaniumVTableContext>(VTContext);
+ }
+
MicrosoftVTableContext &getMicrosoftVTableContext() {
return *cast<MicrosoftVTableContext>(VTContext);
}
@@ -154,6 +158,9 @@ public:
/// when a vtable may not be dso_local.
void GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable,
llvm::StringRef AliasNameRef);
+
+ /// Specify a global should not be instrumented with hwasan.
+ void RemoveHwasanMetadata(llvm::GlobalValue *GV) const;
};
} // end namespace CodeGen
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 12c6b3f49c43..2b219267869e 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -49,6 +49,7 @@
#include "llvm/Transforms/IPO/Internalize.h"
#include <memory>
+#include <optional>
using namespace clang;
using namespace llvm;
@@ -422,7 +423,8 @@ namespace clang {
bool &BadDebugInfo, StringRef &Filename,
unsigned &Line, unsigned &Column) const;
- Optional<FullSourceLoc> getFunctionSourceLocation(const Function &F) const;
+ std::optional<FullSourceLoc>
+ getFunctionSourceLocation(const Function &F) const;
void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI);
/// Specialized handler for InlineAsm diagnostic.
@@ -435,6 +437,11 @@ namespace clang {
/// \return True if the diagnostic has been successfully reported, false
/// otherwise.
bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
+ /// Specialized handler for ResourceLimit diagnostic.
+ /// \return True if the diagnostic has been successfully reported, false
+ /// otherwise.
+ bool ResourceLimitDiagHandler(const llvm::DiagnosticInfoResourceLimit &D);
+
/// Specialized handler for unsupported backend feature diagnostic.
void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
/// Specialized handlers for optimization remarks.
@@ -623,10 +630,23 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) {
if (!Loc)
return false;
- // FIXME: Shouldn't need to truncate to uint32_t
Diags.Report(*Loc, diag::warn_fe_frame_larger_than)
- << static_cast<uint32_t>(D.getStackSize())
- << static_cast<uint32_t>(D.getStackLimit())
+ << D.getStackSize()
+ << D.getStackLimit()
+ << llvm::demangle(D.getFunction().getName().str());
+ return true;
+}
+
+bool BackendConsumer::ResourceLimitDiagHandler(
+ const llvm::DiagnosticInfoResourceLimit &D) {
+ auto Loc = getFunctionSourceLocation(D.getFunction());
+ if (!Loc)
+ return false;
+ unsigned DiagID = diag::err_fe_backend_resource_limit;
+ ComputeDiagID(D.getSeverity(), backend_resource_limit, DiagID);
+
+ Diags.Report(*Loc, DiagID)
+ << D.getResourceName() << D.getResourceSize() << D.getResourceLimit()
<< llvm::demangle(D.getFunction().getName().str());
return true;
}
@@ -673,14 +693,14 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
return Loc;
}
-Optional<FullSourceLoc>
+std::optional<FullSourceLoc>
BackendConsumer::getFunctionSourceLocation(const Function &F) const {
auto Hash = llvm::hash_value(F.getName());
for (const auto &Pair : ManglingFullSourceLocs) {
if (Pair.first == Hash)
return Pair.second;
}
- return Optional<FullSourceLoc>();
+ return std::nullopt;
}
void BackendConsumer::UnsupportedDiagHandler(
@@ -874,6 +894,11 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
return;
ComputeDiagID(Severity, backend_frame_larger_than, DiagID);
break;
+ case llvm::DK_ResourceLimit:
+ if (ResourceLimitDiagHandler(cast<DiagnosticInfoResourceLimit>(DI)))
+ return;
+ ComputeDiagID(Severity, backend_resource_limit, DiagID);
+ break;
case DK_Linker:
ComputeDiagID(Severity, linking_module, DiagID);
break;
@@ -1078,6 +1103,8 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) {
CompilerInstance &CI = getCompilerInstance();
SourceManager &SM = CI.getSourceManager();
+ VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers);
+
// For ThinLTO backend invocations, ensure that the context
// merges types based on ODR identifiers. We also need to read
// the correct module out of a multi-module bitcode file.
@@ -1157,7 +1184,7 @@ void CodeGenAction::ExecuteAction() {
SourceManager &SM = CI.getSourceManager();
FileID FID = SM.getMainFileID();
- Optional<MemoryBufferRef> MainFile = SM.getBufferOrNone(FID);
+ std::optional<MemoryBufferRef> MainFile = SM.getBufferOrNone(FID);
if (!MainFile)
return;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 5012bd822bd3..8cbe2a540744 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -16,6 +16,7 @@
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
+#include "CGHLSLRuntime.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenModule.h"
#include "CodeGenPGO.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/CRC.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -172,10 +174,11 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs());
mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs());
mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero());
- mergeFnAttrValue("unsafe-fp-math", FPFeatures.getAllowFPReassociate() &&
- FPFeatures.getAllowReciprocal() &&
- FPFeatures.getAllowApproxFunc() &&
- FPFeatures.getNoSignedZero());
+ mergeFnAttrValue(
+ "unsafe-fp-math",
+ FPFeatures.getAllowFPReassociate() && FPFeatures.getAllowReciprocal() &&
+ FPFeatures.getAllowApproxFunc() && FPFeatures.getNoSignedZero() &&
+ FPFeatures.allowFPContractAcrossStatement());
}
CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() {
@@ -317,8 +320,10 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() {
static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) {
if (!BB) return;
- if (!BB->use_empty())
- return CGF.CurFn->getBasicBlockList().push_back(BB);
+ if (!BB->use_empty()) {
+ CGF.CurFn->insert(CGF.CurFn->end(), BB);
+ return;
+ }
delete BB;
}
@@ -356,17 +361,18 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
bool HasOnlyLifetimeMarkers =
HasCleanups && EHStack.containsOnlyLifetimeMarkers(PrologueCleanupDepth);
bool EmitRetDbgLoc = !HasCleanups || HasOnlyLifetimeMarkers;
+
+ std::optional<ApplyDebugLocation> OAL;
if (HasCleanups) {
// Make sure the line table doesn't jump back into the body for
// the ret after it's been at EndLoc.
- Optional<ApplyDebugLocation> AL;
if (CGDebugInfo *DI = getDebugInfo()) {
if (OnlySimpleReturnStmts)
DI->EmitLocation(Builder, EndLoc);
else
// We may not have a valid end location. Try to apply it anyway, and
// fall back to an artificial location if needed.
- AL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc);
+ OAL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc);
}
PopCleanupBlocks(PrologueCleanupDepth);
@@ -477,13 +483,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
+ VT->getPrimitiveSizeInBits().getKnownMinValue());
// Update vector width based on return type.
if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
- VT->getPrimitiveSizeInBits().getKnownMinSize());
+ VT->getPrimitiveSizeInBits().getKnownMinValue());
if (CurFnInfo->getMaxVectorWidth() > LargestVectorWidth)
LargestVectorWidth = CurFnInfo->getMaxVectorWidth();
@@ -495,10 +501,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// 4. Width of vector arguments and return types for this function.
// 5. Width of vector arguments and return types for functions called by this
// function.
- CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
+ if (getContext().getTargetInfo().getTriple().isX86())
+ CurFn->addFnAttr("min-legal-vector-width",
+ llvm::utostr(LargestVectorWidth));
// Add vscale_range attribute if appropriate.
- Optional<std::pair<unsigned, unsigned>> VScaleRange =
+ std::optional<std::pair<unsigned, unsigned>> VScaleRange =
getContext().getTargetInfo().getVScaleRange(getLangOpts());
if (VScaleRange) {
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
@@ -699,7 +707,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
CurCodeDecl = D;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD && FD->usesSEHTry())
- CurSEHParent = FD;
+ CurSEHParent = GD;
CurFuncDecl = (D ? D->getNonClosureContext() : nullptr);
FnRetTy = RetTy;
CurFn = Fn;
@@ -724,7 +732,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
const bool SanitizeBounds = SanOpts.hasOneOf(SanitizerKind::Bounds);
bool NoSanitizeCoverage = false;
- for (auto Attr : D->specific_attrs<NoSanitizeAttr>()) {
+ for (auto *Attr : D->specific_attrs<NoSanitizeAttr>()) {
// Apply the no_sanitize* attributes to SanOpts.
SanitizerMask mask = Attr->getMask();
SanOpts.Mask &= ~mask;
@@ -842,8 +850,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
auto FuncGroups = CGM.getCodeGenOpts().XRayTotalFunctionGroups;
if (FuncGroups > 1) {
- auto FuncName = llvm::makeArrayRef<uint8_t>(
- CurFn->getName().bytes_begin(), CurFn->getName().bytes_end());
+ auto FuncName = llvm::ArrayRef<uint8_t>(CurFn->getName().bytes_begin(),
+ CurFn->getName().bytes_end());
auto Group = crc32(FuncName) % FuncGroups;
if (Group != CGM.getCodeGenOpts().XRaySelectedFunctionGroup &&
!AlwaysXRayAttr)
@@ -851,9 +859,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
}
}
- if (CGM.getCodeGenOpts().getProfileInstr() != CodeGenOptions::ProfileNone)
- if (CGM.isFunctionBlockedFromProfileInstr(Fn, Loc))
+ if (CGM.getCodeGenOpts().getProfileInstr() != CodeGenOptions::ProfileNone) {
+ switch (CGM.isFunctionBlockedFromProfileInstr(Fn, Loc)) {
+ case ProfileList::Skip:
+ Fn->addFnAttr(llvm::Attribute::SkipProfile);
+ break;
+ case ProfileList::Forbid:
Fn->addFnAttr(llvm::Attribute::NoProfile);
+ break;
+ case ProfileList::Allow:
+ break;
+ }
+ }
unsigned Count, Offset;
if (const auto *Attr =
@@ -874,7 +891,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// backends as they don't need it -- instructions on these architectures are
// always atomically patchable at runtime.
if (CGM.getCodeGenOpts().HotPatch &&
- getContext().getTargetInfo().getTriple().isX86())
+ getContext().getTargetInfo().getTriple().isX86() &&
+ getContext().getTargetInfo().getTriple().getEnvironment() !=
+ llvm::Triple::CODE16)
Fn->addFnAttr("patchable-function", "prologue-short-redirect");
// Add no-jump-tables value.
@@ -941,7 +960,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// If we're checking nullability, we need to know whether we can check the
// return value. Initialize the flag to 'true' and refine it in EmitParmDecl.
if (SanOpts.has(SanitizerKind::NullabilityReturn)) {
- auto Nullability = FnRetTy->getNullability(getContext());
+ auto Nullability = FnRetTy->getNullability();
if (Nullability && *Nullability == NullabilityKind::NonNull) {
if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) &&
CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()))
@@ -1128,6 +1147,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (getLangOpts().OpenMP && CurCodeDecl)
CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl);
+ // Handle emitting HLSL entry functions.
+ if (D && D->hasAttr<HLSLShaderAttr>())
+ CGM.getHLSLRuntime().emitEntryFunction(FD, Fn);
+
EmitFunctionProlog(*CurFnInfo, CurFn, Args);
if (isa_and_nonnull<CXXMethodDecl>(D) &&
@@ -1450,7 +1473,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
llvm::Value *IsFalse = Builder.getFalse();
EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
SanitizerHandler::MissingReturn,
- EmitCheckSourceLocation(FD->getLocation()), None);
+ EmitCheckSourceLocation(FD->getLocation()), std::nullopt);
} else if (ShouldEmitUnreachable) {
if (CGM.getCodeGenOpts().OptimizationLevel == 0)
EmitTrapCall(llvm::Intrinsic::trap);
@@ -2214,7 +2237,6 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
case Type::ConstantMatrix:
case Type::Record:
case Type::Enum:
- case Type::Elaborated:
case Type::Using:
case Type::TemplateSpecialization:
case Type::ObjCTypeParam:
@@ -2224,6 +2246,10 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
case Type::BitInt:
llvm_unreachable("type class is never variably-modified!");
+ case Type::Elaborated:
+ type = cast<ElaboratedType>(ty)->getNamedType();
+ break;
+
case Type::Adjusted:
type = cast<AdjustedType>(ty)->getAdjustedType();
break;
@@ -2426,8 +2452,6 @@ void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue) {
- if (auto *CE = dyn_cast<CastExpr>(E))
- E = CE->getSubExprAsWritten();
QualType Ty = E->getType();
SourceLocation Loc = E->getExprLoc();
@@ -2442,8 +2466,10 @@ llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn,
const AnnotateAttr *Attr) {
SmallVector<llvm::Value *, 5> Args = {
AnnotatedVal,
- Builder.CreateBitCast(CGM.EmitAnnotationString(AnnotationStr), Int8PtrTy),
- Builder.CreateBitCast(CGM.EmitAnnotationUnit(Location), Int8PtrTy),
+ Builder.CreateBitCast(CGM.EmitAnnotationString(AnnotationStr),
+ ConstGlobalsPtrTy),
+ Builder.CreateBitCast(CGM.EmitAnnotationUnit(Location),
+ ConstGlobalsPtrTy),
CGM.EmitAnnotationLineNo(Location),
};
if (Attr)
@@ -2455,9 +2481,12 @@ void CodeGenFunction::EmitVarAnnotations(const VarDecl *D, llvm::Value *V) {
assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute");
// FIXME We create a new bitcast for every annotation because that's what
// llvm-gcc was doing.
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ llvm::Type *I8PtrTy = Builder.getInt8PtrTy(AS);
for (const auto *I : D->specific_attrs<AnnotateAttr>())
- EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation),
- Builder.CreateBitCast(V, CGM.Int8PtrTy, V->getName()),
+ EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation,
+ {I8PtrTy, CGM.ConstGlobalsPtrTy}),
+ Builder.CreateBitCast(V, I8PtrTy, V->getName()),
I->getAnnotation(), D->getLocation(), I);
}
@@ -2470,8 +2499,8 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
unsigned AS = PTy ? PTy->getAddressSpace() : 0;
llvm::PointerType *IntrinTy =
llvm::PointerType::getWithSamePointeeType(CGM.Int8PtrTy, AS);
- llvm::Function *F =
- CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, IntrinTy);
+ llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation,
+ {IntrinTy, CGM.ConstGlobalsPtrTy});
for (const auto *I : D->specific_attrs<AnnotateAttr>()) {
// FIXME Always emit the cast inst so we can differentiate between
@@ -2594,8 +2623,30 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
CGM.getSanStats().create(IRB, SSK);
}
-llvm::Value *
-CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
+void CodeGenFunction::EmitKCFIOperandBundle(
+ const CGCallee &Callee, SmallVectorImpl<llvm::OperandBundleDef> &Bundles) {
+ const FunctionProtoType *FP =
+ Callee.getAbstractInfo().getCalleeFunctionProtoType();
+ if (FP)
+ Bundles.emplace_back("kcfi", CGM.CreateKCFITypeId(FP->desugar()));
+}
+
+llvm::Value *CodeGenFunction::FormAArch64ResolverCondition(
+ const MultiVersionResolverOption &RO) {
+ llvm::SmallVector<StringRef, 8> CondFeatures;
+ for (const StringRef &Feature : RO.Conditions.Features) {
+ // Form condition for features which are not yet enabled in target
+ if (!getContext().getTargetInfo().hasFeature(Feature))
+ CondFeatures.push_back(Feature);
+ }
+ if (!CondFeatures.empty()) {
+ return EmitAArch64CpuSupports(CondFeatures);
+ }
+ return nullptr;
+}
+
+llvm::Value *CodeGenFunction::FormX86ResolverCondition(
+ const MultiVersionResolverOption &RO) {
llvm::Value *Condition = nullptr;
if (!RO.Conditions.Architecture.empty())
@@ -2633,8 +2684,72 @@ static void CreateMultiVersionResolverReturn(CodeGenModule &CGM,
void CodeGenFunction::EmitMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
- assert(getContext().getTargetInfo().getTriple().isX86() &&
- "Only implemented for x86 targets");
+
+ llvm::Triple::ArchType ArchType =
+ getContext().getTargetInfo().getTriple().getArch();
+
+ switch (ArchType) {
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ EmitX86MultiVersionResolver(Resolver, Options);
+ return;
+ case llvm::Triple::aarch64:
+ EmitAArch64MultiVersionResolver(Resolver, Options);
+ return;
+
+ default:
+ assert(false && "Only implemented for x86 and AArch64 targets");
+ }
+}
+
+void CodeGenFunction::EmitAArch64MultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
+ assert(!Options.empty() && "No multiversion resolver options found");
+ assert(Options.back().Conditions.Features.size() == 0 &&
+ "Default case must be last");
+ bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
+ assert(SupportsIFunc &&
+ "Multiversion resolver requires target IFUNC support");
+ bool AArch64CpuInitialized = false;
+ llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
+
+ for (const MultiVersionResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+ llvm::Value *Condition = FormAArch64ResolverCondition(RO);
+
+ // The 'default' or 'all features enabled' case.
+ if (!Condition) {
+ CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function,
+ SupportsIFunc);
+ return;
+ }
+
+ if (!AArch64CpuInitialized) {
+ Builder.SetInsertPoint(CurBlock, CurBlock->begin());
+ EmitAArch64CpuInit();
+ AArch64CpuInitialized = true;
+ Builder.SetInsertPoint(CurBlock);
+ }
+
+ llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
+ CGBuilderTy RetBuilder(*this, RetBlock);
+ CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function,
+ SupportsIFunc);
+ CurBlock = createBasicBlock("resolver_else", Resolver);
+ Builder.CreateCondBr(Condition, RetBlock, CurBlock);
+ }
+
+ // If no default, emit an unreachable.
+ Builder.SetInsertPoint(CurBlock);
+ llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
+}
+
+void CodeGenFunction::EmitX86MultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
@@ -2645,7 +2760,7 @@ void CodeGenFunction::EmitMultiVersionResolver(
for (const MultiVersionResolverOption &RO : Options) {
Builder.SetInsertPoint(CurBlock);
- llvm::Value *Condition = FormResolverCondition(RO);
+ llvm::Value *Condition = FormX86ResolverCondition(RO);
// The 'default' or 'generic' case.
if (!Condition) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index fe0890f433e8..a535aa7c0410 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -41,6 +41,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include <optional>
namespace llvm {
class BasicBlock;
@@ -539,7 +540,7 @@ public:
/// potentially set the return value.
bool SawAsmBlock = false;
- const NamedDecl *CurSEHParent = nullptr;
+ GlobalDecl CurSEHParent;
/// True if the current function is an outlined SEH helper. This can be a
/// finally block or filter expression.
@@ -570,7 +571,7 @@ public:
return false;
// C++11 and later guarantees that a thread eventually will do one of the
- // following (6.9.2.3.1 in C++11):
+ // following (C++11 [intro.multithread]p24 and C++17 [intro.progress]p1):
// - terminate,
// - make a call to a library I/O function,
// - perform an access through a volatile glvalue, or
@@ -609,7 +610,7 @@ public:
const CodeGen::CGBlockInfo *BlockInfo = nullptr;
llvm::Value *BlockPointer = nullptr;
- llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
+ llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
/// A mapping from NRVO variables to the flags used to indicate
@@ -723,7 +724,7 @@ public:
FPOptions OldFPFeatures;
llvm::fp::ExceptionBehavior OldExcept;
llvm::RoundingMode OldRounding;
- Optional<CGBuilderTy::FastMathFlagGuard> FMFGuard;
+ std::optional<CGBuilderTy::FastMathFlagGuard> FMFGuard;
};
FPOptions CurFPFeatures;
@@ -1094,7 +1095,7 @@ public:
void ForceCleanup() {
RunCleanupsScope::ForceCleanup();
- MappedVars.restore(CGF);
+ restoreMap();
}
/// Exit scope - all the mapped variables are restored.
@@ -1108,6 +1109,11 @@ public:
VD = VD->getCanonicalDecl();
return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
}
+
+ /// Restore all mapped variables w/o clean up. This is useful when we want
+ /// to reference the original variables but don't want the clean up because
+ /// that could emit lifetime end too early, causing backend issue #56913.
+ void restoreMap() { MappedVars.restore(CGF); }
};
/// Save/restore original map of previously emitted local vars in case when we
@@ -1522,7 +1528,8 @@ public:
/// If \p StepV is null, the default increment is 1.
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) {
if (CGM.getCodeGenOpts().hasProfileClangInstr() &&
- !CurFn->hasFnAttribute(llvm::Attribute::NoProfile))
+ !CurFn->hasFnAttribute(llvm::Attribute::NoProfile) &&
+ !CurFn->hasFnAttribute(llvm::Attribute::SkipProfile))
PGO.emitCounterIncrement(Builder, S, StepV);
PGO.setCurrentStmt(S);
}
@@ -2015,7 +2022,7 @@ public:
return getInvokeDestImpl();
}
- bool currentFunctionUsesSEHTry() const { return CurSEHParent != nullptr; }
+ bool currentFunctionUsesSEHTry() const { return !!CurSEHParent; }
const TargetInfo &getTarget() const { return Target; }
llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
@@ -2225,7 +2232,7 @@ public:
/// Emit the unified return block, trying to avoid its emission when
/// possible.
/// \return The debug location of the user written return statement if the
- /// return block is is avoided.
+ /// return block is avoided.
llvm::DebugLoc EmitReturnBlock();
/// FinishFunction - Complete IR generation of the current function. It is
@@ -2878,7 +2885,7 @@ public:
AggValueSlot::Overlap_t Overlap,
SourceLocation Loc, bool NewPointerIsChecked);
- /// Emit assumption load for all bases. Requires to be be called only on
+ /// Emit assumption load for all bases. Requires to be called only on
/// most-derived class and not under construction of the object.
void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This);
@@ -3207,7 +3214,7 @@ public:
/// This function may clear the current insertion point; callers should use
/// EnsureInsertPoint if they wish to subsequently generate code without first
/// calling EmitBlock, EmitBranch, or EmitStmt.
- void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = None);
+ void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = std::nullopt);
/// EmitSimpleStmt - Try to emit a "simple" statement which does not
/// necessarily require an insertion point or debug information; typically
@@ -3235,10 +3242,10 @@ public:
void EmitIfStmt(const IfStmt &S);
void EmitWhileStmt(const WhileStmt &S,
- ArrayRef<const Attr *> Attrs = None);
- void EmitDoStmt(const DoStmt &S, ArrayRef<const Attr *> Attrs = None);
+ ArrayRef<const Attr *> Attrs = std::nullopt);
+ void EmitDoStmt(const DoStmt &S, ArrayRef<const Attr *> Attrs = std::nullopt);
void EmitForStmt(const ForStmt &S,
- ArrayRef<const Attr *> Attrs = None);
+ ArrayRef<const Attr *> Attrs = std::nullopt);
void EmitReturnStmt(const ReturnStmt &S);
void EmitDeclStmt(const DeclStmt &S);
void EmitBreakStmt(const BreakStmt &S);
@@ -3315,7 +3322,7 @@ public:
llvm::Value *ParentFP);
void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
- ArrayRef<const Attr *> Attrs = None);
+ ArrayRef<const Attr *> Attrs = std::nullopt);
/// Controls insertion of cancellation exit blocks in worksharing constructs.
class OMPCancelStackRAII {
@@ -3514,6 +3521,7 @@ public:
void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S);
void EmitOMPTaskDirective(const OMPTaskDirective &S);
void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S);
+ void EmitOMPErrorDirective(const OMPErrorDirective &S);
void EmitOMPBarrierDirective(const OMPBarrierDirective &S);
void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S);
void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
@@ -3967,6 +3975,8 @@ public:
llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
+ llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
+ const ObjCIvarDecl *Ivar);
LValue EmitLValueForField(LValue Base, const FieldDecl* Field);
LValue EmitLValueForLambdaField(const FieldDecl *Field);
@@ -4194,6 +4204,12 @@ public:
llvm::Type *getEltType(const SVETypeFlags &TypeFlags);
llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags);
llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags);
+ llvm::Value *EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
+ llvm::Type *ReturnType,
+ ArrayRef<llvm::Value *> Ops);
+ llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
+ llvm::Type *ReturnType,
+ ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty);
@@ -4247,7 +4263,8 @@ public:
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
+ llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
@@ -4403,6 +4420,11 @@ public:
/// EmitLoadOfComplex - Load a complex number from the specified l-value.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc);
+ ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType PromotionType);
+ llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType PromotionType);
+ ComplexPairTy EmitPromotedValue(ComplexPairTy result, QualType PromotionType);
+ ComplexPairTy EmitUnPromotedValue(ComplexPairTy result, QualType PromotionType);
+
Address emitAddrOfRealComponent(Address complex, QualType complexType);
Address emitAddrOfImagComponent(Address complex, QualType complexType);
@@ -4600,6 +4622,9 @@ public:
/// passing to a runtime sanitizer handler.
llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc);
+ void EmitKCFIOperandBundle(const CGCallee &Callee,
+ SmallVectorImpl<llvm::OperandBundleDef> &Bundles);
+
/// Create a basic block that will either trap or call a handler function in
/// the UBSan runtime with the provided arguments, and create a conditional
/// branch to it.
@@ -4789,6 +4814,12 @@ public:
// last (if it exists).
void EmitMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
+ void
+ EmitX86MultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<MultiVersionResolverOption> Options);
+ void
+ EmitAArch64MultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<MultiVersionResolverOption> Options);
private:
QualType getVarArgType(const Expr *Arg);
@@ -4807,7 +4838,11 @@ private:
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
llvm::Value *EmitX86CpuSupports(uint64_t Mask);
llvm::Value *EmitX86CpuInit();
- llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *EmitAArch64CpuInit();
+ llvm::Value *
+ FormAArch64ResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *EmitAArch64CpuSupports(ArrayRef<StringRef> FeatureStrs);
};
@@ -4817,9 +4852,9 @@ DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *value) {
// Otherwise, we need an alloca.
auto align = CharUnits::fromQuantity(
- CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType()));
+ CGF.CGM.getDataLayout().getPrefTypeAlign(value->getType()));
Address alloca =
- CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save");
+ CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save");
CGF.Builder.CreateStore(value, alloca);
return saved_type(alloca.getPointer(), true);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 4e8e120d89df..12d602fed693 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -47,6 +47,8 @@
#include "clang/CodeGen/BackendUtil.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -58,14 +60,16 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MD5.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/X86TargetParser.h"
+#include "llvm/Support/xxhash.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
@@ -120,9 +124,10 @@ CodeGenModule::CodeGenModule(ASTContext &C,
BFloatTy = llvm::Type::getBFloatTy(LLVMContext);
FloatTy = llvm::Type::getFloatTy(LLVMContext);
DoubleTy = llvm::Type::getDoubleTy(LLVMContext);
- PointerWidthInBits = C.getTargetInfo().getPointerWidth(0);
+ PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default);
PointerAlignInBytes =
- C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(0)).getQuantity();
+ C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default))
+ .getQuantity();
SizeSizeInBytes =
C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity();
IntAlignInBytes =
@@ -137,6 +142,8 @@ CodeGenModule::CodeGenModule(ASTContext &C,
const llvm::DataLayout &DL = M.getDataLayout();
AllocaInt8PtrTy = Int8Ty->getPointerTo(DL.getAllocaAddrSpace());
GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace());
+ ConstGlobalsPtrTy = Int8Ty->getPointerTo(
+ C.getTargetAddressSpace(GetGlobalConstantAddressSpace()));
ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace();
// Build C++20 Module initializers.
@@ -179,15 +186,11 @@ CodeGenModule::CodeGenModule(ASTContext &C,
if (CodeGenOpts.hasProfileClangUse()) {
auto ReaderOrErr = llvm::IndexedInstrProfReader::create(
CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile);
- if (auto E = ReaderOrErr.takeError()) {
- unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
- "Could not read profile %0: %1");
- llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
- getDiags().Report(DiagID) << CodeGenOpts.ProfileInstrumentUsePath
- << EI.message();
- });
- } else
- PGOReader = std::move(ReaderOrErr.get());
+ // We're checking for profile read errors in CompilerInvocation, so if
+ // there was an error it should've already been caught. If it hasn't been
+ // somehow, trip an assertion.
+ assert(ReaderOrErr);
+ PGOReader = std::move(ReaderOrErr.get());
}
// If coverage mapping generation is enabled, create the
@@ -205,22 +208,7 @@ CodeGenModule::CodeGenModule(ASTContext &C,
Path = Entry.second + Path.substr(Entry.first.size());
break;
}
- llvm::MD5 Md5;
- Md5.update(Path);
- llvm::MD5::MD5Result R;
- Md5.final(R);
- SmallString<32> Str;
- llvm::MD5::stringifyResult(R, Str);
- // Convert MD5hash to Decimal. Demangler suffixes can either contain
- // numbers or characters but not both.
- llvm::APInt IntHash(128, Str.str(), 16);
- // Prepend "__uniq" before the hash for tools like profilers to understand
- // that this symbol is of internal linkage type. The "__uniq" is the
- // pre-determined prefix that is used to tell tools that this symbol was
- // created with -funique-internal-linakge-symbols and the tools can strip or
- // keep the prefix as needed.
- ModuleNameHash = (Twine(".__uniq.") +
- Twine(toString(IntHash, /* Radix = */ 10, /* Signed = */false))).str();
+ ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path);
}
}
@@ -521,7 +509,7 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO,
void CodeGenModule::Release() {
Module *Primary = getContext().getModuleForCodeGen();
- if (CXX20ModuleInits && Primary && !Primary->isModuleMapModule())
+ if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule())
EmitModuleInitializers(Primary);
EmitDeferred();
DeferredDecls.insert(EmittedDeferredDecls.begin(),
@@ -531,6 +519,14 @@ void CodeGenModule::Release() {
applyGlobalValReplacements();
applyReplacements();
emitMultiVersionFunctions();
+
+ if (Context.getLangOpts().IncrementalExtensions &&
+ GlobalTopLevelStmtBlockInFlight.first) {
+ const TopLevelStmtDecl *TLSD = GlobalTopLevelStmtBlockInFlight.second;
+ GlobalTopLevelStmtBlockInFlight.first->FinishFunction(TLSD->getEndLoc());
+ GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr};
+ }
+
if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition())
EmitCXXModuleInitFunc(Primary);
else
@@ -560,6 +556,9 @@ void CodeGenModule::Release() {
if (PGOStats.hasDiagnostics())
PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName);
}
+ llvm::stable_sort(GlobalCtors, [](const Structor &L, const Structor &R) {
+ return L.LexOrder < R.LexOrder;
+ });
EmitCtorList(GlobalCtors, "llvm.global_ctors");
EmitCtorList(GlobalDtors, "llvm.global_dtors");
EmitGlobalAnnotations();
@@ -573,6 +572,8 @@ void CodeGenModule::Release() {
CodeGenFunction(*this).EmitCfiCheckFail();
CodeGenFunction(*this).EmitCfiCheckStub();
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
+ finalizeKCFITypes();
emitAtAvailableLinkGuard();
if (Context.getTargetInfo().getTriple().isWasm())
EmitMainVoidAlias();
@@ -594,9 +595,8 @@ void CodeGenModule::Release() {
}
// Emit amdgpu_code_object_version module flag, which is code object version
// times 100.
- // ToDo: Enable module flag for all code object version when ROCm device
- // library is ready.
- if (getTarget().getTargetOpts().CodeObjectVersion == TargetOptions::COV_5) {
+ if (getTarget().getTargetOpts().CodeObjectVersion !=
+ TargetOptions::COV_None) {
getModule().addModuleFlag(llvm::Module::Error,
"amdgpu_code_object_version",
getTarget().getTargetOpts().CodeObjectVersion);
@@ -689,6 +689,10 @@ void CodeGenModule::Release() {
// Function ID tables for EH Continuation Guard.
getModule().addModuleFlag(llvm::Module::Warning, "ehcontguard", 1);
}
+ if (Context.getLangOpts().Kernel) {
+ // Note if we are compiling with /kernel.
+ getModule().addModuleFlag(llvm::Module::Warning, "ms-kernel", 1);
+ }
if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) {
// We don't support LTO with 2 with different StrictVTablePointers
// FIXME: we could support it by stripping all the information introduced
@@ -755,6 +759,15 @@ void CodeGenModule::Release() {
CodeGenOpts.SanitizeCfiCanonicalJumpTables);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) {
+ getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1);
+ // KCFI assumes patchable-function-prefix is the same for all indirectly
+ // called functions. Store the expected offset for code generation.
+ if (CodeGenOpts.PatchableFunctionEntryOffset)
+ getModule().addModuleFlag(llvm::Module::Override, "kcfi-offset",
+ CodeGenOpts.PatchableFunctionEntryOffset);
+ }
+
if (CodeGenOpts.CFProtectionReturn &&
Target.checkCFProtectionReturnSupported(getDiags())) {
// Indicate that we want to instrument return control flow protection.
@@ -769,12 +782,12 @@ void CodeGenModule::Release() {
1);
}
- if (CodeGenOpts.IBTSeal)
- getModule().addModuleFlag(llvm::Module::Min, "ibt-seal", 1);
-
if (CodeGenOpts.FunctionReturnThunks)
getModule().addModuleFlag(llvm::Module::Override, "function_return_thunk_extern", 1);
+ if (CodeGenOpts.IndirectBranchCSPrefix)
+ getModule().addModuleFlag(llvm::Module::Override, "indirect_branch_cs_prefix", 1);
+
// Add module metadata for return address signing (ignoring
// non-leaf/all) and stack tagging. These are actually turned on by function
// attributes, but we use module metadata to emit build attributes. This is
@@ -965,14 +978,9 @@ void CodeGenModule::EmitOpenCLMetadata() {
void CodeGenModule::EmitBackendOptionsMetadata(
const CodeGenOptions CodeGenOpts) {
- switch (getTriple().getArch()) {
- default:
- break;
- case llvm::Triple::riscv32:
- case llvm::Triple::riscv64:
+ if (getTriple().isRISCV()) {
getModule().addModuleFlag(llvm::Module::Error, "SmallDataLimit",
CodeGenOpts.SmallDataLimit);
- break;
}
}
@@ -1101,8 +1109,6 @@ llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) {
void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
const NamedDecl *D) const {
- if (GV->hasDLLImportStorageClass())
- return;
// Internal definitions always have default visibility.
if (GV->hasLocalLinkage()) {
GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
@@ -1113,6 +1119,21 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
// Set visibility for definitions, and for declarations if requested globally
// or set explicitly.
LinkageInfo LV = D->getLinkageAndVisibility();
+ if (GV->hasDLLExportStorageClass() || GV->hasDLLImportStorageClass()) {
+ // Reject incompatible dllstorage and visibility annotations.
+ if (!LV.isVisibilityExplicit())
+ return;
+ if (GV->hasDLLExportStorageClass()) {
+ if (LV.getVisibility() == HiddenVisibility)
+ getDiags().Report(D->getLocation(),
+ diag::err_hidden_visibility_dllexport);
+ } else if (LV.getVisibility() != DefaultVisibility) {
+ getDiags().Report(D->getLocation(),
+ diag::err_non_default_visibility_dllimport);
+ }
+ return;
+ }
+
if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls ||
!GV->isDeclarationForLinker())
GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
@@ -1320,6 +1341,20 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
Out << ".resolver";
}
+static void AppendTargetVersionMangling(const CodeGenModule &CGM,
+ const TargetVersionAttr *Attr,
+ raw_ostream &Out) {
+ if (Attr->isDefaultVersion())
+ return;
+ Out << "._";
+ llvm::SmallVector<StringRef, 8> Feats;
+ Attr->getFeatures(Feats);
+ for (const auto &Feat : Feats) {
+ Out << 'M';
+ Out << Feat;
+ }
+}
+
static void AppendTargetMangling(const CodeGenModule &CGM,
const TargetAttr *Attr, raw_ostream &Out) {
if (Attr->isDefaultVersion())
@@ -1327,21 +1362,21 @@ static void AppendTargetMangling(const CodeGenModule &CGM,
Out << '.';
const TargetInfo &Target = CGM.getTarget();
- ParsedTargetAttr Info =
- Attr->parse([&Target](StringRef LHS, StringRef RHS) {
- // Multiversioning doesn't allow "no-${feature}", so we can
- // only have "+" prefixes here.
- assert(LHS.startswith("+") && RHS.startswith("+") &&
- "Features should always have a prefix.");
- return Target.multiVersionSortPriority(LHS.substr(1)) >
- Target.multiVersionSortPriority(RHS.substr(1));
- });
+ ParsedTargetAttr Info = Target.parseTargetAttr(Attr->getFeaturesStr());
+ llvm::sort(Info.Features, [&Target](StringRef LHS, StringRef RHS) {
+ // Multiversioning doesn't allow "no-${feature}", so we can
+ // only have "+" prefixes here.
+ assert(LHS.startswith("+") && RHS.startswith("+") &&
+ "Features should always have a prefix.");
+ return Target.multiVersionSortPriority(LHS.substr(1)) >
+ Target.multiVersionSortPriority(RHS.substr(1));
+ });
bool IsFirst = true;
- if (!Info.Architecture.empty()) {
+ if (!Info.CPU.empty()) {
IsFirst = false;
- Out << "arch_" << Info.Architecture;
+ Out << "arch_" << Info.CPU;
}
for (StringRef Feat : Info.Features) {
@@ -1365,14 +1400,27 @@ static void AppendTargetClonesMangling(const CodeGenModule &CGM,
const TargetClonesAttr *Attr,
unsigned VersionIndex,
raw_ostream &Out) {
- Out << '.';
- StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
- if (FeatureStr.startswith("arch="))
- Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
- else
- Out << FeatureStr;
+ if (CGM.getTarget().getTriple().isAArch64()) {
+ StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
+ if (FeatureStr == "default")
+ return;
+ Out << "._";
+ SmallVector<StringRef, 8> Features;
+ FeatureStr.split(Features, "+");
+ for (auto &Feat : Features) {
+ Out << 'M';
+ Out << Feat;
+ }
+ } else {
+ Out << '.';
+ StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
+ if (FeatureStr.startswith("arch="))
+ Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
+ else
+ Out << FeatureStr;
- Out << '.' << Attr->getMangledIndex(VersionIndex);
+ Out << '.' << Attr->getMangledIndex(VersionIndex);
+ }
}
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
@@ -1428,6 +1476,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
break;
+ case MultiVersionKind::TargetVersion:
+ AppendTargetVersionMangling(CGM, FD->getAttr<TargetVersionAttr>(), Out);
+ break;
case MultiVersionKind::TargetClones:
AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(),
GD.getMultiVersionIndex(), Out);
@@ -1581,9 +1632,10 @@ llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) {
/// AddGlobalCtor - Add a function to the list that will be called before
/// main() runs.
void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority,
+ unsigned LexOrder,
llvm::Constant *AssociatedData) {
// FIXME: Type coercion of void()* types.
- GlobalCtors.push_back(Structor(Priority, Ctor, AssociatedData));
+ GlobalCtors.push_back(Structor(Priority, LexOrder, Ctor, AssociatedData));
}
/// AddGlobalDtor - Add a function to the list that will be called
@@ -1597,7 +1649,7 @@ void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority,
}
// FIXME: Type coercion of void()* types.
- GlobalDtors.push_back(Structor(Priority, Dtor, nullptr));
+ GlobalDtors.push_back(Structor(Priority, ~0U, Dtor, nullptr));
}
void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
@@ -1633,7 +1685,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
// The LTO linker doesn't seem to like it when we set an alignment
// on appending variables. Take it off as a workaround.
- list->setAlignment(llvm::None);
+ list->setAlignment(std::nullopt);
Fns.clear();
}
@@ -1666,6 +1718,20 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString()));
}
+llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
+ if (auto *FnType = T->getAs<FunctionProtoType>())
+ T = getContext().getFunctionType(
+ FnType->getReturnType(), FnType->getParamTypes(),
+ FnType->getExtProtoInfo().withExceptionSpec(EST_None));
+
+ std::string OutName;
+ llvm::raw_string_ostream Out(OutName);
+ getCXXABI().getMangleContext().mangleTypeName(T, Out);
+
+ return llvm::ConstantInt::get(Int32Ty,
+ static_cast<uint32_t>(llvm::xxHash64(OutName)));
+}
+
void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
const CGFunctionInfo &Info,
llvm::Function *F, bool IsThunk) {
@@ -1765,7 +1831,7 @@ void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn,
// Get image and pipe access qualifier:
if (ty->isImageType() || ty->isPipeType()) {
const Decl *PDecl = parm;
- if (auto *TD = dyn_cast<TypedefType>(ty))
+ if (const auto *TD = ty->getAs<TypedefType>())
PDecl = TD->getDecl();
const OpenCLAccessAttr *A = PDecl->getAttr<OpenCLAccessAttr>();
if (A && A->isWriteOnly())
@@ -1935,7 +2001,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
llvm::Function *F) {
llvm::AttrBuilder B(F->getContext());
- if (CodeGenOpts.UnwindTables)
+ if ((!D || !D->hasAttr<NoUwtableAttr>()) && CodeGenOpts.UnwindTables)
B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables));
if (CodeGenOpts.StackClashProtector)
@@ -1944,14 +2010,17 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if (!hasUnwindExceptions(LangOpts))
B.addAttribute(llvm::Attribute::NoUnwind);
- if (!D || !D->hasAttr<NoStackProtectorAttr>()) {
- if (LangOpts.getStackProtector() == LangOptions::SSPOn)
- B.addAttribute(llvm::Attribute::StackProtect);
- else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
- B.addAttribute(llvm::Attribute::StackProtectStrong);
- else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
- B.addAttribute(llvm::Attribute::StackProtectReq);
- }
+ if (D && D->hasAttr<NoStackProtectorAttr>())
+ ; // Do nothing.
+ else if (D && D->hasAttr<StrictGuardStackCheckAttr>() &&
+ LangOpts.getStackProtector() == LangOptions::SSPOn)
+ B.addAttribute(llvm::Attribute::StackProtectStrong);
+ else if (LangOpts.getStackProtector() == LangOptions::SSPOn)
+ B.addAttribute(llvm::Attribute::StackProtect);
+ else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
+ B.addAttribute(llvm::Attribute::StackProtectStrong);
+ else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
+ B.addAttribute(llvm::Attribute::StackProtectReq);
if (!D) {
// If we don't have a declaration to control inlining, the function isn't
@@ -2131,10 +2200,12 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
+ const auto *TV = FD ? FD->getAttr<TargetVersionAttr>() : nullptr;
+ assert((!TD || !TV) && "both target_version and target specified");
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
- if (TD || SD || TC) {
+ if (TD || TV || SD || TC) {
llvm::StringMap<bool> FeatureMap;
getContext().getFunctionFeatureMap(FeatureMap, GD);
@@ -2147,10 +2218,11 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// get and parse the target attribute so we can get the cpu for
// the function.
if (TD) {
- ParsedTargetAttr ParsedAttr = TD->parse();
- if (!ParsedAttr.Architecture.empty() &&
- getTarget().isValidCPUName(ParsedAttr.Architecture)) {
- TargetCPU = ParsedAttr.Architecture;
+ ParsedTargetAttr ParsedAttr =
+ Target.parseTargetAttr(TD->getFeaturesStr());
+ if (!ParsedAttr.CPU.empty() &&
+ getTarget().isValidCPUName(ParsedAttr.CPU)) {
+ TargetCPU = ParsedAttr.CPU;
TuneCPU = ""; // Clear the tune CPU.
}
if (!ParsedAttr.Tune.empty() &&
@@ -2280,6 +2352,57 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId));
}
+void CodeGenModule::setKCFIType(const FunctionDecl *FD, llvm::Function *F) {
+ if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic())
+ return;
+
+ llvm::LLVMContext &Ctx = F->getContext();
+ llvm::MDBuilder MDB(Ctx);
+ F->setMetadata(llvm::LLVMContext::MD_kcfi_type,
+ llvm::MDNode::get(
+ Ctx, MDB.createConstant(CreateKCFITypeId(FD->getType()))));
+}
+
+static bool allowKCFIIdentifier(StringRef Name) {
+ // KCFI type identifier constants are only necessary for external assembly
+ // functions, which means it's safe to skip unusual names. Subset of
+ // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar().
+ return llvm::all_of(Name, [](const char &C) {
+ return llvm::isAlnum(C) || C == '_' || C == '.';
+ });
+}
+
+void CodeGenModule::finalizeKCFITypes() {
+ llvm::Module &M = getModule();
+ for (auto &F : M.functions()) {
+ // Remove KCFI type metadata from non-address-taken local functions.
+ bool AddressTaken = F.hasAddressTaken();
+ if (!AddressTaken && F.hasLocalLinkage())
+ F.eraseMetadata(llvm::LLVMContext::MD_kcfi_type);
+
+ // Generate a constant with the expected KCFI type identifier for all
+ // address-taken function declarations to support annotating indirectly
+ // called assembly functions.
+ if (!AddressTaken || !F.isDeclaration())
+ continue;
+
+ const llvm::ConstantInt *Type;
+ if (const llvm::MDNode *MD = F.getMetadata(llvm::LLVMContext::MD_kcfi_type))
+ Type = llvm::mdconst::extract<llvm::ConstantInt>(MD->getOperand(0));
+ else
+ continue;
+
+ StringRef Name = F.getName();
+ if (!allowKCFIIdentifier(Name))
+ continue;
+
+ std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" +
+ Name + ", " + Twine(Type->getZExtValue()) + "\n")
+ .str();
+ M.appendModuleInlineAsm(Asm);
+ }
+}
+
void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
bool IsIncompleteFunction,
bool IsThunk) {
@@ -2362,9 +2485,15 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
!CodeGenOpts.SanitizeCfiCanonicalJumpTables)
CreateFunctionTypeMetadataForIcall(FD, F);
+ if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
+ setKCFIType(FD, F);
+
if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
getOpenMPRuntime().emitDeclareSimdFunction(FD, F);
+ if (CodeGenOpts.InlineMaxStackSize != UINT_MAX)
+ F->addFnAttr("inline-max-stacksize", llvm::utostr(CodeGenOpts.InlineMaxStackSize));
+
if (const auto *CB = FD->getAttr<CallbackAttr>()) {
// Annotate the callback behavior as metadata:
// - The callback callee (as argument number).
@@ -2521,21 +2650,23 @@ void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) {
// source, first Global Module Fragments, if present.
if (auto GMF = Primary->getGlobalModuleFragment()) {
for (Decl *D : getContext().getModuleInitializers(GMF)) {
- assert(D->getKind() == Decl::Var && "GMF initializer decl is not a var?");
+ if (isa<ImportDecl>(D))
+ continue;
+ assert(isa<VarDecl>(D) && "GMF initializer decl is not a var?");
EmitTopLevelDecl(D);
}
}
// Second any associated with the module, itself.
for (Decl *D : getContext().getModuleInitializers(Primary)) {
// Skip import decls, the inits for those are called explicitly.
- if (D->getKind() == Decl::Import)
+ if (isa<ImportDecl>(D))
continue;
EmitTopLevelDecl(D);
}
// Third any associated with the Privat eMOdule Fragment, if present.
if (auto PMF = Primary->getPrivateModuleFragment()) {
for (Decl *D : getContext().getModuleInitializers(PMF)) {
- assert(D->getKind() == Decl::Var && "PMF initializer decl is not a var?");
+ assert(isa<VarDecl>(D) && "PMF initializer decl is not a var?");
EmitTopLevelDecl(D);
}
}
@@ -2719,9 +2850,10 @@ llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) {
// Not found yet, create a new global.
llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str);
- auto *gv =
- new llvm::GlobalVariable(getModule(), s->getType(), true,
- llvm::GlobalValue::PrivateLinkage, s, ".str");
+ auto *gv = new llvm::GlobalVariable(
+ getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s,
+ ".str", nullptr, llvm::GlobalValue::NotThreadLocal,
+ ConstGlobalsPtrTy->getAddressSpace());
gv->setSection(AnnotationSection);
gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
AStr = gv;
@@ -2747,7 +2879,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) {
llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) {
ArrayRef<Expr *> Exprs = {Attr->args_begin(), Attr->args_size()};
if (Exprs.empty())
- return llvm::ConstantPointerNull::get(GlobalsInt8PtrTy);
+ return llvm::ConstantPointerNull::get(ConstGlobalsPtrTy);
llvm::FoldingSetNodeID ID;
for (Expr *E : Exprs) {
@@ -2797,8 +2929,8 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
// Create the ConstantStruct for the global annotation.
llvm::Constant *Fields[] = {
llvm::ConstantExpr::getBitCast(GVInGlobalsAS, GlobalsInt8PtrTy),
- llvm::ConstantExpr::getBitCast(AnnoGV, GlobalsInt8PtrTy),
- llvm::ConstantExpr::getBitCast(UnitGV, GlobalsInt8PtrTy),
+ llvm::ConstantExpr::getBitCast(AnnoGV, ConstGlobalsPtrTy),
+ llvm::ConstantExpr::getBitCast(UnitGV, ConstGlobalsPtrTy),
LineNoCst,
Args,
};
@@ -2890,46 +3022,44 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
return true;
}
-bool CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn,
- SourceLocation Loc) const {
+ProfileList::ExclusionType
+CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn,
+ SourceLocation Loc) const {
const auto &ProfileList = getContext().getProfileList();
// If the profile list is empty, then instrument everything.
if (ProfileList.isEmpty())
- return false;
+ return ProfileList::Allow;
CodeGenOptions::ProfileInstrKind Kind = getCodeGenOpts().getProfileInstr();
// First, check the function name.
- Optional<bool> V = ProfileList.isFunctionExcluded(Fn->getName(), Kind);
- if (V)
+ if (auto V = ProfileList.isFunctionExcluded(Fn->getName(), Kind))
return *V;
// Next, check the source location.
- if (Loc.isValid()) {
- Optional<bool> V = ProfileList.isLocationExcluded(Loc, Kind);
- if (V)
+ if (Loc.isValid())
+ if (auto V = ProfileList.isLocationExcluded(Loc, Kind))
return *V;
- }
// If location is unknown, this may be a compiler-generated function. Assume
// it's located in the main file.
auto &SM = Context.getSourceManager();
- if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
- Optional<bool> V = ProfileList.isFileExcluded(MainFile->getName(), Kind);
- if (V)
+ if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID()))
+ if (auto V = ProfileList.isFileExcluded(MainFile->getName(), Kind))
return *V;
- }
- return ProfileList.getDefault();
+ return ProfileList.getDefault(Kind);
}
-bool CodeGenModule::isFunctionBlockedFromProfileInstr(
- llvm::Function *Fn, SourceLocation Loc) const {
- if (isFunctionBlockedByProfileList(Fn, Loc))
- return true;
+ProfileList::ExclusionType
+CodeGenModule::isFunctionBlockedFromProfileInstr(llvm::Function *Fn,
+ SourceLocation Loc) const {
+ auto V = isFunctionBlockedByProfileList(Fn, Loc);
+ if (V != ProfileList::Allow)
+ return V;
auto NumGroups = getCodeGenOpts().ProfileTotalFunctionGroups;
if (NumGroups > 1) {
auto Group = llvm::crc32(arrayRefFromStringRef(Fn->getName())) % NumGroups;
if (Group != getCodeGenOpts().ProfileSelectedFunctionGroup)
- return true;
+ return ProfileList::Skip;
}
- return false;
+ return ProfileList::Allow;
}
bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
@@ -2955,7 +3085,7 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// we have if the level of the declare target attribute is -1. Note that we
// check somewhere else if we should emit this at all.
if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd) {
- llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
+ std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
OMPDeclareTargetDeclAttr::getActiveAttr(Global);
if (!ActiveAttr || (*ActiveAttr)->getLevel() != (unsigned)-1)
return false;
@@ -3113,7 +3243,7 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
// See if there is already something with the target's name in the module.
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee());
if (Entry) {
- unsigned AS = getContext().getTargetAddressSpace(VD->getType());
+ unsigned AS = getTypes().getTargetAddressSpace(VD->getType());
auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS));
return ConstantAddress(Ptr, DeclTy, Alignment);
}
@@ -3219,16 +3349,18 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
!Context.isMSStaticDataMemberInlineDefinition(VD)) {
if (LangOpts.OpenMP) {
// Emit declaration of the must-be-emitted declare target variable.
- if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
bool UnifiedMemoryEnabled =
getOpenMPRuntime().hasRequiresUnifiedSharedMemory();
- if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
!UnifiedMemoryEnabled) {
(void)GetAddrOfGlobalVar(VD);
} else {
assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
- (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
UnifiedMemoryEnabled)) &&
"Link clause or to clause with unified memory expected.");
(void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
@@ -3271,6 +3403,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// The value must be emitted, but cannot be emitted eagerly.
assert(!MayBeEmittedEagerly(Global));
addDeferredDeclToEmit(GD);
+ EmittedDeferredDecls[MangledName] = GD;
} else {
// Otherwise, remember that we saw a deferred decl with this name. The
// first use of the mangled name will cause it to move into
@@ -3526,12 +3659,18 @@ static unsigned
TargetMVPriority(const TargetInfo &TI,
const CodeGenFunction::MultiVersionResolverOption &RO) {
unsigned Priority = 0;
- for (StringRef Feat : RO.Conditions.Features)
+ unsigned NumFeatures = 0;
+ for (StringRef Feat : RO.Conditions.Features) {
Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
+ NumFeatures++;
+ }
if (!RO.Conditions.Architecture.empty())
Priority = std::max(
Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture));
+
+ Priority += TI.multiVersionFeatureCost() * NumFeatures;
+
return Priority;
}
@@ -3576,13 +3715,19 @@ void CodeGenModule::emitMultiVersionFunctions() {
}
assert(Func && "This should have just been created");
}
-
- const auto *TA = CurFD->getAttr<TargetAttr>();
- llvm::SmallVector<StringRef, 8> Feats;
- TA->getAddedFeatures(Feats);
-
- Options.emplace_back(cast<llvm::Function>(Func),
- TA->getArchitecture(), Feats);
+ if (CurFD->getMultiVersionKind() == MultiVersionKind::Target) {
+ const auto *TA = CurFD->getAttr<TargetAttr>();
+ llvm::SmallVector<StringRef, 8> Feats;
+ TA->getAddedFeatures(Feats);
+ Options.emplace_back(cast<llvm::Function>(Func),
+ TA->getArchitecture(), Feats);
+ } else {
+ const auto *TVA = CurFD->getAttr<TargetVersionAttr>();
+ llvm::SmallVector<StringRef, 8> Feats;
+ TVA->getFeatures(Feats);
+ Options.emplace_back(cast<llvm::Function>(Func),
+ /*Architecture*/ "", Feats);
+ }
});
} else if (FD->isTargetClonesMultiVersion()) {
const auto *TC = FD->getAttr<TargetClonesAttr>();
@@ -3612,10 +3757,19 @@ void CodeGenModule::emitMultiVersionFunctions() {
StringRef Architecture;
llvm::SmallVector<StringRef, 1> Feature;
- if (Version.startswith("arch="))
- Architecture = Version.drop_front(sizeof("arch=") - 1);
- else if (Version != "default")
- Feature.push_back(Version);
+ if (getTarget().getTriple().isAArch64()) {
+ if (Version != "default") {
+ llvm::SmallVector<StringRef, 8> VerFeats;
+ Version.split(VerFeats, "+");
+ for (auto &CurFeat : VerFeats)
+ Feature.push_back(CurFeat.trim());
+ }
+ } else {
+ if (Version.startswith("arch="))
+ Architecture = Version.drop_front(sizeof("arch=") - 1);
+ else if (Version != "default")
+ Feature.push_back(Version);
+ }
Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
}
@@ -3675,7 +3829,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
if (getTarget().supportsIFunc()) {
ResolverType = llvm::FunctionType::get(
llvm::PointerType::get(DeclTy,
- Context.getTargetAddressSpace(FD->getType())),
+ getTypes().getTargetAddressSpace(FD->getType())),
false);
}
else {
@@ -3813,8 +3967,8 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
// cpu_dispatch will be emitted in this translation unit.
if (getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion()) {
llvm::Type *ResolverType = llvm::FunctionType::get(
- llvm::PointerType::get(
- DeclTy, getContext().getTargetAddressSpace(FD->getType())),
+ llvm::PointerType::get(DeclTy,
+ getTypes().getTargetAddressSpace(FD->getType())),
false);
llvm::Constant *Resolver = GetOrCreateLLVMFunction(
MangledName + ".resolver", ResolverType, GlobalDecl{},
@@ -3917,7 +4071,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// (If function is requested for a definition, we always need to create a new
// function, not just return a bitcast.)
if (!IsForDefinition)
- return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo());
+ return llvm::ConstantExpr::getBitCast(
+ Entry, Ty->getPointerTo(Entry->getAddressSpace()));
}
// This function doesn't have a complete type (for example, the return
@@ -3958,7 +4113,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
}
llvm::Constant *BC = llvm::ConstantExpr::getBitCast(
- F, Entry->getValueType()->getPointerTo());
+ F, Entry->getValueType()->getPointerTo(Entry->getAddressSpace()));
addGlobalValReplacement(Entry, BC);
}
@@ -3974,7 +4129,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// All MSVC dtors other than the base dtor are linkonce_odr and delegate to
// each other bottoming out with the base dtor. Therefore we emit non-base
// dtors on usage, even if there is no dtor definition in the TU.
- if (D && isa<CXXDestructorDecl>(D) &&
+ if (isa_and_nonnull<CXXDestructorDecl>(D) &&
getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
GD.getDtorType()))
addDeferredDeclToEmit(GD);
@@ -3988,6 +4143,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// DeferredDeclsToEmit list, and remove it from DeferredDecls (since we
// don't need it anymore).
addDeferredDeclToEmit(DDI->second);
+ EmittedDeferredDecls[DDI->first] = DDI->second;
DeferredDecls.erase(DDI);
// Otherwise, there are cases we have to worry about where we're
@@ -4021,8 +4177,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
return F;
}
- llvm::Type *PTy = llvm::PointerType::getUnqual(Ty);
- return llvm::ConstantExpr::getBitCast(F, PTy);
+ return llvm::ConstantExpr::getBitCast(F,
+ Ty->getPointerTo(F->getAddressSpace()));
}
/// GetAddrOfFunction - Return the address of the given function. If Ty is
@@ -4071,8 +4227,9 @@ llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) {
llvm::GlobalValue *F =
cast<llvm::GlobalValue>(GetAddrOfFunction(Decl)->stripPointerCasts());
- return llvm::ConstantExpr::getBitCast(llvm::NoCFIValue::get(F),
- llvm::Type::getInt8PtrTy(VMContext));
+ return llvm::ConstantExpr::getBitCast(
+ llvm::NoCFIValue::get(F),
+ llvm::Type::getInt8PtrTy(VMContext, F->getAddressSpace()));
}
static const FunctionDecl *
@@ -4269,6 +4426,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
// Move the potentially referenced deferred decl to the DeferredDeclsToEmit
// list, and remove it from DeferredDecls (since we don't need it anymore).
addDeferredDeclToEmit(DDI->second);
+ EmittedDeferredDecls[DDI->first] = DDI->second;
DeferredDecls.erase(DDI);
}
@@ -4408,7 +4566,7 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) {
llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage,
- unsigned Alignment) {
+ llvm::Align Alignment) {
llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
llvm::GlobalVariable *OldGV = nullptr;
@@ -4444,7 +4602,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
!GV->hasAvailableExternallyLinkage())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
- GV->setAlignment(llvm::MaybeAlign(Alignment));
+ GV->setAlignment(Alignment);
return GV;
}
@@ -4673,13 +4831,19 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
llvm::TrackingVH<llvm::Constant> Init;
bool NeedsGlobalCtor = false;
+ // Whether the definition of the variable is available externally.
+  // If yes, we shouldn't emit the GlobalCtor and GlobalDtor for the variable
+ // since this is the job for its original source.
+ bool IsDefinitionAvailableExternally =
+ getContext().GetGVALinkageForVariable(D) == GVA_AvailableExternally;
bool NeedsGlobalDtor =
+ !IsDefinitionAvailableExternally &&
D->needsDestruction(getContext()) == QualType::DK_cxx_destructor;
const VarDecl *InitDecl;
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
- Optional<ConstantEmitter> emitter;
+ std::optional<ConstantEmitter> emitter;
// CUDA E.2.4.1 "__shared__ variables cannot have an initialization
// as part of their declaration." Sema has already checked for
@@ -4727,7 +4891,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
if (InitDecl->hasFlexibleArrayInit(getContext()))
ErrorUnsupported(D, "flexible array initializer");
Init = EmitNullConstant(T);
- NeedsGlobalCtor = true;
+
+ if (!IsDefinitionAvailableExternally)
+ NeedsGlobalCtor = true;
} else {
ErrorUnsupported(D, "static initializer");
Init = llvm::UndefValue::get(getTypes().ConvertType(T));
@@ -4837,7 +5003,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
CharUnits AlignVal = getContext().getDeclAlign(D);
// Check for alignment specifed in an 'omp allocate' directive.
- if (llvm::Optional<CharUnits> AlignValFromAllocate =
+ if (std::optional<CharUnits> AlignValFromAllocate =
getOMPAllocateAlignment(D))
AlignVal = *AlignValFromAllocate;
GV->setAlignment(AlignVal.getAsAlign());
@@ -5331,7 +5497,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
// Emit global alias debug information.
if (isa<VarDecl>(D))
if (CGDebugInfo *DI = getModuleDebugInfo())
- DI->EmitGlobalAlias(cast<llvm::GlobalValue>(GA->getAliasee()), GD);
+ DI->EmitGlobalAlias(cast<llvm::GlobalValue>(GA->getAliasee()->stripPointerCasts()), GD);
}
void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
@@ -5468,7 +5634,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
switch (CFRuntime) {
default: break;
- case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH;
+ case LangOptions::CoreFoundationABI::Swift: [[fallthrough]];
case LangOptions::CoreFoundationABI::Swift5_0:
CFConstantStringClassName =
Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN"
@@ -5546,7 +5712,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
// String pointer.
llvm::Constant *C = nullptr;
if (isUTF16) {
- auto Arr = llvm::makeArrayRef(
+ auto Arr = llvm::ArrayRef(
reinterpret_cast<uint16_t *>(const_cast<char *>(Entry.first().data())),
Entry.first().size() / 2);
C = llvm::ConstantDataArray::get(VMContext, Arr);
@@ -5891,7 +6057,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
LangAS AddrSpace =
VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace();
- Optional<ConstantEmitter> emitter;
+ std::optional<ConstantEmitter> emitter;
llvm::Constant *InitialValue = nullptr;
bool Constant = false;
llvm::Type *Type;
@@ -5929,10 +6095,13 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(),
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
if (emitter) emitter->finalize(GV);
- setGVProperties(GV, VD);
- if (GV->getDLLStorageClass() == llvm::GlobalVariable::DLLExportStorageClass)
- // The reference temporary should never be dllexport.
- GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
+ // Don't assign dllimport or dllexport to local linkage globals.
+ if (!llvm::GlobalValue::isLocalLinkage(Linkage)) {
+ setGVProperties(GV, VD);
+ if (GV->getDLLStorageClass() == llvm::GlobalVariable::DLLExportStorageClass)
+ // The reference temporary should never be dllexport.
+ GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
+ }
GV->setAlignment(Align.getAsAlign());
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -6058,6 +6227,39 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) {
EmitDeclContext(LSD);
}
+void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) {
+ std::unique_ptr<CodeGenFunction> &CurCGF =
+ GlobalTopLevelStmtBlockInFlight.first;
+
+ // We emitted a top-level stmt but after it there is initialization.
+ // Stop squashing the top-level stmts into a single function.
+ if (CurCGF && CXXGlobalInits.back() != CurCGF->CurFn) {
+ CurCGF->FinishFunction(D->getEndLoc());
+ CurCGF = nullptr;
+ }
+
+ if (!CurCGF) {
+ // void __stmts__N(void)
+ // FIXME: Ask the ABI name mangler to pick a name.
+ std::string Name = "__stmts__" + llvm::utostr(CXXGlobalInits.size());
+ FunctionArgList Args;
+ QualType RetTy = getContext().VoidTy;
+ const CGFunctionInfo &FnInfo =
+ getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args);
+ llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo);
+ llvm::Function *Fn = llvm::Function::Create(
+ FnTy, llvm::GlobalValue::InternalLinkage, Name, &getModule());
+
+ CurCGF.reset(new CodeGenFunction(*this));
+ GlobalTopLevelStmtBlockInFlight.second = D;
+ CurCGF->StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
+ D->getBeginLoc(), D->getBeginLoc());
+ CXXGlobalInits.push_back(Fn);
+ }
+
+ CurCGF->EmitStmt(D->getStmt());
+}
+
void CodeGenModule::EmitDeclContext(const DeclContext *DC) {
for (auto *I : DC->decls()) {
// Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope
@@ -6125,7 +6327,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
TSK_ExplicitInstantiationDefinition &&
Spec->hasDefinition())
DI->completeTemplateDefinition(*Spec);
- } LLVM_FALLTHROUGH;
+ } [[fallthrough]];
case Decl::CXXRecord: {
CXXRecordDecl *CRD = cast<CXXRecordDecl>(D);
if (CGDebugInfo *DI = getModuleDebugInfo()) {
@@ -6267,6 +6469,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
break;
}
+ case Decl::TopLevelStmt:
+ EmitTopLevelStmt(cast<TopLevelStmtDecl>(D));
+ break;
+
case Decl::Import: {
auto *Import = cast<ImportDecl>(D);
@@ -6363,6 +6569,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
DI->EmitAndRetainType(getContext().getEnumType(cast<EnumDecl>(D)));
break;
+ case Decl::HLSLBuffer:
+ getHLSLRuntime().addBuffer(cast<HLSLBufferDecl>(D));
+ break;
+
default:
// Make sure we handled everything we should, every other kind is a
// non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 5fbcc5ad1f5f..b3354657b237 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -36,6 +36,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include <optional>
namespace llvm {
class Module;
@@ -282,12 +283,15 @@ class CodeGenModule : public CodeGenTypeCache {
public:
struct Structor {
- Structor() : Priority(0), Initializer(nullptr), AssociatedData(nullptr) {}
- Structor(int Priority, llvm::Constant *Initializer,
+ Structor()
+ : Priority(0), LexOrder(~0u), Initializer(nullptr),
+ AssociatedData(nullptr) {}
+ Structor(int Priority, unsigned LexOrder, llvm::Constant *Initializer,
llvm::Constant *AssociatedData)
- : Priority(Priority), Initializer(Initializer),
+ : Priority(Priority), LexOrder(LexOrder), Initializer(Initializer),
AssociatedData(AssociatedData) {}
int Priority;
+ unsigned LexOrder;
llvm::Constant *Initializer;
llvm::Constant *AssociatedData;
};
@@ -588,6 +592,11 @@ private:
llvm::DenseMap<const llvm::Constant *, llvm::GlobalVariable *> RTTIProxyMap;
+ // Helps squashing blocks of TopLevelStmtDecl into a single llvm::Function
+ // when used with -fincremental-extensions.
+ std::pair<std::unique_ptr<CodeGenFunction>, const TopLevelStmtDecl *>
+ GlobalTopLevelStmtBlockInFlight;
+
public:
CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
const HeaderSearchOptions &headersearchopts,
@@ -712,7 +721,8 @@ public:
llvm::MDNode *getNoObjCARCExceptionsMetadata() {
if (!NoObjCARCExceptionsMetadata)
- NoObjCARCExceptionsMetadata = llvm::MDNode::get(getLLVMContext(), None);
+ NoObjCARCExceptionsMetadata =
+ llvm::MDNode::get(getLLVMContext(), std::nullopt);
return NoObjCARCExceptionsMetadata;
}
@@ -751,6 +761,10 @@ public:
return VTables.getItaniumVTableContext();
}
+ const ItaniumVTableContext &getItaniumVTableContext() const {
+ return VTables.getItaniumVTableContext();
+ }
+
MicrosoftVTableContext &getMicrosoftVTableContext() {
return VTables.getMicrosoftVTableContext();
}
@@ -867,7 +881,7 @@ public:
llvm::GlobalVariable *
CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty,
llvm::GlobalValue::LinkageTypes Linkage,
- unsigned Alignment);
+ llvm::Align Alignment);
llvm::Function *CreateGlobalInitOrCleanUpFunction(
llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI,
@@ -1076,7 +1090,8 @@ public:
llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD,
unsigned BuiltinID);
- llvm::Function *getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys = None);
+ llvm::Function *getIntrinsic(unsigned IID,
+ ArrayRef<llvm::Type *> Tys = std::nullopt);
/// Emit code for a single top level declaration.
void EmitTopLevelDecl(Decl *D);
@@ -1351,13 +1366,14 @@ public:
/// \returns true if \p Fn at \p Loc should be excluded from profile
/// instrumentation by the SCL passed by \p -fprofile-list.
- bool isFunctionBlockedByProfileList(llvm::Function *Fn,
- SourceLocation Loc) const;
+ ProfileList::ExclusionType
+ isFunctionBlockedByProfileList(llvm::Function *Fn, SourceLocation Loc) const;
/// \returns true if \p Fn at \p Loc should be excluded from profile
/// instrumentation.
- bool isFunctionBlockedFromProfileInstr(llvm::Function *Fn,
- SourceLocation Loc) const;
+ ProfileList::ExclusionType
+ isFunctionBlockedFromProfileInstr(llvm::Function *Fn,
+ SourceLocation Loc) const;
SanitizerMetadata *getSanitizerMetadata() {
return SanitizerMD.get();
@@ -1406,7 +1422,7 @@ public:
void EmitOMPAllocateDecl(const OMPAllocateDecl *D);
/// Return the alignment specified in an allocate directive, if present.
- llvm::Optional<CharUnits> getOMPAllocateAlignment(const VarDecl *VD);
+ std::optional<CharUnits> getOMPAllocateAlignment(const VarDecl *VD);
/// Returns whether the given record has hidden LTO visibility and therefore
/// may participate in (single-module) CFI and whole-program vtable
@@ -1433,9 +1449,14 @@ public:
llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout);
+ llvm::Type *getVTableComponentType() const;
+
/// Generate a cross-DSO type identifier for MD.
llvm::ConstantInt *CreateCrossDsoCfiTypeId(llvm::Metadata *MD);
+ /// Generate a KCFI type identifier for T.
+ llvm::ConstantInt *CreateKCFITypeId(QualType T);
+
/// Create a metadata identifier for the given type. This may either be an
/// MDString (for external identifiers) or a distinct unnamed MDNode (for
/// internal identifiers).
@@ -1454,9 +1475,16 @@ public:
void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F);
+ /// Set type metadata to the given function.
+ void setKCFIType(const FunctionDecl *FD, llvm::Function *F);
+
+ /// Emit KCFI type identifier constants and remove unused identifiers.
+ void finalizeKCFITypes();
+
/// Whether this function's return type has no side effects, and thus may
/// be trivially discarded if it is unused.
- bool MayDropFunctionReturn(const ASTContext &Context, QualType ReturnType);
+ bool MayDropFunctionReturn(const ASTContext &Context,
+ QualType ReturnType) const;
/// Returns whether this module needs the "all-vtables" type identifier.
bool NeedAllVtablesTypeId() const;
@@ -1577,6 +1605,7 @@ private:
void EmitDeclContext(const DeclContext *DC);
void EmitLinkageSpec(const LinkageSpecDecl *D);
+ void EmitTopLevelStmt(const TopLevelStmtDecl *D);
/// Emit the function that initializes C++ thread_local variables.
void EmitCXXThreadLocalInitFunc();
@@ -1601,6 +1630,7 @@ private:
// FIXME: Hardcoding priority here is gross.
void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
+ unsigned LexOrder = ~0U,
llvm::Constant *AssociatedData = nullptr);
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
bool IsDtorAttrFunc = false);
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 587bcef78ee5..15a3d74666ca 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
+#include <optional>
static llvm::cl::opt<bool>
EnableValueProfiling("enable-value-profiling",
@@ -755,7 +756,7 @@ void PGOHash::combine(HashType Type) {
if (Count && Count % NumTypesPerWord == 0) {
using namespace llvm::support;
uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
- MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
+ MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
Working = 0;
}
@@ -781,7 +782,7 @@ uint64_t PGOHash::finalize() {
} else {
using namespace llvm::support;
uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
- MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
+ MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
}
}
@@ -822,6 +823,8 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
CGM.ClearUnusedCoverageMapping(D);
if (Fn->hasFnAttribute(llvm::Attribute::NoProfile))
return;
+ if (Fn->hasFnAttribute(llvm::Attribute::SkipProfile))
+ return;
setFuncName(Fn);
@@ -963,11 +966,11 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
Builder.getInt32(Counter), StepV};
if (!StepV)
Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
- makeArrayRef(Args, 4));
+ ArrayRef(Args, 4));
else
Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
- makeArrayRef(Args));
+ ArrayRef(Args));
}
void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
@@ -1114,7 +1117,7 @@ CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
uint64_t LoopCount) const {
if (!PGO.haveRegionCounts())
return nullptr;
- Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
+ std::optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
if (!CondCount || *CondCount == 0)
return nullptr;
return createProfileWeights(LoopCount,
diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h
index f740692ac205..66c93cba4bb0 100644
--- a/clang/lib/CodeGen/CodeGenPGO.h
+++ b/clang/lib/CodeGen/CodeGenPGO.h
@@ -19,6 +19,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include <array>
#include <memory>
+#include <optional>
namespace clang {
namespace CodeGen {
@@ -59,12 +60,12 @@ public:
/// Check if an execution count is known for a given statement. If so, return
/// true and put the value in Count; else return false.
- Optional<uint64_t> getStmtCount(const Stmt *S) const {
+ std::optional<uint64_t> getStmtCount(const Stmt *S) const {
if (!StmtCountMap)
- return None;
+ return std::nullopt;
auto I = StmtCountMap->find(S);
if (I == StmtCountMap->end())
- return None;
+ return std::nullopt;
return I->second;
}
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 0cb63fbbe9e5..395ed7b1d703 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -338,7 +338,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
using TBAAStructField = llvm::MDBuilder::TBAAStructField;
SmallVector<TBAAStructField, 4> Fields;
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- // Handle C++ base classes. Non-virtual bases can treated a a kind of
+    // Handle C++ base classes. Non-virtual bases can be treated as a kind of
// field. Virtual bases are more complex and omitted, but avoid an
// incomplete view for NewStructPathTBAA.
if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0)
diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h
index 577f88367a3a..e848dc3b449c 100644
--- a/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -75,6 +75,9 @@ struct CodeGenTypeCache {
llvm::PointerType *GlobalsInt8PtrTy;
};
+ /// void* in the address space for constant globals
+ llvm::PointerType *ConstGlobalsPtrTy;
+
/// The size and alignment of the builtin C type 'int'. This comes
/// up enough in various ABI lowering tasks to be worth pre-computing.
union {
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index fcce424747f1..abbf71daf1d5 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -67,7 +67,7 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
if (RD->getDeclContext())
RD->printQualifiedName(OS, Policy);
else
- RD->printName(OS);
+ RD->printName(OS, Policy);
} else if (const TypedefNameDecl *TDD = RD->getTypedefNameForAnonDecl()) {
// FIXME: We should not have to check for a null decl context here.
// Right now we do it because the implicit Obj-C decls don't have one.
@@ -655,7 +655,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
const ReferenceType *RTy = cast<ReferenceType>(Ty);
QualType ETy = RTy->getPointeeType();
llvm::Type *PointeeType = ConvertTypeForMem(ETy);
- unsigned AS = Context.getTargetAddressSpace(ETy);
+ unsigned AS = getTargetAddressSpace(ETy);
ResultType = llvm::PointerType::get(PointeeType, AS);
break;
}
@@ -665,7 +665,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
llvm::Type *PointeeType = ConvertTypeForMem(ETy);
if (PointeeType->isVoidTy())
PointeeType = llvm::Type::getInt8Ty(getLLVMContext());
- unsigned AS = Context.getTargetAddressSpace(ETy);
+ unsigned AS = getTargetAddressSpace(ETy);
ResultType = llvm::PointerType::get(PointeeType, AS);
break;
}
@@ -772,10 +772,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
// Block pointers lower to function type. For function type,
// getTargetAddressSpace() returns default address space for
// function pointer i.e. program address space. Therefore, for block
- // pointers, it is important to pass qualifiers when calling
- // getTargetAddressSpace(), to ensure that we get the address space
- // for data pointers and not function pointers.
- unsigned AS = Context.getTargetAddressSpace(FTy.getQualifiers());
+ // pointers, it is important to pass the pointee AST address space when
+ // calling getTargetAddressSpace(), to ensure that we get the LLVM IR
+ // address space for data pointers and not function pointers.
+ unsigned AS = Context.getTargetAddressSpace(FTy.getAddressSpace());
ResultType = llvm::PointerType::get(PointeeType, AS);
break;
}
@@ -807,8 +807,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
ResultType,
llvm::ArrayType::get(CGM.Int8Ty, (atomicSize - valueSize) / 8)
};
- ResultType = llvm::StructType::get(getLLVMContext(),
- llvm::makeArrayRef(elts));
+ ResultType =
+ llvm::StructType::get(getLLVMContext(), llvm::ArrayRef(elts));
}
break;
}
@@ -958,3 +958,13 @@ bool CodeGenTypes::isZeroInitializable(QualType T) {
bool CodeGenTypes::isZeroInitializable(const RecordDecl *RD) {
return getCGRecordLayout(RD).isZeroInitializable();
}
+
+unsigned CodeGenTypes::getTargetAddressSpace(QualType T) const {
+  // Return the LLVM IR address space for the type. If the type is a
+  // function type without an address space qualifier, the program
+  // address space is used. Otherwise, the target picks the best address
+  // space based on the type's AST address space.
+ return T->isFunctionType() && !T.hasAddressSpace()
+ ? getDataLayout().getProgramAddressSpace()
+ : getContext().getTargetAddressSpace(T.getAddressSpace());
+}
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index cd20563cbf75..e76fda95513f 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -109,6 +109,7 @@ public:
const llvm::DataLayout &getDataLayout() const {
return TheModule.getDataLayout();
}
+ CodeGenModule &getCGM() const { return CGM; }
ASTContext &getContext() const { return Context; }
const ABIInfo &getABIInfo() const { return TheABIInfo; }
const TargetInfo &getTarget() const { return Target; }
@@ -305,7 +306,7 @@ public: // These are internal details of CGT that shouldn't be used externally.
bool isRecordBeingLaidOut(const Type *Ty) const {
return RecordsBeingLaidOut.count(Ty);
}
-
+ unsigned getTargetAddressSpace(QualType T) const;
};
} // end namespace CodeGen
diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h
index 188b82e56f53..1a7a181ca7f0 100644
--- a/clang/lib/CodeGen/ConstantEmitter.h
+++ b/clang/lib/CodeGen/ConstantEmitter.h
@@ -67,6 +67,9 @@ public:
return Abstract;
}
+ bool isInConstantContext() const { return InConstantContext; }
+ void setInConstantContext(bool var) { InConstantContext = var; }
+
/// Try to emit the initiaizer of the given declaration as an abstract
/// constant. If this succeeds, the emission must be finalized.
llvm::Constant *tryEmitForInitializer(const VarDecl &D);
diff --git a/clang/lib/CodeGen/ConstantInitBuilder.cpp b/clang/lib/CodeGen/ConstantInitBuilder.cpp
index 06d3e44f01b1..3cf69f3b6415 100644
--- a/clang/lib/CodeGen/ConstantInitBuilder.cpp
+++ b/clang/lib/CodeGen/ConstantInitBuilder.cpp
@@ -209,8 +209,7 @@ ConstantAggregateBuilderBase::addPlaceholderWithSize(llvm::Type *type) {
// Advance the offset past that field.
auto &layout = Builder.CGM.getDataLayout();
if (!Packed)
- offset = offset.alignTo(CharUnits::fromQuantity(
- layout.getABITypeAlignment(type)));
+ offset = offset.alignTo(CharUnits::fromQuantity(layout.getABITypeAlign(type)));
offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type));
CachedOffsetEnd = Builder.Buffer.size();
@@ -249,8 +248,8 @@ CharUnits ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const{
"cannot compute offset when a placeholder is present");
llvm::Type *elementType = element->getType();
if (!Packed)
- offset = offset.alignTo(CharUnits::fromQuantity(
- layout.getABITypeAlignment(elementType)));
+ offset = offset.alignTo(
+ CharUnits::fromQuantity(layout.getABITypeAlign(elementType)));
offset += CharUnits::fromQuantity(layout.getTypeStoreSize(elementType));
} while (++cacheEnd != end);
}
@@ -268,7 +267,7 @@ llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy) {
assert((Begin < buffer.size() ||
(Begin == buffer.size() && eltTy))
&& "didn't add any array elements without element type");
- auto elts = llvm::makeArrayRef(buffer).slice(Begin);
+ auto elts = llvm::ArrayRef(buffer).slice(Begin);
if (!eltTy) eltTy = elts[0]->getType();
auto type = llvm::ArrayType::get(eltTy, elts.size());
auto constant = llvm::ConstantArray::get(type, elts);
@@ -281,7 +280,7 @@ ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) {
markFinished();
auto &buffer = getBuffer();
- auto elts = llvm::makeArrayRef(buffer).slice(Begin);
+ auto elts = llvm::ArrayRef(buffer).slice(Begin);
if (ty == nullptr && elts.empty())
ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed);
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index 0fe084b628da..101cd6a67b49 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -17,7 +17,6 @@
#include "clang/Basic/FileManager.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Lexer.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ProfileData/Coverage/CoverageMapping.h"
@@ -26,6 +25,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include <optional>
// This selects the coverage mapping format defined when `InstrProfData.inc`
// is textually included.
@@ -97,27 +97,29 @@ class SourceMappingRegion {
Counter Count;
/// Secondary Counter used for Branch Regions for "False" branches.
- Optional<Counter> FalseCount;
+ std::optional<Counter> FalseCount;
/// The region's starting location.
- Optional<SourceLocation> LocStart;
+ std::optional<SourceLocation> LocStart;
/// The region's ending location.
- Optional<SourceLocation> LocEnd;
+ std::optional<SourceLocation> LocEnd;
/// Whether this region is a gap region. The count from a gap region is set
/// as the line execution count if there are no other regions on the line.
bool GapRegion;
public:
- SourceMappingRegion(Counter Count, Optional<SourceLocation> LocStart,
- Optional<SourceLocation> LocEnd, bool GapRegion = false)
+ SourceMappingRegion(Counter Count, std::optional<SourceLocation> LocStart,
+ std::optional<SourceLocation> LocEnd,
+ bool GapRegion = false)
: Count(Count), LocStart(LocStart), LocEnd(LocEnd), GapRegion(GapRegion) {
}
- SourceMappingRegion(Counter Count, Optional<Counter> FalseCount,
- Optional<SourceLocation> LocStart,
- Optional<SourceLocation> LocEnd, bool GapRegion = false)
+ SourceMappingRegion(Counter Count, std::optional<Counter> FalseCount,
+ std::optional<SourceLocation> LocStart,
+ std::optional<SourceLocation> LocEnd,
+ bool GapRegion = false)
: Count(Count), FalseCount(FalseCount), LocStart(LocStart),
LocEnd(LocEnd), GapRegion(GapRegion) {}
@@ -325,24 +327,24 @@ public:
/// Get the coverage mapping file ID for \c Loc.
///
- /// If such file id doesn't exist, return None.
- Optional<unsigned> getCoverageFileID(SourceLocation Loc) {
+ /// If such file id doesn't exist, return std::nullopt.
+ std::optional<unsigned> getCoverageFileID(SourceLocation Loc) {
auto Mapping = FileIDMapping.find(SM.getFileID(Loc));
if (Mapping != FileIDMapping.end())
return Mapping->second.first;
- return None;
+ return std::nullopt;
}
/// This shrinks the skipped range if it spans a line that contains a
/// non-comment token. If shrinking the skipped range would make it empty,
- /// this returns None.
+ /// this returns std::nullopt.
/// Note this function can potentially be expensive because
/// getSpellingLineNumber uses getLineNumber, which is expensive.
- Optional<SpellingRegion> adjustSkippedRange(SourceManager &SM,
- SourceLocation LocStart,
- SourceLocation LocEnd,
- SourceLocation PrevTokLoc,
- SourceLocation NextTokLoc) {
+ std::optional<SpellingRegion> adjustSkippedRange(SourceManager &SM,
+ SourceLocation LocStart,
+ SourceLocation LocEnd,
+ SourceLocation PrevTokLoc,
+ SourceLocation NextTokLoc) {
SpellingRegion SR{SM, LocStart, LocEnd};
SR.ColumnStart = 1;
if (PrevTokLoc.isValid() && SM.isWrittenInSameFile(LocStart, PrevTokLoc) &&
@@ -355,7 +357,7 @@ public:
}
if (SR.isInSourceOrder())
return SR;
- return None;
+ return std::nullopt;
}
/// Gather all the regions that were skipped by the preprocessor
@@ -385,7 +387,7 @@ public:
auto CovFileID = getCoverageFileID(LocStart);
if (!CovFileID)
continue;
- Optional<SpellingRegion> SR;
+ std::optional<SpellingRegion> SR;
if (I.isComment())
SR = adjustSkippedRange(SM, LocStart, LocEnd, I.PrevTokLoc,
I.NextTokLoc);
@@ -527,7 +529,7 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
if (MappingRegions.empty())
return;
- CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions);
+ CoverageMappingWriter Writer(FileIDMapping, std::nullopt, MappingRegions);
Writer.write(OS);
}
};
@@ -583,9 +585,10 @@ struct CounterCoverageMappingBuilder
///
/// Returns the index on the stack where the region was pushed. This can be
/// used with popRegions to exit a "scope", ending the region that was pushed.
- size_t pushRegion(Counter Count, Optional<SourceLocation> StartLoc = None,
- Optional<SourceLocation> EndLoc = None,
- Optional<Counter> FalseCount = None) {
+ size_t pushRegion(Counter Count,
+ std::optional<SourceLocation> StartLoc = std::nullopt,
+ std::optional<SourceLocation> EndLoc = std::nullopt,
+ std::optional<Counter> FalseCount = std::nullopt) {
if (StartLoc && !FalseCount) {
MostRecentLocation = *StartLoc;
@@ -810,7 +813,7 @@ struct CounterCoverageMappingBuilder
}
llvm::SmallSet<SourceLocation, 8> StartLocs;
- Optional<Counter> ParentCounter;
+ std::optional<Counter> ParentCounter;
for (SourceMappingRegion &I : llvm::reverse(RegionStack)) {
if (!I.hasStartLoc())
continue;
@@ -878,8 +881,8 @@ struct CounterCoverageMappingBuilder
}
/// Find a valid gap range between \p AfterLoc and \p BeforeLoc.
- Optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc,
- SourceLocation BeforeLoc) {
+ std::optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc,
+ SourceLocation BeforeLoc) {
// If AfterLoc is in function-like macro, use the right parenthesis
// location.
if (AfterLoc.isMacroID()) {
@@ -917,10 +920,10 @@ struct CounterCoverageMappingBuilder
// If the start and end locations of the gap are both within the same macro
// file, the range may not be in source order.
if (AfterLoc.isMacroID() || BeforeLoc.isMacroID())
- return None;
+ return std::nullopt;
if (!SM.isWrittenInSameFile(AfterLoc, BeforeLoc) ||
!SpellingRegion(SM, AfterLoc, BeforeLoc).isInSourceOrder())
- return None;
+ return std::nullopt;
return {{AfterLoc, BeforeLoc}};
}
@@ -1377,19 +1380,23 @@ struct CounterCoverageMappingBuilder
// Extend into the condition before we propagate through it below - this is
// needed to handle macros that generate the "if" but not the condition.
- extendRegion(S->getCond());
+ if (!S->isConsteval())
+ extendRegion(S->getCond());
Counter ParentCount = getRegion().getCounter();
Counter ThenCount = getRegionCounter(S);
- // Emitting a counter for the condition makes it easier to interpret the
- // counter for the body when looking at the coverage.
- propagateCounts(ParentCount, S->getCond());
+ if (!S->isConsteval()) {
+ // Emitting a counter for the condition makes it easier to interpret the
+ // counter for the body when looking at the coverage.
+ propagateCounts(ParentCount, S->getCond());
- // The 'then' count applies to the area immediately after the condition.
- auto Gap = findGapAreaBetween(S->getRParenLoc(), getStart(S->getThen()));
- if (Gap)
- fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount);
+ // The 'then' count applies to the area immediately after the condition.
+ std::optional<SourceRange> Gap =
+ findGapAreaBetween(S->getRParenLoc(), getStart(S->getThen()));
+ if (Gap)
+ fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount);
+ }
extendRegion(S->getThen());
Counter OutCount = propagateCounts(ThenCount, S->getThen());
@@ -1398,9 +1405,9 @@ struct CounterCoverageMappingBuilder
if (const Stmt *Else = S->getElse()) {
bool ThenHasTerminateStmt = HasTerminateStmt;
HasTerminateStmt = false;
-
// The 'else' count applies to the area immediately after the 'then'.
- Gap = findGapAreaBetween(getEnd(S->getThen()), getStart(Else));
+ std::optional<SourceRange> Gap =
+ findGapAreaBetween(getEnd(S->getThen()), getStart(Else));
if (Gap)
fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ElseCount);
extendRegion(Else);
@@ -1416,9 +1423,11 @@ struct CounterCoverageMappingBuilder
GapRegionCounter = OutCount;
}
- // Create Branch Region around condition.
- createBranchRegion(S->getCond(), ThenCount,
- subtractCounters(ParentCount, ThenCount));
+ if (!S->isConsteval()) {
+ // Create Branch Region around condition.
+ createBranchRegion(S->getCond(), ThenCount,
+ subtractCounters(ParentCount, ThenCount));
+ }
}
void VisitCXXTryStmt(const CXXTryStmt *S) {
@@ -1623,7 +1632,7 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord(
#include "llvm/ProfileData/InstrProfData.inc"
};
auto *FunctionRecordTy =
- llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
+ llvm::StructType::get(Ctx, ArrayRef(FunctionRecordTypes),
/*isPacked=*/true);
// Create the function record constant.
@@ -1631,8 +1640,8 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord(
llvm::Constant *FunctionRecordVals[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
- auto *FuncRecordConstant = llvm::ConstantStruct::get(
- FunctionRecordTy, makeArrayRef(FunctionRecordVals));
+ auto *FuncRecordConstant =
+ llvm::ConstantStruct::get(FunctionRecordTy, ArrayRef(FunctionRecordVals));
// Create the function record global.
auto *FuncRecord = new llvm::GlobalVariable(
@@ -1676,7 +1685,7 @@ void CoverageMappingModuleGen::addFunctionMappingRecord(
auto I = Entry.second;
FilenameStrs[I] = normalizeFilename(Entry.first->getName());
}
- ArrayRef<std::string> FilenameRefs = llvm::makeArrayRef(FilenameStrs);
+ ArrayRef<std::string> FilenameRefs = llvm::ArrayRef(FilenameStrs);
RawCoverageMappingReader Reader(CoverageMapping, FilenameRefs, Filenames,
Expressions, Regions);
if (Reader.read())
@@ -1722,20 +1731,19 @@ void CoverageMappingModuleGen::emit() {
#include "llvm/ProfileData/InstrProfData.inc"
};
auto CovDataHeaderTy =
- llvm::StructType::get(Ctx, makeArrayRef(CovDataHeaderTypes));
+ llvm::StructType::get(Ctx, ArrayRef(CovDataHeaderTypes));
llvm::Constant *CovDataHeaderVals[] = {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
};
- auto CovDataHeaderVal = llvm::ConstantStruct::get(
- CovDataHeaderTy, makeArrayRef(CovDataHeaderVals));
+ auto CovDataHeaderVal =
+ llvm::ConstantStruct::get(CovDataHeaderTy, ArrayRef(CovDataHeaderVals));
// Create the coverage data record
llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()};
- auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes));
+ auto CovDataTy = llvm::StructType::get(Ctx, ArrayRef(CovDataTypes));
llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal};
- auto CovDataVal =
- llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals));
+ auto CovDataVal = llvm::ConstantStruct::get(CovDataTy, ArrayRef(TUDataVals));
auto CovData = new llvm::GlobalVariable(
CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage,
CovDataVal, llvm::getCoverageMappingVarName());
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index fc2ff15a6acd..18403036e700 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -433,11 +433,7 @@ public:
ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true,
/*UseARMGuardVarABI=*/true) {}
- bool HasThisReturn(GlobalDecl GD) const override {
- return (isa<CXXConstructorDecl>(GD.getDecl()) || (
- isa<CXXDestructorDecl>(GD.getDecl()) &&
- GD.getDtorType() != Dtor_Deleting));
- }
+ bool constructorsAndDestructorsReturnThis() const override { return true; }
void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV,
QualType ResTy) override;
@@ -468,11 +464,7 @@ public:
: ItaniumCXXABI(CGM) {}
private:
- bool HasThisReturn(GlobalDecl GD) const override {
- return isa<CXXConstructorDecl>(GD.getDecl()) ||
- (isa<CXXDestructorDecl>(GD.getDecl()) &&
- GD.getDtorType() != Dtor_Deleting);
- }
+ bool constructorsAndDestructorsReturnThis() const override { return true; }
};
class WebAssemblyCXXABI final : public ItaniumCXXABI {
@@ -486,11 +478,7 @@ public:
llvm::Value *Exn) override;
private:
- bool HasThisReturn(GlobalDecl GD) const override {
- return isa<CXXConstructorDecl>(GD.getDecl()) ||
- (isa<CXXDestructorDecl>(GD.getDecl()) &&
- GD.getDtorType() != Dtor_Deleting);
- }
+ bool constructorsAndDestructorsReturnThis() const override { return true; }
bool canCallMismatchedFunctionType() const override { return false; }
};
@@ -1014,7 +1002,7 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD,
} else {
const ASTContext &Context = getContext();
CharUnits PointerWidth = Context.toCharUnitsFromBits(
- Context.getTargetInfo().getPointerWidth(0));
+ Context.getTargetInfo().getPointerWidth(LangAS::Default));
VTableOffset = Index * PointerWidth.getQuantity();
}
@@ -1262,7 +1250,7 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow");
if (isNoReturn)
- CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None);
+ CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, std::nullopt);
else
CGF.EmitRuntimeCallOrInvoke(Fn);
}
@@ -1337,8 +1325,9 @@ static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) {
llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false);
// Mark the function as nounwind readonly.
- llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind,
- llvm::Attribute::ReadOnly };
+ llvm::AttrBuilder FuncAttrs(CGF.getLLVMContext());
+ FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+ FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly());
llvm::AttributeList Attrs = llvm::AttributeList::get(
CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
@@ -1769,8 +1758,11 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
}
}
- if (VTContext.isRelativeLayout() && !VTable->isDSOLocal())
- CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName());
+ if (VTContext.isRelativeLayout()) {
+ CGVT.RemoveHwasanMetadata(VTable);
+ if (!VTable->isDSOLocal())
+ CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName());
+ }
}
bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
@@ -1887,11 +1879,11 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
// values are read.
unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout()
? 32
- : CGM.getTarget().getPointerAlign(0);
+ : CGM.getTarget().getPointerAlign(LangAS::Default);
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, VTableType, llvm::GlobalValue::ExternalLinkage,
- getContext().toCharUnitsFromBits(PAlign).getQuantity());
+ getContext().toCharUnitsFromBits(PAlign).getAsAlign());
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// In MS C++ if you have a class with virtual functions in which you are using
@@ -1932,7 +1924,9 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
VFunc = CGF.EmitVTableTypeCheckedLoad(
MethodDecl->getParent(), VTable, TyPtr,
- VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
+ VTableIndex *
+ CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default) /
+ 8);
} else {
CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc);
@@ -2374,8 +2368,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
guardAlignment = CGF.getSizeAlign();
} else {
guardTy = CGF.Int64Ty;
- guardAlignment = CharUnits::fromQuantity(
- CGM.getDataLayout().getABITypeAlignment(guardTy));
+ guardAlignment =
+ CharUnits::fromQuantity(CGM.getDataLayout().getABITypeAlign(guardTy));
}
}
llvm::PointerType *guardPtrTy = guardTy->getPointerTo(
@@ -2393,13 +2387,15 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
}
// Create the guard variable with a zero-initializer.
- // Just absorb linkage and visibility from the guarded variable.
+ // Just absorb linkage, visibility and dll storage class from the guarded
+ // variable.
guard = new llvm::GlobalVariable(CGM.getModule(), guardTy,
false, var->getLinkage(),
llvm::ConstantInt::get(guardTy, 0),
guardName.str());
guard->setDSOLocal(var->isDSOLocal());
guard->setVisibility(var->getVisibility());
+ guard->setDLLStorageClass(var->getDLLStorageClass());
// If the variable is thread-local, so is its guard variable.
guard->setThreadLocalMode(var->getThreadLocalMode());
guard->setAlignment(guardAlignment.getAsAlign());
@@ -2437,54 +2433,76 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
// __cxa_guard_release (&obj_guard);
// }
// }
+ //
+ // If threadsafe statics are enabled, but we don't have inline atomics, just
+ // call __cxa_guard_acquire unconditionally. The "inline" check isn't
+ // actually inline, and the user might not expect calls to __atomic libcalls.
- // Load the first byte of the guard variable.
- llvm::LoadInst *LI =
- Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
+ unsigned MaxInlineWidthInBits = CGF.getTarget().getMaxAtomicInlineWidth();
+ llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
+ if (!threadsafe || MaxInlineWidthInBits) {
+ // Load the first byte of the guard variable.
+ llvm::LoadInst *LI =
+ Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
- // Itanium ABI:
- // An implementation supporting thread-safety on multiprocessor
- // systems must also guarantee that references to the initialized
- // object do not occur before the load of the initialization flag.
- //
- // In LLVM, we do this by marking the load Acquire.
- if (threadsafe)
- LI->setAtomic(llvm::AtomicOrdering::Acquire);
+ // Itanium ABI:
+ // An implementation supporting thread-safety on multiprocessor
+ // systems must also guarantee that references to the initialized
+ // object do not occur before the load of the initialization flag.
+ //
+ // In LLVM, we do this by marking the load Acquire.
+ if (threadsafe)
+ LI->setAtomic(llvm::AtomicOrdering::Acquire);
- // For ARM, we should only check the first bit, rather than the entire byte:
- //
- // ARM C++ ABI 3.2.3.1:
- // To support the potential use of initialization guard variables
- // as semaphores that are the target of ARM SWP and LDREX/STREX
- // synchronizing instructions we define a static initialization
- // guard variable to be a 4-byte aligned, 4-byte word with the
- // following inline access protocol.
- // #define INITIALIZED 1
- // if ((obj_guard & INITIALIZED) != INITIALIZED) {
- // if (__cxa_guard_acquire(&obj_guard))
- // ...
- // }
- //
- // and similarly for ARM64:
- //
- // ARM64 C++ ABI 3.2.2:
- // This ABI instead only specifies the value bit 0 of the static guard
- // variable; all other bits are platform defined. Bit 0 shall be 0 when the
- // variable is not initialized and 1 when it is.
- llvm::Value *V =
- (UseARMGuardVarABI && !useInt8GuardVariable)
- ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
- : LI;
- llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized");
+ // For ARM, we should only check the first bit, rather than the entire byte:
+ //
+ // ARM C++ ABI 3.2.3.1:
+ // To support the potential use of initialization guard variables
+ // as semaphores that are the target of ARM SWP and LDREX/STREX
+ // synchronizing instructions we define a static initialization
+ // guard variable to be a 4-byte aligned, 4-byte word with the
+ // following inline access protocol.
+ // #define INITIALIZED 1
+ // if ((obj_guard & INITIALIZED) != INITIALIZED) {
+ // if (__cxa_guard_acquire(&obj_guard))
+ // ...
+ // }
+ //
+ // and similarly for ARM64:
+ //
+ // ARM64 C++ ABI 3.2.2:
+ // This ABI instead only specifies the value bit 0 of the static guard
+ // variable; all other bits are platform defined. Bit 0 shall be 0 when the
+ // variable is not initialized and 1 when it is.
+ llvm::Value *V =
+ (UseARMGuardVarABI && !useInt8GuardVariable)
+ ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
+ : LI;
+ llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized");
- llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
- llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
+ llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
- // Check if the first byte of the guard variable is zero.
- CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock,
- CodeGenFunction::GuardKind::VariableGuard, &D);
+ // Check if the first byte of the guard variable is zero.
+ CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock,
+ CodeGenFunction::GuardKind::VariableGuard, &D);
- CGF.EmitBlock(InitCheckBlock);
+ CGF.EmitBlock(InitCheckBlock);
+ }
+
+ // The semantics of dynamic initialization of variables with static or thread
+ // storage duration depends on whether they are declared at block-scope. The
+ // initialization of such variables at block-scope can be aborted with an
+ // exception and later retried (per C++20 [stmt.dcl]p4), and recursive entry
+ // to their initialization has undefined behavior (also per C++20
+ // [stmt.dcl]p4). For such variables declared at non-block scope, exceptions
+ // lead to termination (per C++20 [except.terminate]p1), and recursive
+ // references to the variables are governed only by the lifetime rules (per
+ // C++20 [class.cdtor]p2), which means such references are perfectly fine as
+ // long as they avoid touching memory. As a result, block-scope variables must
+ // not be marked as initialized until after initialization completes (unless
+ // the mark is reverted following an exception), but non-block-scope variables
+ // must be marked prior to initialization so that recursive accesses during
+ // initialization do not restart initialization.
// Variables used when coping with thread-safe statics and exceptions.
if (threadsafe) {
@@ -2501,6 +2519,12 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard);
CGF.EmitBlock(InitBlock);
+ } else if (!D.isLocalVarDecl()) {
+ // For non-local variables, store 1 into the first byte of the guard
+ // variable before the object initialization begins so that references
+ // to the variable during initialization don't restart initialization.
+ Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1),
+ Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
}
// Emit the initializer and add a global destructor if appropriate.
@@ -2513,9 +2537,10 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
// Call __cxa_guard_release. This cannot throw.
CGF.EmitNounwindRuntimeCall(getGuardReleaseFn(CGM, guardPtrTy),
guardAddr.getPointer());
- } else {
- // Store 1 into the first byte of the guard variable after initialization is
- // complete.
+ } else if (D.isLocalVarDecl()) {
+ // For local variables, store 1 into the first byte of the guard variable
+ // after the object initialization completes so that initialization is
+ // retried if initialization is interrupted by an exception.
Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1),
Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
}
@@ -2687,7 +2712,7 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() {
}
CGF.FinishFunction();
- AddGlobalCtor(GlobalInitFn, Priority, nullptr);
+ AddGlobalCtor(GlobalInitFn, Priority);
}
if (getCXXABI().useSinitAndSterm())
@@ -2988,14 +3013,16 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
// For a reference, the result of the wrapper function is a pointer to
// the referenced object.
- llvm::Value *Val = Var;
+ llvm::Value *Val = Builder.CreateThreadLocalAddress(Var);
+
if (VD->getType()->isReferenceType()) {
CharUnits Align = CGM.getContext().getDeclAlign(VD);
- Val = Builder.CreateAlignedLoad(Var->getValueType(), Var, Align);
+ Val = Builder.CreateAlignedLoad(Var->getValueType(), Val, Align);
}
if (Val->getType() != Wrapper->getReturnType())
Val = Builder.CreatePointerBitCastOrAddrSpaceCast(
Val, Wrapper->getReturnType(), "");
+
Builder.CreateRet(Val);
}
}
@@ -3154,7 +3181,7 @@ llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName(
auto Align = CGM.getContext().getTypeAlignInChars(CGM.getContext().CharTy);
llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
- Name, Init->getType(), Linkage, Align.getQuantity());
+ Name, Init->getType(), Linkage, Align.getAsAlign());
GV->setInitializer(Init);
@@ -3540,7 +3567,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
}
assert(isa<ObjCInterfaceType>(Ty));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Type::ObjCInterface:
if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) {
@@ -3852,8 +3879,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
if (CGM.supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(M.getOrInsertComdat(GV->getName()));
- CharUnits Align =
- CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0));
+ CharUnits Align = CGM.getContext().toCharUnitsFromBits(
+ CGM.getTarget().getPointerAlign(LangAS::Default));
GV->setAlignment(Align.getAsAlign());
// The Itanium ABI specifies that type_info objects must be globally
@@ -4031,7 +4058,8 @@ void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) {
// LLP64 platforms.
QualType OffsetFlagsTy = CGM.getContext().LongTy;
const TargetInfo &TI = CGM.getContext().getTargetInfo();
- if (TI.getTriple().isOSCygMing() && TI.getPointerWidth(0) > TI.getLongWidth())
+ if (TI.getTriple().isOSCygMing() &&
+ TI.getPointerWidth(LangAS::Default) > TI.getLongWidth())
OffsetFlagsTy = CGM.getContext().LongLongTy;
llvm::Type *OffsetFlagsLTy =
CGM.getTypes().ConvertType(OffsetFlagsTy);
@@ -4513,7 +4541,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
switch (CatchType.getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
CastExn = CGF.EmitARCRetainNonBlock(CastExn);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
@@ -4650,13 +4678,16 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
/// void @__clang_call_terminate(i8* %exn) nounwind noreturn
/// This code is used only in C++.
static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) {
- llvm::FunctionType *fnTy =
- llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false);
+ ASTContext &C = CGM.getContext();
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
+ C.VoidTy, {C.getPointerType(C.CharTy)});
+ llvm::FunctionType *fnTy = CGM.getTypes().GetFunctionType(FI);
llvm::FunctionCallee fnRef = CGM.CreateRuntimeFunction(
fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
llvm::Function *fn =
cast<llvm::Function>(fnRef.getCallee()->stripPointerCasts());
if (fn->empty()) {
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, fn, /*IsThunk=*/false);
fn->setDoesNotThrow();
fn->setDoesNotReturn();
diff --git a/clang/lib/CodeGen/MacroPPCallbacks.cpp b/clang/lib/CodeGen/MacroPPCallbacks.cpp
index 2f09fd2b6c15..8589869f6e2f 100644
--- a/clang/lib/CodeGen/MacroPPCallbacks.cpp
+++ b/clang/lib/CodeGen/MacroPPCallbacks.cpp
@@ -120,7 +120,7 @@ void MacroPPCallbacks::FileEntered(SourceLocation Loc) {
if (PP.getSourceManager().isWrittenInCommandLineFile(Loc))
return;
updateStatusToNextScope();
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case CommandLineIncludeScope:
EnteredCommandLineIncludeFiles++;
break;
@@ -167,7 +167,7 @@ void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason,
void MacroPPCallbacks::InclusionDirective(
SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
- bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File,
+ bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
StringRef SearchPath, StringRef RelativePath, const Module *Imported,
SrcMgr::CharacteristicKind FileType) {
diff --git a/clang/lib/CodeGen/MacroPPCallbacks.h b/clang/lib/CodeGen/MacroPPCallbacks.h
index 01041b16e4b7..5af177d0c3fa 100644
--- a/clang/lib/CodeGen/MacroPPCallbacks.h
+++ b/clang/lib/CodeGen/MacroPPCallbacks.h
@@ -101,7 +101,7 @@ public:
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
- Optional<FileEntryRef> File, StringRef SearchPath,
+ OptionalFileEntryRef File, StringRef SearchPath,
StringRef RelativePath, const Module *Imported,
SrcMgr::CharacteristicKind FileType) override;
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index f0c45654f8d9..ae785cce09f9 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -289,7 +289,7 @@ public:
CodeGenFunction::VPtr Vptr) override;
/// Don't initialize vptrs if dynamic class
- /// is marked with with the 'novtable' attribute.
+ /// is marked with the 'novtable' attribute.
bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override {
return !VTableClass->hasAttr<MSNoVTableAttr>();
}
@@ -458,7 +458,7 @@ public:
friend struct MSRTTIBuilder;
bool isImageRelative() const {
- return CGM.getTarget().getPointerWidth(/*AddrSpace=*/0) == 64;
+ return CGM.getTarget().getPointerWidth(LangAS::Default) == 64;
}
// 5 routines for constructing the llvm types for MS RTTI structs.
@@ -1086,8 +1086,8 @@ bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const {
return isDeletingDtor(GD);
}
-static bool isTrivialForAArch64MSVC(const CXXRecordDecl *RD) {
- // For AArch64, we use the C++14 definition of an aggregate, so we also
+static bool isTrivialForMSVC(const CXXRecordDecl *RD) {
+ // We use the C++14 definition of an aggregate, so we also
// check for:
// No private or protected non static data members.
// No base classes
@@ -1115,15 +1115,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (!RD)
return false;
- // Normally, the C++ concept of "is trivially copyable" is used to determine
- // if a struct can be returned directly. However, as MSVC and the language
- // have evolved, the definition of "trivially copyable" has changed, while the
- // ABI must remain stable. AArch64 uses the C++14 concept of an "aggregate",
- // while other ISAs use the older concept of "plain old data".
- bool isTrivialForABI = RD->isPOD();
- bool isAArch64 = CGM.getTarget().getTriple().isAArch64();
- if (isAArch64)
- isTrivialForABI = RD->canPassInRegisters() && isTrivialForAArch64MSVC(RD);
+ bool isTrivialForABI = RD->canPassInRegisters() && isTrivialForMSVC(RD);
// MSVC always returns structs indirectly from C++ instance methods.
bool isIndirectReturn = !isTrivialForABI || FI.isInstanceMethod();
@@ -1137,7 +1129,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
// On AArch64, use the `inreg` attribute if the object is considered to not
// be trivially copyable, or if this is an instance method struct return.
- FI.getReturnInfo().setInReg(isAArch64);
+ FI.getReturnInfo().setInReg(CGM.getTarget().getTriple().isAArch64());
return true;
}
@@ -1679,7 +1671,7 @@ void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
CharUnits AddressPoint =
getContext().getLangOpts().RTTIData
? getContext().toCharUnitsFromBits(
- getContext().getTargetInfo().getPointerWidth(0))
+ getContext().getTargetInfo().getPointerWidth(LangAS::Default))
: CharUnits::Zero();
if (Info.PathToIntroducingObject.empty()) {
@@ -1952,7 +1944,9 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
VFunc = CGF.EmitVTableTypeCheckedLoad(
getObjectWithVPtr(), VTable, Ty,
- ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
+ ML.Index *
+ CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default) /
+ 8);
} else {
if (CGM.getCodeGenOpts().PrepareForLTO)
CGF.EmitTypeMetadataCodeForVCall(getObjectWithVPtr(), VTable, Loc);
@@ -2083,6 +2077,8 @@ MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD,
// Start defining the function.
CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
FunctionArgs, MD->getLocation(), SourceLocation());
+
+ ApplyDebugLocation AL(CGF, MD->getLocation());
setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF));
// Load the vfptr and then callee from the vftable. The callee should have
@@ -2127,7 +2123,7 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
CharUnits Alignment =
CGM.getContext().getTypeAlignInChars(CGM.getContext().IntTy);
llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
- Name, VBTableType, Linkage, Alignment.getQuantity());
+ Name, VBTableType, Linkage, Alignment.getAsAlign());
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
@@ -2348,6 +2344,10 @@ void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
if (D.getTLSKind())
return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr);
+ // HLSL doesn't support atexit.
+ if (CGM.getLangOpts().HLSL)
+ return CGM.AddCXXDtorEntry(Dtor, Addr);
+
// The default behavior is to use atexit.
CGF.registerGlobalDtorWithAtExit(D, Dtor, Addr);
}
@@ -4142,7 +4142,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CodeGenFunction::RunCleanupsScope Cleanups(CGF);
const auto *FPT = CD->getType()->castAs<FunctionProtoType>();
- CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
+ CGF.EmitCallArgs(Args, FPT, llvm::ArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
// Insert any ABI-specific implicit constructor arguments.
AddedStructorArgCounts ExtraArgs =
@@ -4350,10 +4350,10 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
llvm::ArrayType *AT = llvm::ArrayType::get(CTType, NumEntries);
llvm::StructType *CTAType = getCatchableTypeArrayType(NumEntries);
llvm::Constant *Fields[] = {
- llvm::ConstantInt::get(CGM.IntTy, NumEntries), // NumEntries
+ llvm::ConstantInt::get(CGM.IntTy, NumEntries), // NumEntries
llvm::ConstantArray::get(
- AT, llvm::makeArrayRef(CatchableTypes.begin(),
- CatchableTypes.end())) // CatchableTypes
+ AT, llvm::ArrayRef(CatchableTypes.begin(),
+ CatchableTypes.end())) // CatchableTypes
};
SmallString<256> MangledName;
{
@@ -4470,10 +4470,45 @@ MicrosoftCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This,
}
bool MicrosoftCXXABI::isPermittedToBeHomogeneousAggregate(
- const CXXRecordDecl *CXXRD) const {
- // MSVC Windows on Arm64 considers a type not HFA if it is not an
- // aggregate according to the C++14 spec. This is not consistent with the
- // AAPCS64, but is defacto spec on that platform.
- return !CGM.getTarget().getTriple().isAArch64() ||
- isTrivialForAArch64MSVC(CXXRD);
+ const CXXRecordDecl *RD) const {
+ // All aggregates are permitted to be HFA on non-ARM platforms, which mostly
+ // affects vectorcall on x64/x86.
+ if (!CGM.getTarget().getTriple().isAArch64())
+ return true;
+ // MSVC Windows on Arm64 has its own rules for determining if a type is HFA
+ // that are inconsistent with the AAPCS64 ABI. The following are our best
+ // determination of those rules so far, based on observation of MSVC's
+ // behavior.
+ if (RD->isEmpty())
+ return false;
+ if (RD->isPolymorphic())
+ return false;
+ if (RD->hasNonTrivialCopyAssignment())
+ return false;
+ if (RD->hasNonTrivialDestructor())
+ return false;
+ if (RD->hasNonTrivialDefaultConstructor())
+ return false;
+ // These two are somewhat redundant given the caller
+ // (ABIInfo::isHomogeneousAggregate) checks the bases and fields, but that
+ // caller doesn't consider empty bases/fields to be non-homogenous, but it
+ // looks like Microsoft's AArch64 ABI does care about these empty types &
+ // anything containing/derived from one is non-homogeneous.
+ // Instead we could add another CXXABI entry point to query this property and
+ // have ABIInfo::isHomogeneousAggregate use that property.
+ // I don't think any other of the features listed above could be true of a
+ // base/field while not true of the outer struct. For example, if you have a
+ // base/field that has an non-trivial copy assignment/dtor/default ctor, then
+ // the outer struct's corresponding operation must be non-trivial.
+ for (const CXXBaseSpecifier &B : RD->bases()) {
+ if (const CXXRecordDecl *FRD = B.getType()->getAsCXXRecordDecl()) {
+ if (!isPermittedToBeHomogeneousAggregate(FRD))
+ return false;
+ }
+ }
+ // empty fields seem to be caught by the ABIInfo::isHomogeneousAggregate
+ // checking for padding - but maybe there are ways to end up with an empty
+ // field without padding? Not that I know of, so don't check fields here &
+ // rely on the padding check.
+ return true;
}
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index c9a5e56c72c7..e3e953c34c59 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -179,6 +179,7 @@ namespace {
}
bool HandleTopLevelDecl(DeclGroupRef DG) override {
+ // FIXME: Why not return false and abort parsing?
if (Diags.hasErrorOccurred())
return true;
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index f6eaa35b4873..677b66d3e1dc 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -107,7 +107,7 @@ class PCHContainerGenerator : public ASTConsumer {
return true;
SmallVector<QualType, 16> ArgTypes;
- for (auto i : D->parameters())
+ for (auto *i : D->parameters())
ArgTypes.push_back(i->getType());
QualType RetTy = D->getReturnType();
QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes,
@@ -126,7 +126,7 @@ class PCHContainerGenerator : public ASTConsumer {
ArgTypes.push_back(D->getSelfType(Ctx, D->getClassInterface(),
selfIsPseudoStrong, selfIsConsumed));
ArgTypes.push_back(Ctx.getObjCSelType());
- for (auto i : D->parameters())
+ for (auto *i : D->parameters())
ArgTypes.push_back(i->getType());
QualType RetTy = D->getReturnType();
QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes,
diff --git a/clang/lib/CodeGen/PatternInit.cpp b/clang/lib/CodeGen/PatternInit.cpp
index 26ac8b63a9ba..4400bc443688 100644
--- a/clang/lib/CodeGen/PatternInit.cpp
+++ b/clang/lib/CodeGen/PatternInit.cpp
@@ -43,8 +43,8 @@ llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM,
}
if (Ty->isPtrOrPtrVectorTy()) {
auto *PtrTy = cast<llvm::PointerType>(Ty->getScalarType());
- unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
- PtrTy->getAddressSpace());
+ unsigned PtrWidth =
+ CGM.getDataLayout().getPointerSizeInBits(PtrTy->getAddressSpace());
if (PtrWidth > 64)
llvm_unreachable("pattern initialization of unsupported pointer width");
llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 7848cf012633..554f1ea2a47d 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -104,5 +104,5 @@ void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) {
I->setMetadata(llvm::LLVMContext::MD_nosanitize,
- llvm::MDNode::get(CGM.getLLVMContext(), None));
+ llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt));
}
diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp
index 8fb24fcecf53..63d975193c02 100644
--- a/clang/lib/CodeGen/SwiftCallingConv.cpp
+++ b/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -15,13 +15,14 @@
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetInfo.h"
+#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace swiftcall;
static const SwiftABIInfo &getSwiftABIInfo(CodeGenModule &CGM) {
- return cast<SwiftABIInfo>(CGM.getTargetCodeGenInfo().getABIInfo());
+ return CGM.getTargetCodeGenInfo().getSwiftABIInfo();
}
static bool isPowerOf2(unsigned n) {
@@ -124,7 +125,7 @@ void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin,
const ASTRecordLayout &layout) {
// Unions are a special case.
if (record->isUnion()) {
- for (auto field : record->fields()) {
+ for (auto *field : record->fields()) {
if (field->isBitField()) {
addBitFieldData(field, begin, 0);
} else {
@@ -161,7 +162,7 @@ void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin,
}
// Add fields.
- for (auto field : record->fields()) {
+ for (auto *field : record->fields()) {
auto fieldOffsetInBits = layout.getFieldOffset(field->getFieldIndex());
if (field->isBitField()) {
addBitFieldData(field, begin, fieldOffsetInBits);
@@ -439,7 +440,7 @@ static bool isMergeableEntryType(llvm::Type *type) {
// merge pointers, but (1) it doesn't currently matter in practice because
// the chunk size is never greater than the size of a pointer and (2)
// Swift IRGen uses integer types for a lot of things that are "really"
- // just storing pointers (like Optional<SomePointer>). If we ever have a
+ // just storing pointers (like std::optional<SomePointer>). If we ever have a
// target that would otherwise combine pointers, we should put some effort
// into fixing those cases in Swift IRGen and then call out pointer types
// here.
@@ -590,9 +591,8 @@ SwiftAggLowering::getCoerceAndExpandTypes() const {
hasPadding = true;
}
- if (!packed && !entry.Begin.isMultipleOf(
- CharUnits::fromQuantity(
- CGM.getDataLayout().getABITypeAlignment(entry.Type))))
+ if (!packed && !entry.Begin.isMultipleOf(CharUnits::fromQuantity(
+ CGM.getDataLayout().getABITypeAlign(entry.Type))))
packed = true;
elts.push_back(entry.Type);
@@ -631,9 +631,8 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const {
// Avoid copying the array of types when there's just a single element.
if (Entries.size() == 1) {
- return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(
- Entries.back().Type,
- asReturnValue);
+ return getSwiftABIInfo(CGM).shouldPassIndirectly(Entries.back().Type,
+ asReturnValue);
}
SmallVector<llvm::Type*, 8> componentTys;
@@ -641,31 +640,27 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const {
for (auto &entry : Entries) {
componentTys.push_back(entry.Type);
}
- return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys,
- asReturnValue);
+ return getSwiftABIInfo(CGM).shouldPassIndirectly(componentTys, asReturnValue);
}
bool swiftcall::shouldPassIndirectly(CodeGenModule &CGM,
ArrayRef<llvm::Type*> componentTys,
bool asReturnValue) {
- return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys,
- asReturnValue);
+ return getSwiftABIInfo(CGM).shouldPassIndirectly(componentTys, asReturnValue);
}
CharUnits swiftcall::getMaximumVoluntaryIntegerSize(CodeGenModule &CGM) {
// Currently always the size of an ordinary pointer.
return CGM.getContext().toCharUnitsFromBits(
- CGM.getContext().getTargetInfo().getPointerWidth(0));
+ CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default));
}
CharUnits swiftcall::getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type) {
// For Swift's purposes, this is always just the store size of the type
// rounded up to a power of 2.
auto size = (unsigned long long) getTypeStoreSize(CGM, type).getQuantity();
- if (!isPowerOf2(size)) {
- size = 1ULL << (llvm::findLastSet(size, llvm::ZB_Undefined) + 1);
- }
- assert(size >= CGM.getDataLayout().getABITypeAlignment(type));
+ size = llvm::bit_ceil(size);
+ assert(CGM.getDataLayout().getABITypeAlign(type) <= size);
return CharUnits::fromQuantity(size);
}
@@ -699,8 +694,7 @@ bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize,
bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize,
llvm::Type *eltTy, unsigned numElts) {
assert(numElts > 1 && "illegal vector length");
- return getSwiftABIInfo(CGM)
- .isLegalVectorTypeForSwift(vectorSize, eltTy, numElts);
+ return getSwiftABIInfo(CGM).isLegalVectorType(vectorSize, eltTy, numElts);
}
std::pair<llvm::Type*, unsigned>
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d1ee61eab9d6..be1dbe8480c6 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -23,7 +23,6 @@
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
@@ -107,7 +106,7 @@ static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
}
bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
- if (Ty->isPromotableIntegerType())
+ if (getContext().isPromotableIntegerType(Ty))
return true;
if (const auto *EIT = Ty->getAs<BitIntType>())
@@ -117,7 +116,9 @@ bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
return false;
}
-ABIInfo::~ABIInfo() {}
+ABIInfo::~ABIInfo() = default;
+
+SwiftABIInfo::~SwiftABIInfo() = default;
/// Does the given lowering require more than the given number of
/// registers when expanded?
@@ -140,7 +141,7 @@ static bool occupiesMoreThan(CodeGenTypes &cgt,
if (type->isPointerTy()) {
intCount++;
} else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
- auto ptrWidth = cgt.getTarget().getPointerWidth(0);
+ auto ptrWidth = cgt.getTarget().getPointerWidth(LangAS::Default);
intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
} else {
assert(type->isVectorTy() || type->isFloatingPointTy());
@@ -151,12 +152,16 @@ static bool occupiesMoreThan(CodeGenTypes &cgt,
return (intCount + fpCount > maxAllRegisters);
}
-bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
- llvm::Type *eltTy,
- unsigned numElts) const {
+bool SwiftABIInfo::shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+ bool AsReturnValue) const {
+ return occupiesMoreThan(CGT, ComponentTys, /*total=*/4);
+}
+
+bool SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+ unsigned NumElts) const {
// The default implementation of this assumes that the target guarantees
// 128-bit SIMD support but nothing more.
- return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
+ return (VectorSize.getQuantity() > 8 && VectorSize.getQuantity() <= 16);
}
static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
@@ -317,13 +322,17 @@ static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
/// leaving one or more empty slots behind as padding. If this
/// is false, the returned address might be less-aligned than
/// DirectAlign.
+/// \param ForceRightAdjust - Default is false. On big-endian platforms,
+/// if the argument is smaller than a slot, setting this flag forces
+/// right-adjustment of the argument in its slot irrespective of the type.
static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
Address VAListAddr,
llvm::Type *DirectTy,
CharUnits DirectSize,
CharUnits DirectAlign,
CharUnits SlotSize,
- bool AllowHigherAlign) {
+ bool AllowHigherAlign,
+ bool ForceRightAdjust = false) {
// Cast the element type to i8* if necessary. Some platforms define
// va_list as a struct containing an i8* instead of just an i8*.
if (VAListAddr.getElementType() != CGF.Int8PtrTy)
@@ -349,7 +358,7 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
// If the argument is smaller than a slot, and this is a big-endian
// target, the argument will be right-adjusted in its slot.
if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
- !DirectTy->isStructTy()) {
+ (!DirectTy->isStructTy() || ForceRightAdjust)) {
Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
}
@@ -370,11 +379,15 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
/// an argument type with an alignment greater than the slot size
/// will be emitted on a higher-alignment address, potentially
/// leaving one or more empty slots behind as padding.
+/// \param ForceRightAdjust - Default is false. On big-endian platforms,
+/// if the argument is smaller than a slot, setting this flag forces
+/// right-adjustment of the argument in its slot irrespective of the type.
static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType ValueTy, bool IsIndirect,
TypeInfoChars ValueInfo,
CharUnits SlotSizeAndAlign,
- bool AllowHigherAlign) {
+ bool AllowHigherAlign,
+ bool ForceRightAdjust = false) {
// The size and alignment of the value that was passed directly.
CharUnits DirectSize, DirectAlign;
if (IsIndirect) {
@@ -390,9 +403,9 @@ static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
if (IsIndirect)
DirectTy = DirectTy->getPointerTo(0);
- Address Addr =
- emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign,
- SlotSizeAndAlign, AllowHigherAlign);
+ Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize,
+ DirectAlign, SlotSizeAndAlign,
+ AllowHigherAlign, ForceRightAdjust);
if (IsIndirect) {
Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align);
@@ -814,7 +827,7 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
// This is a very simple ABI that relies a lot on DefaultABIInfo.
//===----------------------------------------------------------------------===//
-class WebAssemblyABIInfo final : public SwiftABIInfo {
+class WebAssemblyABIInfo final : public ABIInfo {
public:
enum ABIKind {
MVP = 0,
@@ -827,7 +840,7 @@ private:
public:
explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
- : SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
+ : ABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
@@ -845,22 +858,16 @@ private:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
-
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
-
- bool isSwiftErrorInRegister() const override {
- return false;
- }
};
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
WebAssemblyABIInfo::ABIKind K)
- : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {}
+ : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
+ SwiftInfo =
+ std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+ }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
@@ -1071,7 +1078,7 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
.Cases("y", "&y", "^Ym", true)
.Default(false);
if (IsMMXCons && Ty->isVectorTy()) {
- if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() !=
+ if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() !=
64) {
// Invalid MMX constraint
return nullptr;
@@ -1136,7 +1143,7 @@ struct CCState {
};
/// X86_32ABIInfo - The X86-32 ABI information.
-class X86_32ABIInfo : public SwiftABIInfo {
+class X86_32ABIInfo : public ABIInfo {
enum Class {
Integer,
Float
@@ -1210,26 +1217,27 @@ public:
X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
- : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
- IsRetSmallStructInRegABI(RetSmallStructInRegABI),
- IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
- IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
- IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
- CGT.getTarget().getTriple().isOSCygMing()),
- DefaultNumRegisterParameters(NumRegisterParameters) {}
+ : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
+ IsRetSmallStructInRegABI(RetSmallStructInRegABI),
+ IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
+ IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+ IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
+ CGT.getTarget().getTriple().isOSCygMing()),
+ DefaultNumRegisterParameters(NumRegisterParameters) {}
+};
+
+class X86_32SwiftABIInfo : public SwiftABIInfo {
+public:
+ explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
+ : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
+ bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+ bool AsReturnValue) const override {
// LLVM's x86-32 lowering currently only assigns up to three
// integer registers and three fp registers. Oddly, it'll use up to
// four vector registers for vectors, but those can overlap with the
// scalar registers.
- return occupiesMoreThan(CGT, scalars, /*total*/ 3);
- }
-
- bool isSwiftErrorInRegister() const override {
- // x86-32 lowering does not support passing swifterror in a register.
- return false;
+ return occupiesMoreThan(CGT, ComponentTys, /*total=*/3);
}
};
@@ -1240,7 +1248,9 @@ public:
unsigned NumRegisterParameters, bool SoftFloatABI)
: TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
- NumRegisterParameters, SoftFloatABI)) {}
+ NumRegisterParameters, SoftFloatABI)) {
+ SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
+ }
static bool isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts);
@@ -1769,23 +1779,22 @@ bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
}
bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
- if (!updateFreeRegs(Ty, State))
- return false;
+ bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
+ (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
+ Ty->isReferenceType());
- if (IsMCUABI)
+ if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
+ State.CC == llvm::CallingConv::X86_VectorCall))
return false;
- if (State.CC == llvm::CallingConv::X86_FastCall ||
- State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall) {
- if (getContext().getTypeSize(Ty) > 32)
- return false;
+ if (!updateFreeRegs(Ty, State))
+ return false;
- return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
- Ty->isReferenceType());
- }
+ if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
+ return false;
- return true;
+ // Return true to apply inreg to all legal parameters except for MCU targets.
+ return !IsMCUABI;
}
void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
@@ -2250,7 +2259,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
}
/// X86_64ABIInfo - The X86_64 ABI information.
-class X86_64ABIInfo : public SwiftABIInfo {
+class X86_64ABIInfo : public ABIInfo {
enum Class {
Integer = 0,
SSE,
@@ -2396,10 +2405,9 @@ class X86_64ABIInfo : public SwiftABIInfo {
bool Has64BitPointers;
public:
- X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
- SwiftABIInfo(CGT), AVXLevel(AVXLevel),
- Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
- }
+ X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
+ : ABIInfo(CGT), AVXLevel(AVXLevel),
+ Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}
bool isPassedUsingAVXType(QualType type) const {
unsigned neededInt, neededSSE;
@@ -2409,7 +2417,7 @@ public:
if (info.isDirect()) {
llvm::Type *ty = info.getCoerceToType();
if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
- return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128;
+ return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
}
return false;
}
@@ -2424,21 +2432,13 @@ public:
bool has64BitPointers() const {
return Has64BitPointers;
}
-
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
- bool isSwiftErrorInRegister() const override {
- return true;
- }
};
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
-class WinX86_64ABIInfo : public SwiftABIInfo {
+class WinX86_64ABIInfo : public ABIInfo {
public:
WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
- : SwiftABIInfo(CGT), AVXLevel(AVXLevel),
+ : ABIInfo(CGT), AVXLevel(AVXLevel),
IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
void computeInfo(CGFunctionInfo &FI) const override;
@@ -2457,15 +2457,6 @@ public:
return isX86VectorCallAggregateSmallEnough(NumMembers);
}
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
-
- bool isSwiftErrorInRegister() const override {
- return true;
- }
-
private:
ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
bool IsVectorCall, bool IsRegCall) const;
@@ -2480,7 +2471,10 @@ private:
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
- : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {}
+ : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
+ SwiftInfo =
+ std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
+ }
const X86_64ABIInfo &getABIInfo() const {
return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -2624,7 +2618,7 @@ void X86_64TargetCodeGenInfo::checkFunctionCallABI(
llvm::StringMap<bool> CalleeMap;
unsigned ArgIndex = 0;
- // We need to loop through the actual call arguments rather than the the
+ // We need to loop through the actual call arguments rather than the
// function's parameters, in case this variadic.
for (const CallArg &Arg : Args) {
// The "avx" feature changes how vectors >128 in size are passed. "avx512f"
@@ -2722,7 +2716,10 @@ class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
X86AVXABILevel AVXLevel)
- : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {}
+ : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {
+ SwiftInfo =
+ std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
+ }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
@@ -2871,7 +2868,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
Current = Integer;
} else if (k == BuiltinType::Float || k == BuiltinType::Double ||
- k == BuiltinType::Float16) {
+ k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
Current = SSE;
} else if (k == BuiltinType::LongDouble) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
@@ -3002,7 +2999,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
Current = Integer;
else if (Size <= 128)
Lo = Hi = Integer;
- } else if (ET->isFloat16Type() || ET == getContext().FloatTy) {
+ } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
+ ET->isBFloat16Type()) {
Current = SSE;
} else if (ET == getContext().DoubleTy) {
Lo = Hi = SSE;
@@ -3474,9 +3472,9 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
if (SourceSize > T0Size)
T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
if (T1 == nullptr) {
- // Check if IRType is a half + float. float type will be in IROffset+4 due
+ // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due
// to its alignment.
- if (T0->isHalfTy() && SourceSize > 4)
+ if (T0->is16bitFPTy() && SourceSize > 4)
T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
// If we can't get a second FP type, return a simple half or float.
// avx512fp16-abi.c:pr51813_2 shows it works to return float for
@@ -3488,7 +3486,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
if (T0->isFloatTy() && T1->isFloatTy())
return llvm::FixedVectorType::get(T0, 2);
- if (T0->isHalfTy() && T1->isHalfTy()) {
+ if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
llvm::Type *T2 = nullptr;
if (SourceSize > 4)
T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
@@ -3497,7 +3495,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
return llvm::FixedVectorType::get(T0, 4);
}
- if (T0->isHalfTy() || T1->isHalfTy())
+ if (T0->is16bitFPTy() || T1->is16bitFPTy())
return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
return llvm::Type::getDoubleTy(getVMContext());
@@ -3594,7 +3592,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
// (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
// the second element at offset 8. Check for this:
unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
- unsigned HiAlign = TD.getABITypeAlignment(Hi);
+ llvm::Align HiAlign = TD.getABITypeAlign(Hi);
unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
@@ -4171,13 +4169,13 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
// FIXME: Our choice of alignment here and below is probably pessimistic.
llvm::Value *V = CGF.Builder.CreateAlignedLoad(
TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
- CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo)));
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
// Copy the second element.
V = CGF.Builder.CreateAlignedLoad(
TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
- CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi)));
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
@@ -4590,7 +4588,7 @@ bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const {
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
- if (Ty->isPromotableIntegerType())
+ if (getContext().isPromotableIntegerType(Ty))
return true;
if (!Is64Bit)
@@ -4984,7 +4982,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
namespace {
/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
-class PPC64_SVR4_ABIInfo : public SwiftABIInfo {
+class PPC64_SVR4_ABIInfo : public ABIInfo {
public:
enum ABIKind {
ELFv1 = 0,
@@ -4999,7 +4997,7 @@ private:
public:
PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind,
bool SoftFloatABI)
- : SwiftABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {}
+ : ABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {}
bool isPromotableTypeForABI(QualType Ty) const;
CharUnits getParamTypeAlignment(QualType Ty) const;
@@ -5040,15 +5038,6 @@ public:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
-
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
-
- bool isSwiftErrorInRegister() const override {
- return false;
- }
};
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -5058,7 +5047,10 @@ public:
PPC64_SVR4_ABIInfo::ABIKind Kind,
bool SoftFloatABI)
: TargetCodeGenInfo(
- std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) {}
+ std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) {
+ SwiftInfo =
+ std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+ }
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
@@ -5467,8 +5459,21 @@ Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
}
// Otherwise, just use the general rule.
- return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
- TypeInfo, SlotSize, /*AllowHigher*/ true);
+ //
+ // The PPC64 ABI passes some arguments in integer registers, even to variadic
+ // functions. To allow va_list to use the simple "void*" representation,
+ // variadic calls allocate space in the argument area for the integer argument
+ // registers, and variadic functions spill their integer argument registers to
+ // this area in their prologues. When aggregates smaller than a register are
+ // passed this way, they are passed in the least significant bits of the
+ // register, which means that after spilling on big-endian targets they will
+ // be right-aligned in their argument slot. This is uncommon; for a variety of
+ // reasons, other big-endian targets don't end up right-aligning aggregate
+ // types this way, and so right-alignment only applies to fundamental types.
+ // So on PPC64, we must force the use of right-alignment even for aggregates.
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
+ SlotSize, /*AllowHigher*/ true,
+ /*ForceRightAdjust*/ true);
}
bool
@@ -5492,7 +5497,7 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
namespace {
-class AArch64ABIInfo : public SwiftABIInfo {
+class AArch64ABIInfo : public ABIInfo {
public:
enum ABIKind {
AAPCS = 0,
@@ -5504,8 +5509,7 @@ private:
ABIKind Kind;
public:
- AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind)
- : SwiftABIInfo(CGT), Kind(Kind) {}
+ AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {}
private:
ABIKind getABIKind() const { return Kind; }
@@ -5553,26 +5557,26 @@ private:
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
- bool isSwiftErrorInRegister() const override {
- return true;
- }
-
- bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
- unsigned elts) const override;
-
bool allowBFloatArgsAndRet() const override {
return getTarget().hasBFloat16Type();
}
};
+class AArch64SwiftABIInfo : public SwiftABIInfo {
+public:
+ explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
+ : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
+
+ bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+ unsigned NumElts) const override;
+};
+
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
- : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {}
+ : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
+ SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
+ }
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
@@ -5594,14 +5598,15 @@ public:
if (TA == nullptr)
return;
- ParsedTargetAttr Attr = TA->parse();
+ ParsedTargetAttr Attr =
+ CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
if (Attr.BranchProtection.empty())
return;
TargetInfo::BranchProtectionInfo BPI;
StringRef Error;
- (void)CGM.getTarget().validateBranchProtection(
- Attr.BranchProtection, Attr.Architecture, BPI, Error);
+ (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
+ Attr.CPU, BPI, Error);
assert(Error.empty());
auto *Fn = cast<llvm::Function>(GV);
@@ -5826,8 +5831,9 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
Alignment = getContext().getTypeUnadjustedAlign(Ty);
Alignment = Alignment < 128 ? 64 : 128;
} else {
- Alignment = std::max(getContext().getTypeAlign(Ty),
- (unsigned)getTarget().getPointerWidth(0));
+ Alignment =
+ std::max(getContext().getTypeAlign(Ty),
+ (unsigned)getTarget().getPointerWidth(LangAS::Default));
}
Size = llvm::alignTo(Size, Alignment);
@@ -5946,13 +5952,13 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
return false;
}
-bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize,
- llvm::Type *eltTy,
- unsigned elts) const {
- if (!llvm::isPowerOf2_32(elts))
+bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
+ llvm::Type *EltTy,
+ unsigned NumElts) const {
+ if (!llvm::isPowerOf2_32(NumElts))
return false;
- if (totalSize.getQuantity() != 8 &&
- (totalSize.getQuantity() != 16 || elts == 1))
+ if (VectorSize.getQuantity() != 8 &&
+ (VectorSize.getQuantity() != 16 || NumElts == 1))
return false;
return true;
}
@@ -5992,6 +5998,16 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
CGF.CurFnInfo->getCallingConvention());
+ // Empty records are ignored for parameter passing purposes.
+ if (AI.isIgnore()) {
+ uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
+ CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
+ VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
+ auto *Load = CGF.Builder.CreateLoad(VAListAddr);
+ Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
+ return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
+ }
+
bool IsIndirect = AI.isIndirect();
llvm::Type *BaseTy = CGF.ConvertType(Ty);
@@ -6242,7 +6258,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
- uint64_t PointerSize = getTarget().getPointerWidth(0) / 8;
+ uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
// Empty records are ignored for parameter passing purposes.
@@ -6290,7 +6306,7 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
namespace {
-class ARMABIInfo : public SwiftABIInfo {
+class ARMABIInfo : public ABIInfo {
public:
enum ABIKind {
APCS = 0,
@@ -6304,8 +6320,7 @@ private:
bool IsFloatABISoftFP;
public:
- ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind)
- : SwiftABIInfo(CGT), Kind(_Kind) {
+ ARMABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {
setCCs();
IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" ||
CGT.getCodeGenOpts().FloatABI == ""; // default
@@ -6369,22 +6384,23 @@ private:
llvm::CallingConv::ID getLLVMDefaultCC() const;
llvm::CallingConv::ID getABIDefaultCC() const;
void setCCs();
+};
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
- bool isSwiftErrorInRegister() const override {
- return true;
- }
- bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
- unsigned elts) const override;
+class ARMSwiftABIInfo : public SwiftABIInfo {
+public:
+ explicit ARMSwiftABIInfo(CodeGenTypes &CGT)
+ : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
+
+ bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+ unsigned NumElts) const override;
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
- : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {}
+ : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {
+ SwiftInfo = std::make_unique<ARMSwiftABIInfo>(CGT);
+ }
const ARMABIInfo &getABIInfo() const {
return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -6422,13 +6438,13 @@ public:
auto *Fn = cast<llvm::Function>(GV);
if (const auto *TA = FD->getAttr<TargetAttr>()) {
- ParsedTargetAttr Attr = TA->parse();
+ ParsedTargetAttr Attr =
+ CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
if (!Attr.BranchProtection.empty()) {
TargetInfo::BranchProtectionInfo BPI;
StringRef DiagMsg;
- StringRef Arch = Attr.Architecture.empty()
- ? CGM.getTarget().getTargetOpts().CPU
- : Attr.Architecture;
+ StringRef Arch =
+ Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU;
if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
Arch, BPI, DiagMsg)) {
CGM.getDiags().Report(
@@ -6451,11 +6467,11 @@ public:
// If the Branch Protection attribute is missing, validate the target
// Architecture attribute against Branch Protection command line
// settings.
- if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.Architecture))
+ if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.CPU))
CGM.getDiags().Report(
D->getLocation(),
diag::warn_target_unsupported_branch_protection_attribute)
- << Attr.Architecture;
+ << Attr.CPU;
}
}
@@ -6690,7 +6706,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
getABIKind() == ARMABIInfo::AAPCS) {
TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
- ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
+ ABIAlign = std::clamp(TyAlign, (uint64_t)4, (uint64_t)8);
} else {
TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
}
@@ -6986,16 +7002,15 @@ bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const {
}
}
-bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
- llvm::Type *eltTy,
- unsigned numElts) const {
- if (!llvm::isPowerOf2_32(numElts))
+bool ARMSwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+ unsigned NumElts) const {
+ if (!llvm::isPowerOf2_32(NumElts))
return false;
- unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy);
+ unsigned size = CGT.getDataLayout().getTypeStoreSizeInBits(EltTy);
if (size > 64)
return false;
- if (vectorSize.getQuantity() != 8 &&
- (vectorSize.getQuantity() != 16 || numElts == 1))
+ if (VectorSize.getQuantity() != 8 &&
+ (VectorSize.getQuantity() != 16 || NumElts == 1))
return false;
return true;
}
@@ -7046,10 +7061,10 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
- Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
- getVAListElementType(CGF), SlotSize);
- Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
- return Addr;
+ VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
+ auto *Load = CGF.Builder.CreateLoad(VAListAddr);
+ Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
+ return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
}
CharUnits TySize = getContext().getTypeSizeInChars(Ty);
@@ -7380,13 +7395,13 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
namespace {
-class SystemZABIInfo : public SwiftABIInfo {
+class SystemZABIInfo : public ABIInfo {
bool HasVector;
bool IsSoftFloatABI;
public:
SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF)
- : SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
+ : ABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
bool isPromotableIntegerTypeForABI(QualType Ty) const;
bool isCompoundType(QualType Ty) const;
@@ -7397,30 +7412,58 @@ public:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType ArgTy) const;
- void computeInfo(CGFunctionInfo &FI) const override {
- if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- for (auto &I : FI.arguments())
- I.info = classifyArgumentType(I.type);
- }
-
+ void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
-
- bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
- bool asReturnValue) const override {
- return occupiesMoreThan(CGT, scalars, /*total*/ 4);
- }
- bool isSwiftErrorInRegister() const override {
- return false;
- }
};
class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
+ // These are used for speeding up the search for a visible vector ABI.
+ mutable bool HasVisibleVecABIFlag = false;
+ mutable std::set<const Type *> SeenTypes;
+
+ // Returns true (the first time) if Ty is or found to make use of a vector
+ // type (e.g. as a function argument).
+ bool isVectorTypeBased(const Type *Ty) const;
+
public:
SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
: TargetCodeGenInfo(
- std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {}
+ std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {
+ SwiftInfo =
+ std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+ }
+
+ // The vector ABI is different when the vector facility is present and when
+ // a module e.g. defines an externally visible vector variable, a flag
+ // indicating a visible vector ABI is added. Eventually this will result in
+ // a GNU attribute indicating the vector ABI of the module. Ty is the type
+ // of a variable or function parameter that is globally visible.
+ void handleExternallyVisibleObjABI(const Type *Ty,
+ CodeGen::CodeGenModule &M) const {
+ if (!HasVisibleVecABIFlag && isVectorTypeBased(Ty)) {
+ M.getModule().addModuleFlag(llvm::Module::Warning,
+ "s390x-visible-vector-ABI", 1);
+ HasVisibleVecABIFlag = true;
+ }
+ }
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override {
+ if (!D)
+ return;
+
+ // Check if the vector ABI becomes visible by an externally visible
+ // variable or function.
+ if (const auto *VD = dyn_cast<VarDecl>(D)) {
+ if (VD->isExternallyVisible())
+ handleExternallyVisibleObjABI(VD->getType().getTypePtr(), M);
+ }
+ else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ if (FD->isExternallyVisible())
+ handleExternallyVisibleObjABI(FD->getType().getTypePtr(), M);
+ }
+ }
llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID,
CGBuilderTy &Builder,
@@ -7579,6 +7622,9 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
// Every non-vector argument occupies 8 bytes and is passed by preference
// in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are
// always passed on the stack.
+ const SystemZTargetCodeGenInfo &SZCGI =
+ static_cast<const SystemZTargetCodeGenInfo &>(
+ CGT.getCGM().getTargetCodeGenInfo());
Ty = getContext().getCanonicalType(Ty);
auto TyInfo = getContext().getTypeInfoInChars(Ty);
llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty);
@@ -7589,6 +7635,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
bool IsVector = false;
CharUnits UnpaddedSize;
CharUnits DirectAlign;
+ SZCGI.handleExternallyVisibleObjABI(Ty.getTypePtr(), CGT.getCGM());
if (IsIndirect) {
DirectTy = llvm::PointerType::getUnqual(DirectTy);
UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8);
@@ -7783,6 +7830,51 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(nullptr);
}
+void SystemZABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ const SystemZTargetCodeGenInfo &SZCGI =
+ static_cast<const SystemZTargetCodeGenInfo &>(
+ CGT.getCGM().getTargetCodeGenInfo());
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ unsigned Idx = 0;
+ for (auto &I : FI.arguments()) {
+ I.info = classifyArgumentType(I.type);
+ if (FI.isVariadic() && Idx++ >= FI.getNumRequiredArgs())
+ // Check if a vararg vector argument is passed, in which case the
+ // vector ABI becomes visible as the va_list could be passed on to
+ // other functions.
+ SZCGI.handleExternallyVisibleObjABI(I.type.getTypePtr(), CGT.getCGM());
+ }
+}
+
+bool SystemZTargetCodeGenInfo::isVectorTypeBased(const Type *Ty) const {
+ while (Ty->isPointerType() || Ty->isArrayType())
+ Ty = Ty->getPointeeOrArrayElementType();
+ if (!SeenTypes.insert(Ty).second)
+ return false;
+ if (Ty->isVectorType())
+ return true;
+ if (const auto *RecordTy = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RecordTy->getDecl();
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ if (CXXRD->hasDefinition())
+ for (const auto &I : CXXRD->bases())
+ if (isVectorTypeBased(I.getType().getTypePtr()))
+ return true;
+ for (const auto *FD : RD->fields())
+ if (isVectorTypeBased(FD->getType().getTypePtr()))
+ return true;
+ }
+ if (const auto *FT = Ty->getAs<FunctionType>())
+ if (isVectorTypeBased(FT->getReturnType().getTypePtr()))
+ return true;
+ if (const FunctionProtoType *Proto = Ty->getAs<FunctionProtoType>())
+ for (auto ParamType : Proto->getParamTypes())
+ if (isVectorTypeBased(ParamType.getTypePtr()))
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// MSP430 ABI Implementation
//===----------------------------------------------------------------------===//
@@ -7867,7 +7959,7 @@ void MSP430TargetCodeGenInfo::setTargetAttributes(
namespace {
class MipsABIInfo : public ABIInfo {
bool IsO32;
- unsigned MinABIStackAlignInBytes, StackAlignInBytes;
+ const unsigned MinABIStackAlignInBytes, StackAlignInBytes;
void CoerceToIntArgs(uint64_t TySize,
SmallVectorImpl<llvm::Type *> &ArgList) const;
llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const;
@@ -8044,8 +8136,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
uint64_t TySize = getContext().getTypeSize(Ty);
uint64_t Align = getContext().getTypeAlign(Ty) / 8;
- Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes),
- (uint64_t)StackAlignInBytes);
+ Align = std::clamp(Align, (uint64_t)MinABIStackAlignInBytes,
+ (uint64_t)StackAlignInBytes);
unsigned CurrOffset = llvm::alignTo(Offset, Align);
Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
@@ -8200,7 +8292,7 @@ Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
// Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
// Pointers are also promoted in the same way but this only matters for N32.
unsigned SlotSizeInBits = IsO32 ? 32 : 64;
- unsigned PtrWidth = getTarget().getPointerWidth(0);
+ unsigned PtrWidth = getTarget().getPointerWidth(LangAS::Default);
bool DidPromote = false;
if ((Ty->isIntegerType() &&
getContext().getIntWidth(Ty) < SlotSizeInBits) ||
@@ -8340,18 +8432,23 @@ public:
: DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {}
ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const {
- if (isAggregateTypeForABI(Ty)) {
- // On AVR, a return struct with size less than or equals to 8 bytes is
- // returned directly via registers R18-R25. On AVRTiny, a return struct
- // with size less than or equals to 4 bytes is returned directly via
- // registers R22-R25.
- if (getContext().getTypeSize(Ty) <= RetRegs * 8)
- return ABIArgInfo::getDirect();
- // A return struct with larger size is returned via a stack
- // slot, along with a pointer to it as the function's implicit argument.
+ // On AVR, a return struct with size less than or equals to 8 bytes is
+ // returned directly via registers R18-R25. On AVRTiny, a return struct
+ // with size less than or equals to 4 bytes is returned directly via
+ // registers R22-R25.
+ if (isAggregateTypeForABI(Ty) &&
+ getContext().getTypeSize(Ty) <= RetRegs * 8)
+ return ABIArgInfo::getDirect();
+ // A return value (struct or scalar) with larger size is returned via a
+ // stack slot, along with a pointer as the function's implicit argument.
+ if (getContext().getTypeSize(Ty) > RetRegs * 8) {
LargeRet = true;
return getNaturalAlignIndirect(Ty);
}
+ // An i8 return value should not be extended to i16, since AVR has 8-bit
+ // registers.
+ if (Ty->isIntegralOrEnumerationType() && getContext().getTypeSize(Ty) <= 8)
+ return ABIArgInfo::getDirect();
// Otherwise we follow the default way which is compatible.
return DefaultABIInfo::classifyReturnType(Ty);
}
@@ -9445,8 +9542,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const bool IsHIPKernel =
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+ const bool IsOpenMPkernel =
+ M.getLangOpts().OpenMPIsDevice &&
+ (F->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL);
- if (IsHIPKernel)
+ // TODO: This should be moved to language specific attributes instead.
+ if (IsHIPKernel || IsOpenMPkernel)
F->addFnAttr("uniform-work-group-size", "true");
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
@@ -9747,7 +9848,7 @@ private:
// Check if Ty is a usable substitute for the coercion type.
bool isUsableType(llvm::StructType *Ty) const {
- return llvm::makeArrayRef(Elems) == Ty->elements();
+ return llvm::ArrayRef(Elems) == Ty->elements();
}
// Get the coercion type as a literal struct type.
@@ -10302,7 +10403,7 @@ bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
bool IsRecursive) {
if (!ID || IncompleteUsedCount)
- return; // No key or it is is an incomplete sub-type so don't add.
+ return; // No key or it is an incomplete sub-type so don't add.
Entry &E = Map[ID];
if (IsRecursive && !E.Str.empty()) {
assert(E.State==Recursive && E.Str.size() == Str.size() &&
@@ -10907,11 +11008,6 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
}
}
- // We must track the number of GPRs used in order to conform to the RISC-V
- // ABI, as integer scalars passed in registers should have signext/zeroext
- // when promoted, but are anyext if passed on the stack. As GPR usage is
- // different for variadic arguments, we must also track whether we are
- // examining a vararg or not.
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
int NumFixedArgs = FI.getNumRequiredArgs();
@@ -11001,9 +11097,22 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
// Unions aren't eligible unless they're empty (which is caught above).
if (RD->isUnion())
return false;
+ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+ // If this is a C++ record, check the bases first.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ for (const CXXBaseSpecifier &B : CXXRD->bases()) {
+ const auto *BDecl =
+ cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl());
+ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl);
+ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff,
+ Field1Ty, Field1Off, Field2Ty,
+ Field2Off);
+ if (!Ret)
+ return false;
+ }
+ }
int ZeroWidthBitFieldCount = 0;
for (const FieldDecl *FD : RD->fields()) {
- const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
QualType QTy = FD->getType();
if (FD->isBitField()) {
@@ -11090,7 +11199,7 @@ ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
}
CharUnits Field2Align =
- CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty));
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty));
CharUnits Field1End = Field1Off +
CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align);
@@ -11176,7 +11285,6 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
}
uint64_t NeededAlign = getContext().getTypeAlign(Ty);
- bool MustUseStack = false;
// Determine the number of GPRs needed to pass the current argument
// according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
// register pairs, so may consume 3 registers.
@@ -11187,7 +11295,6 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
NeededArgGPRs = 2;
if (NeededArgGPRs > ArgGPRsLeft) {
- MustUseStack = true;
NeededArgGPRs = ArgGPRsLeft;
}
@@ -11198,14 +11305,13 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- // All integral types are promoted to XLen width, unless passed on the
- // stack.
- if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) {
+ // All integral types are promoted to XLen width
+ if (Size < XLen && Ty->isIntegralOrEnumerationType()) {
return extendType(Ty);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
- if (EIT->getNumBits() < XLen && !MustUseStack)
+ if (EIT->getNumBits() < XLen)
return extendType(Ty);
if (EIT->getNumBits() > 128 ||
(!getContext().getTargetInfo().hasInt128Type() &&
@@ -11522,6 +11628,524 @@ public:
} // end anonymous namespace
//===----------------------------------------------------------------------===//
+// BPF ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class BPFABIInfo : public DefaultABIInfo {
+public:
+ BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+ ABIArgInfo classifyArgumentType(QualType Ty) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ if (isAggregateTypeForABI(Ty)) {
+ uint64_t Bits = getContext().getTypeSize(Ty);
+ if (Bits == 0)
+ return ABIArgInfo::getIgnore();
+
+ // If the aggregate needs 1 or 2 registers, do not use reference.
+ if (Bits <= 128) {
+ llvm::Type *CoerceTy;
+ if (Bits <= 64) {
+ CoerceTy =
+ llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
+ } else {
+ llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64);
+ CoerceTy = llvm::ArrayType::get(RegTy, 2);
+ }
+ return ABIArgInfo::getDirect(CoerceTy);
+ } else {
+ return getNaturalAlignIndirect(Ty);
+ }
+ }
+
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ ASTContext &Context = getContext();
+ if (const auto *EIT = Ty->getAs<BitIntType>())
+ if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty))
+ return getNaturalAlignIndirect(Ty);
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
+ }
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const {
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ if (isAggregateTypeForABI(RetTy))
+ return getNaturalAlignIndirect(RetTy);
+
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+ RetTy = EnumTy->getDecl()->getIntegerType();
+
+ ASTContext &Context = getContext();
+ if (const auto *EIT = RetTy->getAs<BitIntType>())
+ if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty))
+ return getNaturalAlignIndirect(RetTy);
+
+ // Caller will do necessary sign/zero extension.
+ return ABIArgInfo::getDirect();
+ }
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
+ }
+
+};
+
+class BPFTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ BPFTargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<BPFABIInfo>(CGT)) {}
+
+ const BPFABIInfo &getABIInfo() const {
+ return static_cast<const BPFABIInfo&>(TargetCodeGenInfo::getABIInfo());
+ }
+};
+
+}
+
+// LoongArch ABI Implementation. Documented at
+// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+class LoongArchABIInfo : public DefaultABIInfo {
+private:
+ // Size of the integer ('r') registers in bits.
+ unsigned GRLen;
+ // Size of the floating point ('f') registers in bits.
+ unsigned FRLen;
+ // Number of general-purpose argument registers.
+ static const int NumGARs = 8;
+ // Number of floating-point argument registers.
+ static const int NumFARs = 8;
+ bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff,
+ llvm::Type *&Field1Ty,
+ CharUnits &Field1Off,
+ llvm::Type *&Field2Ty,
+ CharUnits &Field2Off) const;
+
+public:
+ LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen)
+ : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {}
+
+ void computeInfo(CGFunctionInfo &FI) const override;
+
+ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft,
+ int &FARsLeft) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+
+ ABIArgInfo extendType(QualType Ty) const;
+
+ bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
+ CharUnits &Field1Off, llvm::Type *&Field2Ty,
+ CharUnits &Field2Off, int &NeededArgGPRs,
+ int &NeededArgFPRs) const;
+ ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty,
+ CharUnits Field1Off,
+ llvm::Type *Field2Ty,
+ CharUnits Field2Off) const;
+};
+} // end anonymous namespace
+
+void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ QualType RetTy = FI.getReturnType();
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(RetTy);
+
+ // IsRetIndirect is true if classifyArgumentType indicated the value should
+ // be passed indirect, or if the type size is a scalar greater than 2*GRLen
+ // and not a complex type with elements <= FRLen. e.g. fp128 is passed direct
+ // in LLVM IR, relying on the backend lowering code to rewrite the argument
+ // list and pass indirectly on LA32.
+ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
+ if (!IsRetIndirect && RetTy->isScalarType() &&
+ getContext().getTypeSize(RetTy) > (2 * GRLen)) {
+ if (RetTy->isComplexType() && FRLen) {
+ QualType EltTy = RetTy->castAs<ComplexType>()->getElementType();
+ IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen;
+ } else {
+ // This is a normal scalar > 2*GRLen, such as fp128 on LA32.
+ IsRetIndirect = true;
+ }
+ }
+
+ // We must track the number of GARs and FARs used in order to conform to the
+ // LoongArch ABI. As GAR usage is different for variadic arguments, we must
+ // also track whether we are examining a vararg or not.
+ int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs;
+ int FARsLeft = FRLen ? NumFARs : 0;
+ int NumFixedArgs = FI.getNumRequiredArgs();
+
+ int ArgNum = 0;
+ for (auto &ArgInfo : FI.arguments()) {
+ ArgInfo.info = classifyArgumentType(
+ ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft);
+ ArgNum++;
+ }
+}
+
+// Returns true if the struct is a potential candidate to be passed in FARs (and
+// GARs). If this function returns true, the caller is responsible for checking
+// that if there is only a single field then that field is a float.
+bool LoongArchABIInfo::detectFARsEligibleStructHelper(
+ QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off,
+ llvm::Type *&Field2Ty, CharUnits &Field2Off) const {
+ bool IsInt = Ty->isIntegralOrEnumerationType();
+ bool IsFloat = Ty->isRealFloatingType();
+
+ if (IsInt || IsFloat) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (IsInt && Size > GRLen)
+ return false;
+ // Can't be eligible if larger than the FP registers. Half precision isn't
+ // currently supported on LoongArch and the ABI hasn't been confirmed, so
+ // default to the integer ABI in that case.
+ if (IsFloat && (Size > FRLen || Size < 32))
+ return false;
+ // Can't be eligible if an integer type was already found (int+int pairs
+ // are not eligible).
+ if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
+ return false;
+ if (!Field1Ty) {
+ Field1Ty = CGT.ConvertType(Ty);
+ Field1Off = CurOff;
+ return true;
+ }
+ if (!Field2Ty) {
+ Field2Ty = CGT.ConvertType(Ty);
+ Field2Off = CurOff;
+ return true;
+ }
+ return false;
+ }
+
+ if (auto CTy = Ty->getAs<ComplexType>()) {
+ if (Field1Ty)
+ return false;
+ QualType EltTy = CTy->getElementType();
+ if (getContext().getTypeSize(EltTy) > FRLen)
+ return false;
+ Field1Ty = CGT.ConvertType(EltTy);
+ Field1Off = CurOff;
+ Field2Ty = Field1Ty;
+ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
+ return true;
+ }
+
+ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
+ uint64_t ArraySize = ATy->getSize().getZExtValue();
+ QualType EltTy = ATy->getElementType();
+ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
+ for (uint64_t i = 0; i < ArraySize; ++i) {
+ if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off,
+ Field2Ty, Field2Off))
+ return false;
+ CurOff += EltSize;
+ }
+ return true;
+ }
+
+ if (const auto *RTy = Ty->getAs<RecordType>()) {
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are not eligible for the FP calling convention.
+ if (getRecordArgABI(Ty, CGT.getCXXABI()))
+ return false;
+ if (isEmptyRecord(getContext(), Ty, true))
+ return true;
+ const RecordDecl *RD = RTy->getDecl();
+ // Unions aren't eligible unless they're empty (which is caught above).
+ if (RD->isUnion())
+ return false;
+ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+ // If this is a C++ record, check the bases first.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ for (const CXXBaseSpecifier &B : CXXRD->bases()) {
+ const auto *BDecl =
+ cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl());
+ if (!detectFARsEligibleStructHelper(
+ B.getType(), CurOff + Layout.getBaseClassOffset(BDecl),
+ Field1Ty, Field1Off, Field2Ty, Field2Off))
+ return false;
+ }
+ }
+ for (const FieldDecl *FD : RD->fields()) {
+ QualType QTy = FD->getType();
+ if (FD->isBitField()) {
+ unsigned BitWidth = FD->getBitWidthValue(getContext());
+ // Zero-width bitfields are ignored.
+ if (BitWidth == 0)
+ continue;
+ // Allow a bitfield with a type greater than GRLen as long as the
+ // bitwidth is GRLen or less.
+ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) {
+ QTy = getContext().getIntTypeForBitwidth(GRLen, false);
+ }
+ }
+
+ if (!detectFARsEligibleStructHelper(
+ QTy,
+ CurOff + getContext().toCharUnitsFromBits(
+ Layout.getFieldOffset(FD->getFieldIndex())),
+ Field1Ty, Field1Off, Field2Ty, Field2Off))
+ return false;
+ }
+ return Field1Ty != nullptr;
+ }
+
+ return false;
+}
+
+// Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when
+// flattened it contains a single fp value, fp+fp, or int+fp of appropriate
+// size). If so, NeededFARs and NeededGARs are incremented appropriately.
+bool LoongArchABIInfo::detectFARsEligibleStruct(
+ QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off,
+ llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs,
+ int &NeededFARs) const {
+ Field1Ty = nullptr;
+ Field2Ty = nullptr;
+ NeededGARs = 0;
+ NeededFARs = 0;
+ if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty,
+ Field1Off, Field2Ty, Field2Off))
+ return false;
+ // Not really a candidate if we have a single int but no float.
+ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
+ return false;
+ if (Field1Ty && Field1Ty->isFloatingPointTy())
+ NeededFARs++;
+ else if (Field1Ty)
+ NeededGARs++;
+ if (Field2Ty && Field2Ty->isFloatingPointTy())
+ NeededFARs++;
+ else if (Field2Ty)
+ NeededGARs++;
+ return true;
+}
+
+// Call getCoerceAndExpand for the two-element flattened struct described by
+// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
+// appropriate coerceToType and unpaddedCoerceToType.
+ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct(
+ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
+ CharUnits Field2Off) const {
+ SmallVector<llvm::Type *, 3> CoerceElts;
+ SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
+ if (!Field1Off.isZero())
+ CoerceElts.push_back(llvm::ArrayType::get(
+ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
+
+ CoerceElts.push_back(Field1Ty);
+ UnpaddedCoerceElts.push_back(Field1Ty);
+
+ if (!Field2Ty) {
+ return ABIArgInfo::getCoerceAndExpand(
+ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
+ UnpaddedCoerceElts[0]);
+ }
+
+ CharUnits Field2Align =
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty));
+ CharUnits Field1End =
+ Field1Off +
+ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
+ CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align);
+
+ CharUnits Padding = CharUnits::Zero();
+ if (Field2Off > Field2OffNoPadNoPack)
+ Padding = Field2Off - Field2OffNoPadNoPack;
+ else if (Field2Off != Field2Align && Field2Off > Field1End)
+ Padding = Field2Off - Field1End;
+
+ bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
+
+ if (!Padding.isZero())
+ CoerceElts.push_back(llvm::ArrayType::get(
+ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
+
+ CoerceElts.push_back(Field2Ty);
+ UnpaddedCoerceElts.push_back(Field2Ty);
+
+ return ABIArgInfo::getCoerceAndExpand(
+ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked),
+ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked));
+}
+
+// Classify one argument for the LoongArch calling convention.
+//
+// \param Ty       the (possibly transparent-union) argument type.
+// \param IsFixed  true for named parameters; false for variadic ones, which
+//                 are never eligible for FAR (floating-point register) passing
+//                 and obey the "aligned register pair" rule below.
+// \param GARsLeft in/out count of remaining general argument registers;
+//                 decremented by however many this argument consumes.
+// \param FARsLeft in/out count of remaining floating-point argument registers.
+ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
+ int &GARsLeft,
+ int &FARsLeft) const {
+ assert(GARsLeft <= NumGARs && "GAR tracking underflow");
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are always passed indirectly.
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+ // The pointer to the indirect copy itself occupies one GAR if any remain.
+ if (GARsLeft)
+ GARsLeft -= 1;
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
+ CGCXXABI::RAA_DirectInMemory);
+ }
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ uint64_t Size = getContext().getTypeSize(Ty);
+
+ // Pass floating point values via FARs if possible.
+ // Only named (fixed) scalar FP arguments that fit in an FR qualify.
+ if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
+ FRLen >= Size && FARsLeft) {
+ FARsLeft--;
+ return ABIArgInfo::getDirect();
+ }
+
+ // Complex types for the *f or *d ABI must be passed directly rather than
+ // using CoerceAndExpand.
+ // A complex value consumes two FARs (real and imaginary parts).
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) {
+ QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
+ if (getContext().getTypeSize(EltTy) <= FRLen) {
+ FARsLeft -= 2;
+ return ABIArgInfo::getDirect();
+ }
+ }
+
+ // Small structs containing one or two eligible fields (detected by
+ // detectFARsEligibleStruct) may be split across FARs/GARs when enough
+ // registers of each kind remain.
+ if (IsFixed && FRLen && Ty->isStructureOrClassType()) {
+ llvm::Type *Field1Ty = nullptr;
+ llvm::Type *Field2Ty = nullptr;
+ CharUnits Field1Off = CharUnits::Zero();
+ CharUnits Field2Off = CharUnits::Zero();
+ int NeededGARs = 0;
+ int NeededFARs = 0;
+ bool IsCandidate = detectFARsEligibleStruct(
+ Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs);
+ if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) {
+ GARsLeft -= NeededGARs;
+ FARsLeft -= NeededFARs;
+ return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty,
+ Field2Off);
+ }
+ }
+
+ uint64_t NeededAlign = getContext().getTypeAlign(Ty);
+ // Determine the number of GARs needed to pass the current argument
+ // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned"
+ // register pairs, so may consume 3 registers.
+ int NeededGARs = 1;
+ if (!IsFixed && NeededAlign == 2 * GRLen)
+ // (GARsLeft % 2) accounts for the register skipped to reach an
+ // even-numbered GAR for the aligned pair.
+ NeededGARs = 2 + (GARsLeft % 2);
+ else if (Size > GRLen && Size <= 2 * GRLen)
+ NeededGARs = 2;
+
+ // Clamp so the remaining portion (if any) spills to the stack.
+ if (NeededGARs > GARsLeft)
+ NeededGARs = GARsLeft;
+
+ GARsLeft -= NeededGARs;
+
+ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ // All integral types are promoted to GRLen width.
+ if (Size < GRLen && Ty->isIntegralOrEnumerationType())
+ return extendType(Ty);
+
+ // _BitInt: narrow values are extended like other integers; values wider
+ // than 128 bits (or 64 bits without __int128 support) go indirect.
+ if (const auto *EIT = Ty->getAs<BitIntType>()) {
+ if (EIT->getNumBits() < GRLen)
+ return extendType(Ty);
+ if (EIT->getNumBits() > 128 ||
+ (!getContext().getTargetInfo().hasInt128Type() &&
+ EIT->getNumBits() > 64))
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+ }
+
+ return ABIArgInfo::getDirect();
+ }
+
+ // Aggregates which are <= 2*GRLen will be passed in registers if possible,
+ // so coerce to integers.
+ if (Size <= 2 * GRLen) {
+ // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is
+ // required, and a 2-element GRLen array if only GRLen alignment is
+ // required.
+ if (Size <= GRLen) {
+ return ABIArgInfo::getDirect(
+ llvm::IntegerType::get(getVMContext(), GRLen));
+ }
+ if (getContext().getTypeAlign(Ty) == 2 * GRLen) {
+ return ABIArgInfo::getDirect(
+ llvm::IntegerType::get(getVMContext(), 2 * GRLen));
+ }
+ return ABIArgInfo::getDirect(
+ llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2));
+ }
+ // Anything larger than 2*GRLen is passed indirectly (by reference).
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+}
+
+// Classify a function return type. Void returns are ignored; everything else
+// is classified exactly like a named argument, with a budget of two GARs and
+// (when the ABI has FP registers, i.e. FRLen != 0) two FARs for the return.
+ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+ // The rules for return and argument types are the same, so defer to
+ // classifyArgumentType.
+ int GARsLeft = 2;
+ int FARsLeft = FRLen ? 2 : 0;
+ return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft);
+}
+
+// Emit a va_arg load for type \p Ty from the va_list at \p VAListAddr.
+// Stack slots are GRLen/8 bytes wide; values wider than two slots are
+// passed (and thus fetched) indirectly through a pointer in the slot.
+Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8);
+
+ // Empty records are ignored for parameter passing purposes.
+ // Return the current va_list position, retyped, without advancing it.
+ if (isEmptyRecord(getContext(), Ty, true)) {
+ Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
+ getVAListElementType(CGF), SlotSize);
+ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
+ return Addr;
+ }
+
+ auto TInfo = getContext().getTypeInfoInChars(Ty);
+
+ // Arguments bigger than 2*GRLen bytes are passed indirectly.
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty,
+ /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo,
+ SlotSize,
+ /*AllowHigherAlign=*/true);
+}
+
+// Produce the extension classification for a sub-GRLen integer type.
+ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const {
+ int TySize = getContext().getTypeSize(Ty);
+ // LA64 ABI requires unsigned 32 bit integers to be sign extended.
+ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+ return ABIArgInfo::getSignExtend(Ty);
+ // Otherwise extend according to the type's own signedness.
+ return ABIArgInfo::getExtend(Ty);
+}
+
+namespace {
+// Target codegen info wrapper for LoongArch; its only role here is to own
+// a LoongArchABIInfo configured with the GR width (GRLen) and the ABI's
+// FP register width (FRLen: 0 for soft-float, 32 for *f, 64 for *d).
+class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen,
+ unsigned FRLen)
+ : TargetCodeGenInfo(
+ std::make_unique<LoongArchABIInfo>(CGT, GRLen, FRLen)) {}
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//
@@ -11666,7 +12290,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::riscv32:
case llvm::Triple::riscv64: {
StringRef ABIStr = getTarget().getABI();
- unsigned XLen = getTarget().getPointerWidth(0);
+ unsigned XLen = getTarget().getPointerWidth(LangAS::Default);
unsigned ABIFLen = 0;
if (ABIStr.endswith("f"))
ABIFLen = 32;
@@ -11749,6 +12373,20 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
: hasFP64 ? 64
: 32));
}
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::bpfel:
+ return SetCGInfo(new BPFTargetCodeGenInfo(Types));
+ case llvm::Triple::loongarch32:
+ case llvm::Triple::loongarch64: {
+ StringRef ABIStr = getTarget().getABI();
+ unsigned ABIFRLen = 0;
+ if (ABIStr.endswith("f"))
+ ABIFRLen = 32;
+ else if (ABIStr.endswith("d"))
+ ABIFRLen = 64;
+ return SetCGInfo(new LoongArchTargetCodeGenInfo(
+ Types, getTarget().getPointerWidth(LangAS::Default), ABIFRLen));
+ }
}
}
@@ -11835,8 +12473,8 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
llvm::SmallVector<llvm::Value *, 2> Args;
Args.push_back(Cast);
- for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
- Args.push_back(I);
+ for (llvm::Argument &A : llvm::drop_begin(F->args()))
+ Args.push_back(&A);
llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
call->setCallingConv(Invoke->getCallingConv());
Builder.CreateRetVoid();
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 30421612015b..c7c1ec7fce7e 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -38,6 +38,7 @@ class ABIInfo;
class CallArgList;
class CodeGenFunction;
class CGBlockInfo;
+class SwiftABIInfo;
/// TargetCodeGenInfo - This class organizes various target-specific
/// codegeneration issues, like target-specific attributes, builtins and so
@@ -45,6 +46,12 @@ class CGBlockInfo;
class TargetCodeGenInfo {
std::unique_ptr<ABIInfo> Info;
+protected:
+ // Target hooks supporting Swift calling conventions. The target must
+ // initialize this field if it claims to support these calling conventions
+ // by returning true from TargetInfo::checkCallingConvention for them.
+ std::unique_ptr<SwiftABIInfo> SwiftInfo;
+
public:
TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info);
virtual ~TargetCodeGenInfo();
@@ -52,6 +59,12 @@ public:
/// getABIInfo() - Returns ABI info helper for the target.
const ABIInfo &getABIInfo() const { return *Info; }
+ /// Returns Swift ABI info helper for the target.
+ const SwiftABIInfo &getSwiftABIInfo() const {
+ assert(SwiftInfo && "Swift ABI info has not been initialized");
+ return *SwiftInfo;
+ }
+
/// setTargetAttributes - Provides a convenient hook to handle extra
/// target-specific attributes for the given global.
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
diff --git a/clang/lib/CodeGen/VarBypassDetector.cpp b/clang/lib/CodeGen/VarBypassDetector.cpp
index e8717a61ce5e..6eda83dfdef2 100644
--- a/clang/lib/CodeGen/VarBypassDetector.cpp
+++ b/clang/lib/CodeGen/VarBypassDetector.cpp
@@ -77,7 +77,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S,
return false;
++StmtsToSkip;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Stmt::GotoStmtClass:
FromScopes.push_back({S, ParentScope});