Diffstat (limited to 'clang/lib/CodeGen')
67 files changed, 6009 insertions, 4195 deletions
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h index c1eb8a975796..755d2aaa7beb 100644 --- a/clang/lib/CodeGen/ABIInfo.h +++ b/clang/lib/CodeGen/ABIInfo.h @@ -33,7 +33,6 @@ namespace CodeGen { class CGFunctionInfo; class CodeGenFunction; class CodeGenTypes; - class SwiftABIInfo; // FIXME: All of this stuff should be part of the target interface // somehow. It is currently here because it is not clear how to factor @@ -44,9 +43,8 @@ namespace CodeGen { /// ABIInfo - Target specific hooks for defining how a type should be /// passed or returned from functions. class ABIInfo { - public: - CodeGen::CodeGenTypes &CGT; protected: + CodeGen::CodeGenTypes &CGT; llvm::CallingConv::ID RuntimeCC; public: ABIInfo(CodeGen::CodeGenTypes &cgt) @@ -54,8 +52,6 @@ namespace CodeGen { virtual ~ABIInfo(); - virtual bool supportsSwift() const { return false; } - virtual bool allowBFloatArgsAndRet() const { return false; } CodeGen::CGCXXABI &getCXXABI() const; @@ -114,33 +110,33 @@ namespace CodeGen { CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const; - - }; - /// A refining implementation of ABIInfo for targets that support swiftcall. - /// - /// If we find ourselves wanting multiple such refinements, they'll probably - /// be independent refinements, and we should probably find another way - /// to do it than simple inheritance. - class SwiftABIInfo : public ABIInfo { - public: - SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {} + /// Target specific hooks for defining how a type should be passed or returned + /// from functions with one of the Swift calling conventions. + class SwiftABIInfo { + protected: + CodeGenTypes &CGT; + bool SwiftErrorInRegister; - bool supportsSwift() const final { return true; } + public: + SwiftABIInfo(CodeGen::CodeGenTypes &CGT, bool SwiftErrorInRegister) + : CGT(CGT), SwiftErrorInRegister(SwiftErrorInRegister) {} - virtual bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> types, - bool asReturnValue) const = 0; + virtual ~SwiftABIInfo(); - virtual bool isLegalVectorTypeForSwift(CharUnits totalSize, - llvm::Type *eltTy, - unsigned elts) const; + /// Returns true if an aggregate which expands to the given type sequence + /// should be passed / returned indirectly. + virtual bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys, + bool AsReturnValue) const; - virtual bool isSwiftErrorInRegister() const = 0; + /// Returns true if the given vector type is legal from Swift's calling + /// convention perspective. + virtual bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const; - static bool classof(const ABIInfo *info) { - return info->supportsSwift(); - } + /// Returns true if swifterror is lowered to a register by the target ABI. 
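/// (A minimal usage sketch, with hypothetical class names: after this
/// split, a swiftcall-capable target composes a SwiftABIInfo next to its
/// ABIInfo rather than deriving one from the other, e.g.
///
///   class MyTargetSwiftABIInfo : public SwiftABIInfo {
///   public:
///     explicit MyTargetSwiftABIInfo(CodeGen::CodeGenTypes &CGT)
///         : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
///     bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
///                               bool AsReturnValue) const override;
///   };
/// )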
+ bool isSwiftErrorInRegister() const { return SwiftErrorInRegister; }; }; } // end namespace CodeGen } // end namespace clang diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7c4e35634e5d..10d6bff25e6d 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -29,12 +30,13 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" +#include "llvm/IRPrinter/IRPrintingPasses.h" #include "llvm/LTO/LTOBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" @@ -70,14 +72,17 @@ #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" @@ -87,6 +92,7 @@ #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> +#include <optional> using namespace clang; using namespace llvm; @@ -96,6 +102,11 @@ using namespace llvm; namespace llvm { extern cl::opt<bool> DebugInfoCorrelate; + +// Experiment to move sanitizers earlier. 
+static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP( + "sanitizer-early-opt-ep", cl::Optional, + cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false)); } namespace { @@ -215,6 +226,16 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth; Opts.TraceLoads = CGOpts.SanitizeCoverageTraceLoads; Opts.TraceStores = CGOpts.SanitizeCoverageTraceStores; + Opts.CollectControlFlow = CGOpts.SanitizeCoverageControlFlow; + return Opts; +} + +static SanitizerBinaryMetadataOptions +getSanitizerBinaryMetadataOptions(const CodeGenOptions &CGOpts) { + SanitizerBinaryMetadataOptions Opts; + Opts.Covered = CGOpts.SanitizeBinaryMetadataCovered; + Opts.Atomics = CGOpts.SanitizeBinaryMetadataAtomics; + Opts.UAR = CGOpts.SanitizeBinaryMetadataUAR; return Opts; } @@ -250,27 +271,28 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, switch (CodeGenOpts.getVecLib()) { case CodeGenOptions::Accelerate: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate); + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate, + TargetTriple); break; case CodeGenOptions::LIBMVEC: - switch(TargetTriple.getArch()) { - default: - break; - case llvm::Triple::x86_64: - TLII->addVectorizableFunctionsFromVecLib - (TargetLibraryInfoImpl::LIBMVEC_X86); - break; - } + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86, + TargetTriple); break; case CodeGenOptions::MASSV: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV); + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV, + TargetTriple); break; case CodeGenOptions::SVML: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML); + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML, + TargetTriple); + break; + case CodeGenOptions::SLEEF: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI, + TargetTriple); break; case CodeGenOptions::Darwin_libsystem_m: TLII->addVectorizableFunctionsFromVecLib( - TargetLibraryInfoImpl::DarwinLibSystemM); + TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple); break; default: break; @@ -278,22 +300,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, return TLII; } -static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) { - switch (CodeGenOpts.OptimizationLevel) { - default: - llvm_unreachable("Invalid optimization level!"); - case 0: - return CodeGenOpt::None; - case 1: - return CodeGenOpt::Less; - case 2: - return CodeGenOpt::Default; // O2/Os/Oz - case 3: - return CodeGenOpt::Aggressive; - } -} - -static Optional<llvm::CodeModel::Model> +static std::optional<llvm::CodeModel::Model> getCodeModel(const CodeGenOptions &CodeGenOpts) { unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) .Case("tiny", llvm::CodeModel::Tiny) @@ -305,7 +312,7 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) { .Default(~0u); assert(CodeModel != ~0u && "invalid code model!"); if (CodeModel == ~1u) - return None; + return std::nullopt; return static_cast<llvm::CodeModel::Model>(CodeModel); } @@ -391,7 +398,12 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.NoInfsFPMath = LangOpts.NoHonorInfs; Options.NoNaNsFPMath = LangOpts.NoHonorNaNs; Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; - Options.UnsafeFPMath = LangOpts.UnsafeFPMath; + Options.UnsafeFPMath = LangOpts.AllowFPReassoc && LangOpts.AllowRecip && + LangOpts.NoSignedZero && 
LangOpts.ApproxFunc && + (LangOpts.getDefaultFPContractMode() == + LangOptions::FPModeKind::FPM_Fast || + LangOpts.getDefaultFPContractMode() == + LangOptions::FPModeKind::FPM_FastHonorPragmas); Options.ApproxFuncFPMath = LangOpts.ApproxFunc; Options.BBSections = @@ -422,7 +434,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, CodeGenOpts.UniqueBasicBlockSectionNames; Options.TLSSize = CodeGenOpts.TLSSize; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS; + Options.ExplicitEmulatedTLS = true; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.StackUsageOutput = CodeGenOpts.StackUsageOutput; @@ -478,15 +490,16 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); Options.MCOptions.Argv0 = CodeGenOpts.Argv0; Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs; + Options.MCOptions.AsSecureLogFile = CodeGenOpts.AsSecureLogFile; Options.MisExpect = CodeGenOpts.MisExpect; return true; } -static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts, - const LangOptions &LangOpts) { +static std::optional<GCOVOptions> +getGCOVOptions(const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts) { if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes) - return None; + return std::nullopt; // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if // LLVM's -default-gcov-version flag is set to something invalid. GCOVOptions Options; @@ -500,11 +513,11 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts, return Options; } -static Optional<InstrProfOptions> +static std::optional<InstrProfOptions> getInstrProfOptions(const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts) { if (!CodeGenOpts.hasProfileClangInstr()) - return None; + return std::nullopt; InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; @@ -547,11 +560,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { return; } - Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts); + std::optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts); std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); llvm::Reloc::Model RM = CodeGenOpts.RelocationModel; - CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); + std::optional<CodeGenOpt::Level> OptLevelOrNone = + CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel); + assert(OptLevelOrNone && "Invalid optimization level!"); + CodeGenOpt::Level OptLevel = *OptLevelOrNone; llvm::TargetOptions Options; if (!initTargetOptions(Diags, Options, CodeGenOpts, TargetOpts, LangOpts, @@ -620,18 +636,48 @@ static OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { } } +static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, + PassBuilder &PB) { + // If the back-end supports KCFI operand bundle lowering, skip KCFIPass. + if (TargetTriple.getArch() == llvm::Triple::x86_64 || + TargetTriple.isAArch64(64)) + return; + + // Ensure we lower KCFI operand bundles with -O0. 
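  // (Sketch of the check that KCFI lowering materializes, assuming the
  // usual prefix-hash scheme; the exact offset and encoding are
  // target-defined. An indirect call carries a "kcfi" operand bundle with
  // the callee's expected type hash:
  //
  //   call void %fn(...) [ "kcfi"(i32 HASH) ]
  //
  // and lowering expands it to roughly:
  //
  //   if (*(const i32 *)((char *)fn - PrefixOffset) != HASH) trap();
  //   call void %fn(...)
  // )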
+ PB.registerOptimizerLastEPCallback( + [&](ModulePassManager &MPM, OptimizationLevel Level) { + if (Level == OptimizationLevel::O0 && + LangOpts.Sanitize.has(SanitizerKind::KCFI)) + MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass())); + }); + + // When optimizations are requested, run KCFIPass after InstCombine to + // avoid unnecessary checks. + PB.registerPeepholeEPCallback( + [&](FunctionPassManager &FPM, OptimizationLevel Level) { + if (Level != OptimizationLevel::O0 && + LangOpts.Sanitize.has(SanitizerKind::KCFI)) + FPM.addPass(KCFIPass()); + }); +} + static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { - PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM, - OptimizationLevel Level) { + auto SanitizersCallback = [&](ModulePassManager &MPM, + OptimizationLevel Level) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); - MPM.addPass(ModuleSanitizerCoveragePass( + MPM.addPass(SanitizerCoveragePass( SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, CodeGenOpts.SanitizeCoverageIgnorelistFiles)); } + if (CodeGenOpts.hasSanitizeBinaryMetadata()) { + MPM.addPass(SanitizerBinaryMetadataPass( + getSanitizerBinaryMetadataOptions(CodeGenOpts))); + } + auto MSanPass = [&](SanitizerMask Mask, bool CompileKernel) { if (LangOpts.Sanitize.has(Mask)) { int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins; @@ -639,22 +685,21 @@ static void addSanitizers(const Triple &TargetTriple, MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel, CodeGenOpts.SanitizeMemoryParamRetval); - MPM.addPass(ModuleMemorySanitizerPass(options)); - FunctionPassManager FPM; - FPM.addPass(MemorySanitizerPass(options)); + MPM.addPass(MemorySanitizerPass(options)); if (Level != OptimizationLevel::O0) { - // MemorySanitizer inserts complex instrumentation that mostly - // follows the logic of the original code, but operates on - // "shadow" values. It can benefit from re-running some - // general purpose optimization passes. - FPM.addPass(EarlyCSEPass()); - // TODO: Consider add more passes like in - // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible - // difference on size. It's not clear if the rest is still - // usefull. InstCombinePass breakes - // compiler-rt/test/msan/select_origin.cpp. + // MemorySanitizer inserts complex instrumentation that mostly follows + // the logic of the original code, but operates on "shadow" values. It + // can benefit from re-running some general purpose optimization + // passes. + MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); + FunctionPassManager FPM; + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa.
*/)); + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(GVNPass()); + FPM.addPass(InstCombinePass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } }; MSanPass(SanitizerKind::Memory, false); @@ -676,8 +721,8 @@ static void addSanitizers(const Triple &TargetTriple, Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); - MPM.addPass(ModuleAddressSanitizerPass( - Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); + MPM.addPass(AddressSanitizerPass(Opts, UseGlobalGC, UseOdrIndicator, + DestructorKind)); } }; ASanPass(SanitizerKind::Address, false); @@ -697,13 +742,28 @@ static void addSanitizers(const Triple &TargetTriple, if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) { MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles)); } - }); + }; + if (ClSanitizeOnOptimizerEarlyEP) { + PB.registerOptimizerEarlyEPCallback( + [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) { + ModulePassManager NewMPM; + SanitizersCallback(NewMPM, Level); + if (!NewMPM.isEmpty()) { + // Sanitizers can abandon<GlobalsAA>. + NewMPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); + MPM.addPass(std::move(NewMPM)); + } + }); + } else { + // LastEP does not need GlobalsAA. + PB.registerOptimizerLastEPCallback(SanitizersCallback); + } } void EmitAssemblyHelper::RunOptimizationPipeline( BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS, std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS) { - Optional<PGOOptions> PGOOpt; + std::optional<PGOOptions> PGOOpt; if (CodeGenOpts.hasProfileIRInstr()) // -fprofile-generate. @@ -782,12 +842,20 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PrintPassOptions PrintPassOpts; PrintPassOpts.Indent = DebugPassStructure; PrintPassOpts.SkipAnalyses = DebugPassStructure; - StandardInstrumentations SI(CodeGenOpts.DebugPassManager || - DebugPassStructure, - /*VerifyEach*/ false, PrintPassOpts); + StandardInstrumentations SI( + TheModule->getContext(), + (CodeGenOpts.DebugPassManager || DebugPassStructure), + /*VerifyEach*/ false, PrintPassOpts); SI.registerCallbacks(PIC, &FAM); PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC); + if (CodeGenOpts.EnableAssignmentTracking) { + PB.registerPipelineStartEPCallback( + [&](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(AssignmentTrackingPass()); + }); + } + // Enable verify-debuginfo-preserve-each for new PM. DebugifyEachInstrumentation Debugify; DebugInfoPerPass DebugInfoBeforePass; @@ -896,15 +964,18 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Don't add sanitizers if we are here from ThinLTO PostLink. That already // done on PreLink stage. 
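  // (Aside on the two extension points used above: OptimizerEarlyEP
  // callbacks run early in the module optimization pipeline, OptimizerLastEP
  // callbacks near its end. Because module sanitizer passes can invalidate
  // module analyses, the early-EP path re-requires GlobalsAA afterwards:
  //
  //   NewMPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
  //
  // so later passes still see mod/ref information for globals.)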
- if (!IsThinLTOPostLink) + if (!IsThinLTOPostLink) { addSanitizers(TargetTriple, CodeGenOpts, LangOpts, PB); + addKCFIPass(TargetTriple, LangOpts, PB); + } - if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts)) + if (std::optional<GCOVOptions> Options = + getGCOVOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( [Options](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(GCOVProfilerPass(*Options)); }); - if (Optional<InstrProfOptions> Options = + if (std::optional<InstrProfOptions> Options = getInstrProfOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( [Options](ModulePassManager &MPM, OptimizationLevel Level) { @@ -933,19 +1004,24 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) MPM.addPass(VerifierPass()); - switch (Action) { - case Backend_EmitBC: + if (Action == Backend_EmitBC || Action == Backend_EmitLL) { if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { - if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { - ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); - if (!ThinLinkOS) - return; - } if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", CodeGenOpts.EnableSplitLTOUnit); - MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os() - : nullptr)); + if (Action == Backend_EmitBC) { + if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { + ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); + if (!ThinLinkOS) + return; + } + MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os() + : nullptr)); + } else { + MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, + /*EmitLTOSummary=*/true)); + } + } else { // Emit a module summary by default for Regular LTO except for ld64 // targets @@ -957,17 +1033,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline( TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", uint32_t(1)); } - MPM.addPass( - BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); + if (Action == Backend_EmitBC) + MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, + EmitLTOSummary)); + else + MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, + EmitLTOSummary)); } - break; - - case Backend_EmitLL: - MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists)); - break; - - default: - break; } // Now that we have all of the passes ready, run them. 
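A condensed sketch of the emission logic after the restructuring above: Backend_EmitLL now goes through the same LTO-aware path as Backend_EmitBC, so textual IR output can carry a summary as well (condensed from the hunks above, with EmitLTOSummary as computed there):

  if (Action == Backend_EmitBC)
    MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
                                  EmitLTOSummary));
  else // Backend_EmitLL
    MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
                                EmitLTOSummary));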
@@ -1059,7 +1131,7 @@ static void runThinLTOBackend( if (!lto::initImportList(*M, *CombinedIndex, ImportList)) return; - auto AddStream = [&](size_t Task) { + auto AddStream = [&](size_t Task, const Twine &ModuleName) { return std::make_unique<CachedFileStream>(std::move(OS), CGOpts.ObjectFilenameForDebug); }; @@ -1077,7 +1149,10 @@ static void runThinLTOBackend( Conf.CodeModel = getCodeModel(CGOpts); Conf.MAttrs = TOpts.Features; Conf.RelocModel = CGOpts.RelocationModel; - Conf.CGOptLevel = getCGOptLevel(CGOpts); + std::optional<CodeGenOpt::Level> OptLevelOrNone = + CodeGenOpt::getLevel(CGOpts.OptimizationLevel); + assert(OptLevelOrNone && "Invalid optimization level!"); + Conf.CGOptLevel = *OptLevelOrNone; Conf.OptLevel = CGOpts.OptimizationLevel; initTargetOptions(Diags, Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index dee0cb64be97..8ef95bb80846 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -609,7 +609,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_add_fetch: PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd : llvm::Instruction::Add; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: @@ -621,7 +621,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_sub_fetch: PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub : llvm::Instruction::Sub; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: @@ -631,7 +631,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_min_fetch: PostOpMinMax = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: @@ -642,7 +642,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_max_fetch: PostOpMinMax = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: @@ -653,7 +653,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_and_fetch: PostOp = llvm::Instruction::And; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: @@ -663,7 +663,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_or_fetch: PostOp = llvm::Instruction::Or; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: @@ -673,7 +673,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_xor_fetch: PostOp = llvm::Instruction::Xor; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: @@ -683,7 
+683,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_nand_fetch: PostOp = llvm::Instruction::And; // the NOT is special cased below - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: Op = llvm::AtomicRMWInst::Nand; @@ -914,13 +914,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty)); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__atomic_add_fetch: case AtomicExpr::AO__atomic_sub_fetch: ShouldCastToIntPtrTy = !MemTy->isFloatingType(); - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: @@ -1045,7 +1045,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // suitably aligned for the optimized version. if (Misaligned) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: @@ -1176,7 +1176,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_fetch_add_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_add_fetch: PostOp = llvm::Instruction::Add; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: @@ -1189,7 +1189,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_fetch_and_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_and_fetch: PostOp = llvm::Instruction::And; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_and: @@ -1202,7 +1202,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_fetch_or_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_or_fetch: PostOp = llvm::Instruction::Or; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__hip_atomic_fetch_or: @@ -1215,7 +1215,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_fetch_sub_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_sub_fetch: PostOp = llvm::Instruction::Sub; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: @@ -1227,7 +1227,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_fetch_xor_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_xor_fetch: PostOp = llvm::Instruction::Xor; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__hip_atomic_fetch_xor: @@ -1238,7 +1238,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { break; case AtomicExpr::AO__atomic_min_fetch: PostOpMinMax = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__hip_atomic_fetch_min: @@ -1251,7 +1251,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { break; case AtomicExpr::AO__atomic_max_fetch: PostOpMinMax = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_max: 
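  // ([[fallthrough]] is the standard C++17 replacement for the
  // LLVM_FALLTHROUGH macro being swapped out across this file; it is a
  // statement attribute placed immediately before the label it falls into:
  //
  //   case A:
  //     setup();
  //     [[fallthrough]];
  //   case B:
  //     handle();
  //     break;
  // )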
case AtomicExpr::AO__atomic_fetch_max: case AtomicExpr::AO__hip_atomic_fetch_max: @@ -1266,7 +1266,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_fetch_nand_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_nand_fetch: PostOp = llvm::Instruction::And; // the NOT is special cased below - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: LibCallName = "__atomic_fetch_nand"; @@ -1594,7 +1594,7 @@ llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO, /// we are operating under /volatile:ms *and* the LValue itself is volatile and /// performing such an operation can be performed without a libcall. bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) { - if (!CGM.getCodeGenOpts().MSVolatile) return false; + if (!CGM.getLangOpts().MSVolatile) return false; AtomicInfo AI(*this, LV); bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType()); // An atomic is inline if we don't need to use a libcall. diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index ff6ca0914e0d..6e4a0dbf2335 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -502,12 +502,10 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, if (CGM.getLangOpts().OpenCL) { // The header is basically 'struct { int; int; generic void *; // custom_fields; }'. Assert that struct is packed. - auto GenericAS = - CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); - auto GenPtrAlign = - CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); - auto GenPtrSize = - CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); + auto GenPtrAlign = CharUnits::fromQuantity( + CGM.getTarget().getPointerAlign(LangAS::opencl_generic) / 8); + auto GenPtrSize = CharUnits::fromQuantity( + CGM.getTarget().getPointerWidth(LangAS::opencl_generic) / 8); assert(CGM.getIntSize() <= GenPtrSize); assert(CGM.getIntAlign() <= GenPtrAlign); assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); @@ -521,10 +519,10 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, unsigned BlockAlign = GenPtrAlign.getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { - for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { + for (auto *I : Helper->getCustomFieldTypes()) /* custom fields */ { // TargetOpenCLBlockHelp needs to make sure the struct is packed. // If necessary, add padding fields to the custom fields. - unsigned Align = CGM.getDataLayout().getABITypeAlignment(I); + unsigned Align = CGM.getDataLayout().getABITypeAlign(I).value(); if (BlockAlign < Align) BlockAlign = Align; assert(Offset % Align == 0); @@ -806,9 +804,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; auto GenVoidPtrSize = CharUnits::fromQuantity( - CGM.getTarget().getPointerWidth( - CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / - 8); + CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8); // Using the computed layout, generate the actual block function. 
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; @@ -1356,7 +1352,7 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, fields.add(buildBlockDescriptor(CGM, blockInfo)); } else if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { - for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) { + for (auto *I : Helper->getCustomFieldValues(CGM, blockInfo)) { fields.add(I); } } @@ -2676,7 +2672,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { size = varOffset; // Conversely, we might have to prevent LLVM from inserting padding. - } else if (CGM.getDataLayout().getABITypeAlignment(varTy) > + } else if (CGM.getDataLayout().getABITypeAlign(varTy) > uint64_t(varAlign.getQuantity())) { packed = true; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 113c629bf9ed..f72e04a425d9 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/IntrinsicsR600.h" @@ -51,20 +52,17 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/X86TargetParser.h" +#include <optional> #include <sstream> using namespace clang; using namespace CodeGen; using namespace llvm; -static -int64_t clamp(int64_t Value, int64_t Low, int64_t High) { - return std::min(High, std::max(Low, Value)); -} - static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes) { ConstantInt *Byte; @@ -110,6 +108,15 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"}, }; + // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit + // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions + // if it is 64-bit 'long double' mode. + static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{ + {Builtin::BI__builtin_frexpl, "frexp"}, + {Builtin::BI__builtin_ldexpl, "ldexp"}, + {Builtin::BI__builtin_modfl, "modf"}, + }; + // If the builtin has been declared explicitly with an assembler label, // use the mangled name. This differs from the plain label on platforms // that prefix labels. 
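For the AIX mapping above: when 'long double' is the 64-bit IEEE double format, the l-suffixed math builtins can bind directly to the double variants. A small sketch of the effect, assuming AIX with 64-bit 'long double':

  long double split(long double value, int *exp) {
    // With 64-bit 'long double', this may be emitted as a call to "frexp".
    return __builtin_frexpl(value, exp);
  }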
@@ -122,8 +129,14 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() && F128Builtins.find(BuiltinID) != F128Builtins.end()) Name = F128Builtins[BuiltinID]; + else if (getTriple().isOSAIX() && + &getTarget().getLongDoubleFormat() == + &llvm::APFloat::IEEEdouble() && + AIXLongDouble64Builtins.find(BuiltinID) != + AIXLongDouble64Builtins.end()) + Name = AIXLongDouble64Builtins[BuiltinID]; else - Name = Context.BuiltinInfo.getName(BuiltinID) + 10; + Name = Context.BuiltinInfo.getName(BuiltinID).substr(10); } llvm::FunctionType *Ty = @@ -675,6 +688,8 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF, Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); llvm::Instruction *Call = CGF.Builder.CreateCall(F); Call->setMetadata(llvm::LLVMContext::MD_range, RNode); + Call->setMetadata(llvm::LLVMContext::MD_noundef, + llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return Call; } @@ -1164,12 +1179,12 @@ enum class CodeGenFunction::MSVCIntrin { __fastfail, }; -static Optional<CodeGenFunction::MSVCIntrin> +static std::optional<CodeGenFunction::MSVCIntrin> translateArmToMsvcIntrin(unsigned BuiltinID) { using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: - return None; + return std::nullopt; case clang::ARM::BI_BitScanForward: case clang::ARM::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; @@ -1310,12 +1325,12 @@ translateArmToMsvcIntrin(unsigned BuiltinID) { llvm_unreachable("must return from switch"); } -static Optional<CodeGenFunction::MSVCIntrin> +static std::optional<CodeGenFunction::MSVCIntrin> translateAarch64ToMsvcIntrin(unsigned BuiltinID) { using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: - return None; + return std::nullopt; case clang::AArch64::BI_BitScanForward: case clang::AArch64::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; @@ -1464,12 +1479,12 @@ translateAarch64ToMsvcIntrin(unsigned BuiltinID) { llvm_unreachable("must return from switch"); } -static Optional<CodeGenFunction::MSVCIntrin> +static std::optional<CodeGenFunction::MSVCIntrin> translateX86ToMsvcIntrin(unsigned BuiltinID) { using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: - return None; + return std::nullopt; case clang::X86::BI_BitScanForward: case clang::X86::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; @@ -1704,7 +1719,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, SanitizerHandler::InvalidBuiltin, {EmitCheckSourceLocation(E->getExprLoc()), llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, - None); + std::nullopt); return ArgValue; } @@ -2204,7 +2219,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // might. Also, math builtins have the same semantics as their math library // twins. Thus, we can transform math library and builtin calls to their // LLVM counterparts if the call is marked 'const' (known to never set errno). - if (FD->hasAttr<ConstAttr>()) { + // In case FP exceptions are enabled, the experimental versions of the + // intrinsics model those. 
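  // (Example of the errno distinction, as a sketch: sqrt can set errno on a
  // negative input, so it only becomes the llvm.sqrt intrinsic when
  // math-errno is off:
  //
  //   -fno-math-errno:  %r = call double @llvm.sqrt.f64(double %x)
  //   -fmath-errno:     %r = call double @sqrt(double %x)
  // )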
+ bool ConstWithoutErrnoAndExceptions = + getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID); + bool ConstWithoutExceptions = + getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID); + if (FD->hasAttr<ConstAttr>() || + ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) && + (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIceil: case Builtin::BIceilf: @@ -2515,11 +2538,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_va_start: case Builtin::BI__va_start: case Builtin::BI__builtin_va_end: - return RValue::get( - EmitVAStartEnd(BuiltinID == Builtin::BI__va_start - ? EmitScalarExpr(E->getArg(0)) - : EmitVAListRef(E->getArg(0)).getPointer(), - BuiltinID != Builtin::BI__builtin_va_end)); + EmitVAStartEnd(BuiltinID == Builtin::BI__va_start + ? EmitScalarExpr(E->getArg(0)) + : EmitVAListRef(E->getArg(0)).getPointer(), + BuiltinID != Builtin::BI__builtin_va_end); + return RValue::get(nullptr); case Builtin::BI__builtin_va_copy: { Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); @@ -2528,8 +2551,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, DstPtr = Builder.CreateBitCast(DstPtr, Type); SrcPtr = Builder.CreateBitCast(SrcPtr, Type); - return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), - {DstPtr, SrcPtr})); + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr}); + return RValue::get(nullptr); } case Builtin::BI__builtin_abs: case Builtin::BI__builtin_labs: @@ -2778,6 +2801,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_assume_aligned: { const Expr *Ptr = E->getArg(0); Value *PtrValue = EmitScalarExpr(Ptr); + if (PtrValue->getType() != VoidPtrTy) + PtrValue = EmitCastToVoidPtr(PtrValue); Value *OffsetValue = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; @@ -2799,7 +2824,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); - return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); + Builder.CreateCall(FnAssume, ArgValue); + return RValue::get(nullptr); } case Builtin::BI__arithmetic_fence: { // Create the builtin call if FastMath is selected, and the target @@ -2920,7 +2946,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); - return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); + Builder.CreateCall(F, {Address, RW, Locality, Data}); + return RValue::get(nullptr); } case Builtin::BI__builtin_readcyclecounter: { Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); @@ -2933,9 +2960,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: - return RValue::get(EmitTrapCall(Intrinsic::trap)); + EmitTrapCall(Intrinsic::trap); + return RValue::get(nullptr); case Builtin::BI__debugbreak: - return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); + EmitTrapCall(Intrinsic::debugtrap); + return RValue::get(nullptr); case Builtin::BI__builtin_unreachable: { EmitUnreachable(E->getExprLoc()); @@ -2971,7 +3000,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Ordered comparisons: we know the arguments to these are matching scalar // floating point values. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); - // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. 
Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); @@ -3051,16 +3079,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_ceil: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + case Builtin::BI__builtin_elementwise_cos: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos")); case Builtin::BI__builtin_elementwise_floor: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor")); case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven, "elt.roundeven")); + case Builtin::BI__builtin_elementwise_sin: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin")); + case Builtin::BI__builtin_elementwise_trunc: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc")); - + case Builtin::BI__builtin_elementwise_canonicalize: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.trunc")); + case Builtin::BI__builtin_elementwise_copysign: + return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -3294,7 +3333,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_flt_rounds: { - Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds); + Function *F = CGM.getIntrinsic(Intrinsic::get_rounding); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F); @@ -3717,7 +3756,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_unwind_init: { Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); - return RValue::get(Builder.CreateCall(F)); + Builder.CreateCall(F); + return RValue::get(nullptr); } case Builtin::BI__builtin_extend_pointer: { // Extends a pointer to the size of an _Unwind_Word, which is @@ -4210,7 +4250,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, StringRef WideBytes = Str->getBytes(); std::string StrUtf8; if (!convertUTF16ToUTF8String( - makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { + ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); continue; } @@ -4226,8 +4266,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); - llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, - AnnVal->getType()); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::annotation, + {AnnVal->getType(), CGM.ConstGlobalsPtrTy}); // Get the annotation string, go through casts. Sema requires this to be a // non-wide string literal, potentially casted, so the cast<> is safe. 
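For reference on the hunk above, __builtin_annotation tags an integer value with a string that survives into IR as an llvm.annotation call, e.g.:

  int traced(int v) {
    // The string ends up in a constant global referenced by the intrinsic.
    return __builtin_annotation(v, "observe.me");
  }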
@@ -4478,8 +4519,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); case Builtin::BI__builtin_operator_delete: - return EmitBuiltinNewDeleteCall( + EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); + return RValue::get(nullptr); case Builtin::BI__builtin_is_aligned: return EmitBuiltinIsAligned(E); @@ -4644,20 +4686,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__fastfail: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); - case Builtin::BI__builtin_coro_size: { - auto & Context = getContext(); - auto SizeTy = Context.getSizeType(); - auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); - return RValue::get(Builder.CreateCall(F)); - } - case Builtin::BI__builtin_coro_id: return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); case Builtin::BI__builtin_coro_promise: return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); case Builtin::BI__builtin_coro_resume: - return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); + EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); + return RValue::get(nullptr); case Builtin::BI__builtin_coro_frame: return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); case Builtin::BI__builtin_coro_noop: @@ -4665,7 +4700,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_coro_free: return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); case Builtin::BI__builtin_coro_destroy: - return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); + EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); + return RValue::get(nullptr); case Builtin::BI__builtin_coro_done: return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); case Builtin::BI__builtin_coro_alloc: @@ -4676,6 +4712,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); case Builtin::BI__builtin_coro_suspend: return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); + case Builtin::BI__builtin_coro_size: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_size); + case Builtin::BI__builtin_coro_align: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_align); // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions case Builtin::BIread_pipe: @@ -5034,7 +5074,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitLifetimeEnd(TmpSize, TmpPtr); return Call; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. @@ -5094,7 +5134,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Val = EmitScalarExpr(E->getArg(0)); Address Address = EmitPointerWithAlignment(E->getArg(1)); Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); - return RValue::get(Builder.CreateStore(HalfVal, Address)); + Builder.CreateStore(HalfVal, Address); + return RValue::get(nullptr); } case Builtin::BI__builtin_load_half: { Address Address = EmitPointerWithAlignment(E->getArg(0)); @@ -5265,7 +5306,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); // See if we have a target specific intrinsic. 
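  // (The change below is part of a wider move of BuiltinInfo::getName from
  // 'const char *' to StringRef. Because a StringRef carries its length,
  // call sites can use substr() instead of pointer arithmetic, as in the
  // hunk earlier in this file:
  //
  //   Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
  //
  // replacing the old 'getName(BuiltinID) + 10'.)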
- const char *Name = getContext().BuiltinInfo.getName(BuiltinID); + StringRef Name = getContext().BuiltinInfo.getName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; StringRef Prefix = llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); @@ -5359,6 +5400,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, V = Builder.CreateBitCast(V, RetTy); } + if (RetTy->isVoidTy()) + return RValue::get(nullptr); + return RValue::get(V); } @@ -5376,6 +5420,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { switch (EvalKind) { case TEK_Scalar: + if (V->getType()->isVoidTy()) + return RValue::get(nullptr); return RValue::get(V); case TEK_Aggregate: return RValue::getAggregate(ReturnValue.getValue(), @@ -5433,6 +5479,9 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -5630,7 +5679,7 @@ struct ARMVectorIntrinsicInfo { TypeModifier } static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { - NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0), + NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0), NEONMAP0(splat_lane_v), NEONMAP0(splat_laneq_v), NEONMAP0(splatq_lane_v), @@ -5642,21 +5691,27 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), NEONMAP0(vaddq_v), - NEONMAP1(vaesdq_v, arm_neon_aesd, 0), - NEONMAP1(vaeseq_v, arm_neon_aese, 0), - NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), - NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), - NEONMAP1(vbfdot_v, arm_neon_bfdot, 0), - NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0), - NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0), - NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0), - NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0), + NEONMAP1(vaesdq_u8, arm_neon_aesd, 0), + NEONMAP1(vaeseq_u8, arm_neon_aese, 0), + NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0), + NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0), + NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0), + NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0), NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), - NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), - NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), - NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), - NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, arm_neon_vacge, 0), NEONMAP1(vcageq_v, arm_neon_vacge, 0), NEONMAP1(vcagt_v, 
arm_neon_vacgt, 0), @@ -5682,90 +5737,96 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), - NEONMAP0(vcvt_f16_v), + NEONMAP0(vcvt_f16_s16), + NEONMAP0(vcvt_f16_u16), NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), - NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0), NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), - NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), - NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), - NEONMAP0(vcvt_s16_v), + NEONMAP0(vcvt_s16_f16), NEONMAP0(vcvt_s32_v), NEONMAP0(vcvt_s64_v), - NEONMAP0(vcvt_u16_v), + NEONMAP0(vcvt_u16_f16), NEONMAP0(vcvt_u32_v), NEONMAP0(vcvt_u64_v), - NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), - NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), - NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), - NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0), - NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), - NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0), NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), - NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0), NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), - NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), - NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), - NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0), NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), - NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), - NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), - NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0), NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 
NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), - NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0), + NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0), NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), - NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), - NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0), + NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), - NEONMAP0(vcvtq_f16_v), + NEONMAP0(vcvtq_f16_s16), + NEONMAP0(vcvtq_f16_u16), NEONMAP0(vcvtq_f32_v), - NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0), NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), - NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), - NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), - NEONMAP0(vcvtq_s16_v), + NEONMAP0(vcvtq_s16_f16), NEONMAP0(vcvtq_s32_v), NEONMAP0(vcvtq_s64_v), - NEONMAP0(vcvtq_u16_v), + NEONMAP0(vcvtq_u16_f16), NEONMAP0(vcvtq_u32_v), NEONMAP0(vcvtq_u64_v), - NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0), - NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0), + NEONMAP1(vdot_s32, arm_neon_sdot, 0), + NEONMAP1(vdot_u32, arm_neon_udot, 0), + NEONMAP1(vdotq_s32, arm_neon_sdot, 0), + NEONMAP1(vdotq_u32, arm_neon_udot, 0), NEONMAP0(vext_v), NEONMAP0(vextq_v), NEONMAP0(vfma_v), @@ -5810,7 +5871,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), - NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0), + NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0), + NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), @@ -5837,10 +5899,14 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), - NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType), - NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType), - NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType), - NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType), NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), @@ -5883,12 +5949,12 @@ static const ARMVectorIntrinsicInfo 
ARMSIMDIntrinsicMap [] = { NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), - NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), - NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), - NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), - NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), - NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), - NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), + NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0), + NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0), + NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0), + NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0), + NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0), + NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0), NEONMAP0(vshl_n_v), NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), NEONMAP0(vshll_n_v), @@ -5922,9 +5988,9 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vtrnq_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), - NEONMAP1(vusdot_v, arm_neon_usdot, 0), - NEONMAP1(vusdotq_v, arm_neon_usdot, 0), - NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0), + NEONMAP1(vusdot_s32, arm_neon_usdot, 0), + NEONMAP1(vusdotq_s32, arm_neon_usdot, 0), + NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0), NEONMAP0(vuzp_v), NEONMAP0(vuzpq_v), NEONMAP0(vzip_v), @@ -5932,7 +5998,7 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { }; static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { - NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0), + NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0), NEONMAP0(splat_lane_v), NEONMAP0(splat_laneq_v), NEONMAP0(splatq_lane_v), @@ -5943,20 +6009,33 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vaddhn_v), NEONMAP0(vaddq_p128), NEONMAP0(vaddq_v), - NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), - NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), - NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), - NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), - NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), - NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0), - NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0), - NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0), - NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0), - NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0), - NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), - NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), - NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), - NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), + NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), + NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), + NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), + NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, 
aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), + NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, aarch64_neon_facge, 0), NEONMAP1(vcageq_v, aarch64_neon_facge, 0), NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), @@ -5979,57 +6058,82 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vcltzq_v), NEONMAP1(vclz_v, ctlz, Add1ArgType), NEONMAP1(vclzq_v, ctlz, Add1ArgType), - NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType), - NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType), - NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType), - NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType), - NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType), - NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType), - NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType), - NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), - NEONMAP0(vcvt_f16_v), + NEONMAP0(vcvt_f16_s16), + NEONMAP0(vcvt_f16_u16), NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), - NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + 
NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), - NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), - NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), - NEONMAP0(vcvtq_f16_v), + NEONMAP0(vcvtq_f16_s16), + NEONMAP0(vcvtq_f16_u16), NEONMAP0(vcvtq_f32_v), - NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0), - NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0), + NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), - NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), - NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), - NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0), - NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0), - NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdot_u32, aarch64_neon_udot, 0), + NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdotq_u32, aarch64_neon_udot, 0), + NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP0(vext_v), NEONMAP0(vextq_v), NEONMAP0(vfma_v), NEONMAP0(vfmaq_v), - NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0), - NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0), - NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0), - NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0), - NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0), - NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0), - NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0), - NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_f16, 
aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), @@ -6040,7 +6144,8 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), - NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0), + NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), + NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), @@ -6066,10 +6171,14 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), - NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType), - NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType), - NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType), - NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), @@ -6087,21 +6196,21 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), - NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0), + NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), - NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType), - NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType), - NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType), - NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType), - NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType), - NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType), - NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType), - NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, 
Add1ArgType), + NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), NEONMAP0(vrndi_v), NEONMAP0(vrndiq_v), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), @@ -6113,16 +6222,16 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), - NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), - NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), - NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), - NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), - NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), - NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), - NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0), - NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0), - NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0), - NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0), + NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), + NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), + NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), + NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), + NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), + NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), + NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), + NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), + NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), + NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), NEONMAP0(vshl_n_v), NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), NEONMAP0(vshll_n_v), @@ -6131,15 +6240,15 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vshr_n_v), NEONMAP0(vshrn_n_v), NEONMAP0(vshrq_n_v), - NEONMAP1(vsm3partw1q_v, aarch64_crypto_sm3partw1, 0), - NEONMAP1(vsm3partw2q_v, aarch64_crypto_sm3partw2, 0), - NEONMAP1(vsm3ss1q_v, aarch64_crypto_sm3ss1, 0), - NEONMAP1(vsm3tt1aq_v, aarch64_crypto_sm3tt1a, 0), - NEONMAP1(vsm3tt1bq_v, aarch64_crypto_sm3tt1b, 0), - NEONMAP1(vsm3tt2aq_v, aarch64_crypto_sm3tt2a, 0), - NEONMAP1(vsm3tt2bq_v, aarch64_crypto_sm3tt2b, 0), - NEONMAP1(vsm4ekeyq_v, aarch64_crypto_sm4ekey, 0), - NEONMAP1(vsm4eq_v, aarch64_crypto_sm4e, 0), + NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), + NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), + NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), + NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), + NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), + NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), + NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), + NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), + NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), @@ -6149,10 +6258,10 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vsubhn_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), - NEONMAP1(vusdot_v, aarch64_neon_usdot, 0), - NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0), - NEONMAP1(vusmmlaq_v, 
aarch64_neon_usmmla, 0), - NEONMAP1(vxarq_v, aarch64_crypto_xar, 0), + NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), + NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), }; static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { @@ -6394,6 +6503,148 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), }; +// Some intrinsics are equivalent for codegen. +static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { + { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, }, + { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, }, + { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, }, + { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, }, + { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, }, + { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, + { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, + { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, + { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, }, + { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, }, + { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, + { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, + { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, + { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, }, + { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, }, + { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, }, + { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, }, + { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, }, + { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, }, + { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, }, + { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, }, + { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, }, + { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, }, + { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, }, + { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, }, + { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, + { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, + { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, + { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, }, + { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, }, + { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, + { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, + { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, + { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, }, + { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, }, + { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, }, + { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v }, + { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v }, + { NEON::BI__builtin_neon_vld1_bf16_x4, 
NEON::BI__builtin_neon_vld1_x4_v }, + { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v }, + { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v }, + { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v }, + { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v }, + { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v }, + { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v }, + { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v }, + { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v }, + { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v }, + { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v }, + { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v }, + { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v }, + { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v }, + { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v }, + { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v }, + { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v }, + { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v }, + { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v }, + { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v }, + { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v }, + { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v }, + { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v }, + { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v }, + { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v }, + { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v }, + { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v }, + { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v }, + { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, }, + { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, }, + { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, }, + { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, }, + { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, }, + { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, }, + { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, }, + { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, }, + { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, }, + { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, }, + { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, }, + { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, }, + { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, }, + { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, }, + { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, }, + { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, }, + { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, }, + { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, }, + { NEON::BI__builtin_neon_vpminnmq_f16, 
NEON::BI__builtin_neon_vpminnmq_v, }, + { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, }, + { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, }, + { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, }, + { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, }, + { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, }, + { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, }, + { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, }, + { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, }, + { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, }, + { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, }, + { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, }, + { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, }, + { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, }, + { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, }, + { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, }, + { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, }, + { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, }, + { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, }, + { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, }, + { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, }, + { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, }, + { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, }, + { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, }, + { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, }, + { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, }, + { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v }, + { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v }, + { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v }, + { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v }, + { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v }, + { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v }, + { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v }, + { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v }, + { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v }, + { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v }, + { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v }, + { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v }, + { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v }, + { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v }, + { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v }, + { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v }, + { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v }, + { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v }, + { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v }, + { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, + { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, + { 
NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, + { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, }, + { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, }, + { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, }, + { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, }, + { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, }, + { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, }, +}; + #undef NEONMAP0 #undef NEONMAP1 #undef NEONMAP2 @@ -6531,13 +6782,13 @@ static Value *EmitCommonNeonSISDBuiltinExpr( Ops[j] = CGF.Builder.CreateTruncOrBitCast( Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType()); Ops[j] = - CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); + CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0); } Value *Result = CGF.EmitNeonCall(F, Ops, s); llvm::Type *ResultType = CGF.ConvertType(E->getType()); - if (ResultType->getPrimitiveSizeInBits().getFixedSize() < - Result->getType()->getPrimitiveSizeInBits().getFixedSize()) + if (ResultType->getPrimitiveSizeInBits().getFixedValue() < + Result->getType()->getPrimitiveSizeInBits().getFixedValue()) return CGF.Builder.CreateExtractElement(Result, C0); return CGF.Builder.CreateBitCast(Result, ResultType, s); @@ -6550,7 +6801,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( llvm::Triple::ArchType Arch) { // Get the last argument, which specifies the vector type. const Expr *Arg = E->getArg(E->getNumArgs() - 1); - Optional<llvm::APSInt> NeonTypeConst = + std::optional<llvm::APSInt> NeonTypeConst = Arg->getIntegerConstantExpr(getContext()); if (!NeonTypeConst) return nullptr; @@ -6634,7 +6885,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcalt_v: case NEON::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vcage_v: case NEON::BI__builtin_neon_vcageq_v: case NEON::BI__builtin_neon_vcagt_v: @@ -6690,17 +6941,25 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( HasLegalHalfType); return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - case NEON::BI__builtin_neon_vcvt_f16_v: - case NEON::BI__builtin_neon_vcvtq_f16_v: + case NEON::BI__builtin_neon_vcvt_f16_s16: + case NEON::BI__builtin_neon_vcvt_f16_u16: + case NEON::BI__builtin_neon_vcvtq_f16_s16: + case NEON::BI__builtin_neon_vcvtq_f16_u16: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), HasLegalHalfType); return Usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - case NEON::BI__builtin_neon_vcvt_n_f16_v: + case NEON::BI__builtin_neon_vcvt_n_f16_s16: + case NEON::BI__builtin_neon_vcvt_n_f16_u16: + case NEON::BI__builtin_neon_vcvtq_n_f16_s16: + case NEON::BI__builtin_neon_vcvtq_n_f16_u16: { + llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vcvt_n"); + } case NEON::BI__builtin_neon_vcvt_n_f32_v: case NEON::BI__builtin_neon_vcvt_n_f64_v: - case NEON::BI__builtin_neon_vcvtq_n_f16_v: case NEON::BI__builtin_neon_vcvtq_n_f32_v: case NEON::BI__builtin_neon_vcvtq_n_f64_v: { llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; @@ -6708,15 +6967,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } - case NEON::BI__builtin_neon_vcvt_n_s16_v: + case NEON::BI__builtin_neon_vcvt_n_s16_f16: case NEON::BI__builtin_neon_vcvt_n_s32_v: - case NEON::BI__builtin_neon_vcvt_n_u16_v: + case NEON::BI__builtin_neon_vcvt_n_u16_f16: case NEON::BI__builtin_neon_vcvt_n_u32_v: case NEON::BI__builtin_neon_vcvt_n_s64_v: case NEON::BI__builtin_neon_vcvt_n_u64_v: - case NEON::BI__builtin_neon_vcvtq_n_s16_v: + case NEON::BI__builtin_neon_vcvtq_n_s16_f16: case NEON::BI__builtin_neon_vcvtq_n_s32_v: - case NEON::BI__builtin_neon_vcvtq_n_u16_v: + case NEON::BI__builtin_neon_vcvtq_n_u16_f16: case NEON::BI__builtin_neon_vcvtq_n_u32_v: case NEON::BI__builtin_neon_vcvtq_n_s64_v: case NEON::BI__builtin_neon_vcvtq_n_u64_v: { @@ -6728,64 +6987,64 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: - case NEON::BI__builtin_neon_vcvt_s16_v: - case NEON::BI__builtin_neon_vcvt_u16_v: + case NEON::BI__builtin_neon_vcvt_s16_f16: + case NEON::BI__builtin_neon_vcvt_u16_f16: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: - case NEON::BI__builtin_neon_vcvtq_s16_v: - case NEON::BI__builtin_neon_vcvtq_u16_v: { + case NEON::BI__builtin_neon_vcvtq_s16_f16: + case NEON::BI__builtin_neon_vcvtq_u16_f16: { Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } - case NEON::BI__builtin_neon_vcvta_s16_v: + case NEON::BI__builtin_neon_vcvta_s16_f16: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: - case NEON::BI__builtin_neon_vcvta_u16_v: + case NEON::BI__builtin_neon_vcvta_u16_f16: case NEON::BI__builtin_neon_vcvta_u32_v: case NEON::BI__builtin_neon_vcvta_u64_v: - case NEON::BI__builtin_neon_vcvtaq_s16_v: + case NEON::BI__builtin_neon_vcvtaq_s16_f16: case NEON::BI__builtin_neon_vcvtaq_s32_v: case NEON::BI__builtin_neon_vcvtaq_s64_v: - case NEON::BI__builtin_neon_vcvtaq_u16_v: + case NEON::BI__builtin_neon_vcvtaq_u16_f16: case NEON::BI__builtin_neon_vcvtaq_u32_v: case NEON::BI__builtin_neon_vcvtaq_u64_v: - case NEON::BI__builtin_neon_vcvtn_s16_v: + case NEON::BI__builtin_neon_vcvtn_s16_f16: case NEON::BI__builtin_neon_vcvtn_s32_v: case NEON::BI__builtin_neon_vcvtn_s64_v: - case NEON::BI__builtin_neon_vcvtn_u16_v: + case NEON::BI__builtin_neon_vcvtn_u16_f16: case NEON::BI__builtin_neon_vcvtn_u32_v: case NEON::BI__builtin_neon_vcvtn_u64_v: - case NEON::BI__builtin_neon_vcvtnq_s16_v: + case NEON::BI__builtin_neon_vcvtnq_s16_f16: case NEON::BI__builtin_neon_vcvtnq_s32_v: case NEON::BI__builtin_neon_vcvtnq_s64_v: - case NEON::BI__builtin_neon_vcvtnq_u16_v: + case NEON::BI__builtin_neon_vcvtnq_u16_f16: case NEON::BI__builtin_neon_vcvtnq_u32_v: case NEON::BI__builtin_neon_vcvtnq_u64_v: - case NEON::BI__builtin_neon_vcvtp_s16_v: + case NEON::BI__builtin_neon_vcvtp_s16_f16: case NEON::BI__builtin_neon_vcvtp_s32_v: case NEON::BI__builtin_neon_vcvtp_s64_v: - case NEON::BI__builtin_neon_vcvtp_u16_v: + case NEON::BI__builtin_neon_vcvtp_u16_f16: case NEON::BI__builtin_neon_vcvtp_u32_v: case NEON::BI__builtin_neon_vcvtp_u64_v: - case NEON::BI__builtin_neon_vcvtpq_s16_v: + case NEON::BI__builtin_neon_vcvtpq_s16_f16: case NEON::BI__builtin_neon_vcvtpq_s32_v: case NEON::BI__builtin_neon_vcvtpq_s64_v: - case NEON::BI__builtin_neon_vcvtpq_u16_v: + case NEON::BI__builtin_neon_vcvtpq_u16_f16: case NEON::BI__builtin_neon_vcvtpq_u32_v: case NEON::BI__builtin_neon_vcvtpq_u64_v: - case NEON::BI__builtin_neon_vcvtm_s16_v: + case NEON::BI__builtin_neon_vcvtm_s16_f16: case NEON::BI__builtin_neon_vcvtm_s32_v: case NEON::BI__builtin_neon_vcvtm_s64_v: - case NEON::BI__builtin_neon_vcvtm_u16_v: + case NEON::BI__builtin_neon_vcvtm_u16_f16: case NEON::BI__builtin_neon_vcvtm_u32_v: case NEON::BI__builtin_neon_vcvtm_u64_v: - case NEON::BI__builtin_neon_vcvtmq_s16_v: + case NEON::BI__builtin_neon_vcvtmq_s16_f16: case NEON::BI__builtin_neon_vcvtmq_s32_v: case NEON::BI__builtin_neon_vcvtmq_s64_v: - case NEON::BI__builtin_neon_vcvtmq_u16_v: + case NEON::BI__builtin_neon_vcvtmq_u16_f16: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtmq_u64_v: { llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; @@ -6861,7 +7120,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { - Value *V = UndefValue::get(Ty); + Value *V = PoisonValue::get(Ty); PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); LoadInst *Ld = Builder.CreateLoad(PtrOp0); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); @@ -6879,7 +7138,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( for (unsigned I = 2; I < Ops.size() - 1; ++I) Ops[I] = Builder.CreateBitCast(Ops[I], Ty); Ops.push_back(getAlignmentValue32(PtrOp1)); - Ops[1] = Builder.CreateCall(F, 
makeArrayRef(Ops).slice(1), NameHint); + Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); @@ -6983,10 +7242,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vrshrq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); - case NEON::BI__builtin_neon_vsha512hq_v: - case NEON::BI__builtin_neon_vsha512h2q_v: - case NEON::BI__builtin_neon_vsha512su0q_v: - case NEON::BI__builtin_neon_vsha512su1q_v: { + case NEON::BI__builtin_neon_vsha512hq_u64: + case NEON::BI__builtin_neon_vsha512h2q_u64: + case NEON::BI__builtin_neon_vsha512su0q_u64: + case NEON::BI__builtin_neon_vsha512su1q_u64: { Function *F = CGM.getIntrinsic(Int); return EmitNeonCall(F, Ops, ""); } @@ -7038,18 +7297,18 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops.push_back(getAlignmentValue32(PtrOp0)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); } - case NEON::BI__builtin_neon_vsm3partw1q_v: - case NEON::BI__builtin_neon_vsm3partw2q_v: - case NEON::BI__builtin_neon_vsm3ss1q_v: - case NEON::BI__builtin_neon_vsm4ekeyq_v: - case NEON::BI__builtin_neon_vsm4eq_v: { + case NEON::BI__builtin_neon_vsm3partw1q_u32: + case NEON::BI__builtin_neon_vsm3partw2q_u32: + case NEON::BI__builtin_neon_vsm3ss1q_u32: + case NEON::BI__builtin_neon_vsm4ekeyq_u32: + case NEON::BI__builtin_neon_vsm4eq_u32: { Function *F = CGM.getIntrinsic(Int); return EmitNeonCall(F, Ops, ""); } - case NEON::BI__builtin_neon_vsm3tt1aq_v: - case NEON::BI__builtin_neon_vsm3tt1bq_v: - case NEON::BI__builtin_neon_vsm3tt2aq_v: - case NEON::BI__builtin_neon_vsm3tt2bq_v: { + case NEON::BI__builtin_neon_vsm3tt1aq_u32: + case NEON::BI__builtin_neon_vsm3tt1bq_u32: + case NEON::BI__builtin_neon_vsm3tt2aq_u32: + case NEON::BI__builtin_neon_vsm3tt2bq_u32: { Function *F = CGM.getIntrinsic(Int); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); return EmitNeonCall(F, Ops, ""); @@ -7135,7 +7394,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } return SV; } - case NEON::BI__builtin_neon_vxarq_v: { + case NEON::BI__builtin_neon_vxarq_u64: { Function *F = CGM.getIntrinsic(Int); Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); return EmitNeonCall(F, Ops, ""); @@ -7159,70 +7418,71 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } return SV; } - case NEON::BI__builtin_neon_vdot_v: - case NEON::BI__builtin_neon_vdotq_v: { + case NEON::BI__builtin_neon_vdot_s32: + case NEON::BI__builtin_neon_vdot_u32: + case NEON::BI__builtin_neon_vdotq_s32: + case NEON::BI__builtin_neon_vdotq_u32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; - Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); } - case NEON::BI__builtin_neon_vfmlal_low_v: - case NEON::BI__builtin_neon_vfmlalq_low_v: { + case NEON::BI__builtin_neon_vfmlal_low_f16: + case NEON::BI__builtin_neon_vfmlalq_low_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); } - case NEON::BI__builtin_neon_vfmlsl_low_v: - case NEON::BI__builtin_neon_vfmlslq_low_v: { + case NEON::BI__builtin_neon_vfmlsl_low_f16: + case NEON::BI__builtin_neon_vfmlslq_low_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); } - case NEON::BI__builtin_neon_vfmlal_high_v: - case NEON::BI__builtin_neon_vfmlalq_high_v: { + case NEON::BI__builtin_neon_vfmlal_high_f16: + case NEON::BI__builtin_neon_vfmlalq_high_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); } - case NEON::BI__builtin_neon_vfmlsl_high_v: - case NEON::BI__builtin_neon_vfmlslq_high_v: { + case NEON::BI__builtin_neon_vfmlsl_high_f16: + case NEON::BI__builtin_neon_vfmlslq_high_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); } - case NEON::BI__builtin_neon_vmmlaq_v: { + case NEON::BI__builtin_neon_vmmlaq_s32: + case NEON::BI__builtin_neon_vmmlaq_u32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; - Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla"); + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla"); } - case NEON::BI__builtin_neon_vusmmlaq_v: { + case NEON::BI__builtin_neon_vusmmlaq_s32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla"); } - case NEON::BI__builtin_neon_vusdot_v: - case NEON::BI__builtin_neon_vusdotq_v: { + case NEON::BI__builtin_neon_vusdot_s32: + case NEON::BI__builtin_neon_vusdotq_s32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot"); } - case NEON::BI__builtin_neon_vbfdot_v: - case NEON::BI__builtin_neon_vbfdotq_v: { + case NEON::BI__builtin_neon_vbfdot_f32: + case NEON::BI__builtin_neon_vbfdotq_f32: { llvm::Type *InputTy = llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot"); } - case NEON::BI__builtin_neon___a32_vcvt_bf16_v: { + case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: { llvm::Type *Tys[1] = { Ty }; Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvtfp2bf"); @@ -7354,9 +7614,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg = "") { - // write and register intrinsics only support 32 and 64 bit operations. 
- assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) - && "Unsupported size for register."); + // write and register intrinsics only support 32, 64 and 128 bit operations. + assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) || + RegisterType->isIntegerTy(128)) && + "Unsupported size for register."); CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; @@ -7741,7 +8002,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *Arg0 = EmitScalarExpr(E->getArg(0)); Value *Arg1 = EmitScalarExpr(E->getArg(1)); - // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w + // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w // intrinsics, hence we need different codegen for these cases. if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d || BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) { @@ -7802,7 +8063,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. - if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) + if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) return EmitMSVCBuiltinExpr(*MsvcIntId, E); // Deal with MVE builtins @@ -7812,6 +8073,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) return Result; + // Some intrinsics are equivalent - if they are use the base intrinsic ID. + auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { + return P.first == BuiltinID; + }); + if (It != end(NEONEquivalentIntrinsicMap)) + BuiltinID = It->second; + // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; @@ -7971,7 +8239,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Get the last argument, which specifies the vector type. assert(HasExtraArg); const Expr *Arg = E->getArg(E->getNumArgs()-1); - Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()); + std::optional<llvm::APSInt> Result = + Arg->getIntegerConstantExpr(getContext()); if (!Result) return nullptr; @@ -8007,7 +8276,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Many NEON builtins have identical semantics and uses in ARM and // AArch64. Emit these in a single function. 
- auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); + auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap); const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); if (Builtin) @@ -8037,7 +8306,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, int Indices[] = {1 - Lane, Lane}; return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane"); } - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vld1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); @@ -8077,7 +8346,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vsri_n_v: case NEON::BI__builtin_neon_vsriq_n_v: rightShift = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vsli_n_v: case NEON::BI__builtin_neon_vsliq_n_v: Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); @@ -8100,7 +8369,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Tys), Ops); } - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); @@ -8301,9 +8570,9 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(Addr)); Tys.push_back(ConvertType(Addr->getType())); - Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); + Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); Value *LoadResult = Builder.CreateCall(F, Ops); - Value *MvecOut = UndefValue::get(MvecLType); + Value *MvecOut = PoisonValue::get(MvecLType); for (unsigned i = 0; i < NumVectors; ++i) { Value *Vec = Builder.CreateExtractValue(LoadResult, i); MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); @@ -8343,7 +8612,7 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, for (unsigned i = 0; i < NumVectors; i++) Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); - Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); + Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); Value *ToReturn = nullptr; for (unsigned i = 0; i < NumVectors; i++) { Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); @@ -8409,7 +8678,8 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID // Get the last argument, which specifies the vector type. const Expr *Arg = E->getArg(E->getNumArgs() - 1); - Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext()); + std::optional<llvm::APSInt> Result = + Arg->getIntegerConstantExpr(CGF.getContext()); if (!Result) return nullptr; @@ -8425,29 +8695,25 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID // argument that specifies the vector type, need to handle each case. 
switch (BuiltinID) { case NEON::BI__builtin_neon_vtbl1_v: { - return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, - Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, - "vtbl1"); + return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1], + Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); } case NEON::BI__builtin_neon_vtbl2_v: { - return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, - Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, - "vtbl1"); + return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2], + Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); } case NEON::BI__builtin_neon_vtbl3_v: { - return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, - Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, - "vtbl2"); + return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3], + Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); } case NEON::BI__builtin_neon_vtbl4_v: { - return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, - Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, - "vtbl2"); + return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4], + Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); } case NEON::BI__builtin_neon_vtbx1_v: { Value *TblRes = - packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], - Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); + packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty, + Intrinsic::aarch64_neon_tbl1, "vtbl1"); llvm::Constant *EightV = ConstantInt::get(Ty, 8); Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); @@ -8458,14 +8724,13 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); } case NEON::BI__builtin_neon_vtbx2_v: { - return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], - Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, - "vtbx1"); + return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3], + Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1"); } case NEON::BI__builtin_neon_vtbx3_v: { Value *TblRes = - packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], - Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); + packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty, + Intrinsic::aarch64_neon_tbl2, "vtbl2"); llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], @@ -8477,9 +8742,8 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); } case NEON::BI__builtin_neon_vtbx4_v: { - return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], - Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, - "vtbx2"); + return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5], + Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2"); } case NEON::BI__builtin_neon_vqtbl1_v: case NEON::BI__builtin_neon_vqtbl1q_v: @@ -8518,7 +8782,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4); Op = Builder.CreateBitCast(Op, Int16Ty); - Value *V = UndefValue::get(VTy); + Value *V = PoisonValue::get(VTy); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Op = Builder.CreateInsertElement(V, Op, CI); return Op; @@ -8732,8 +8996,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, if 
(!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) { unsigned BytesPerElt = OverloadedTy->getElementType()->getScalarSizeInBits() / 8; - Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt); - Ops[2] = Builder.CreateMul(Ops[2], Scale); + Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); } Value *Call = Builder.CreateCall(F, Ops); @@ -8792,8 +9055,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags, if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) { unsigned BytesPerElt = OverloadedTy->getElementType()->getScalarSizeInBits() / 8; - Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt); - Ops[3] = Builder.CreateMul(Ops[3], Scale); + Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt)); } return Builder.CreateCall(F, Ops); @@ -8823,8 +9085,8 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, // Index needs to be passed as scaled offset. llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8; - Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt); - Ops[2] = Builder.CreateMul(Ops[2], Scale); + if (BytesPerElt > 1) + Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); } } @@ -8841,13 +9103,13 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, unsigned N; switch (IntID) { - case Intrinsic::aarch64_sve_ld2: + case Intrinsic::aarch64_sve_ld2_sret: N = 2; break; - case Intrinsic::aarch64_sve_ld3: + case Intrinsic::aarch64_sve_ld3_sret: N = 3; break; - case Intrinsic::aarch64_sve_ld4: + case Intrinsic::aarch64_sve_ld4_sret: N = 4; break; default: @@ -8858,12 +9120,22 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy); - Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0); - BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset); - BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); - Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()}); - return Builder.CreateCall(F, { Predicate, BasePtr }); + // Does the load have an offset? + if (Ops.size() > 2) + BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); + + BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); + Function *F = CGM.getIntrinsic(IntID, {VTy}); + Value *Call = Builder.CreateCall(F, {Predicate, BasePtr}); + unsigned MinElts = VTy->getMinNumElements(); + Value *Ret = llvm::PoisonValue::get(RetTy); + for (unsigned I = 0; I < N; I++) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + Value *SRet = Builder.CreateExtractValue(Call, I); + Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx); + } + return Ret; } Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, @@ -8887,23 +9159,25 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, default: llvm_unreachable("unknown intrinsic!"); } - auto TupleTy = - llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N); Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy); - Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0); - Value *Val = Ops.back(); - BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset); + + // Does the store have an offset? 
+ if (Ops.size() > 3) + BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); + BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); + Value *Val = Ops.back(); // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we // need to break up the tuple vector. SmallVector<llvm::Value*, 5> Operands; - Function *FExtr = - CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy}); - for (unsigned I = 0; I < N; ++I) - Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)})); + unsigned MinElts = VTy->getElementCount().getKnownMinValue(); + for (unsigned I = 0; I < N; ++I) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx)); + } Operands.append({Predicate, BasePtr}); Function *F = CGM.getIntrinsic(IntID, { VTy }); @@ -8978,8 +9252,10 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); - Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0); - BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset); + + // Does the load have an offset? + if (Ops.size() > 2) + BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); @@ -9006,8 +9282,10 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); - Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0); - BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset); + + // Does the store have an offset? + if (Ops.size() == 4) + BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); // Last value is always the data llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy); @@ -9024,8 +9302,8 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { - auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty); - return Builder.CreateCall(F, Scalar); + return Builder.CreateVectorSplat( + cast<llvm::VectorType>(Ty)->getElementCount(), Scalar); } Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) { @@ -9069,16 +9347,46 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, if (TypeFlags.isOverloadWhileRW()) return {getSVEPredType(TypeFlags), Ops[0]->getType()}; - if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet()) + if (TypeFlags.isOverloadCvt()) return {Ops[0]->getType(), Ops.back()->getType()}; - if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet()) - return {ResultType, Ops[0]->getType()}; - assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); return {DefaultType}; } +Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, + llvm::Type *Ty, + ArrayRef<Value *> Ops) { + assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) && + "Expects TypleFlag isTupleSet or TypeFlags.isTupleSet()"); + + unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue(); + auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>( + TypeFlags.isTupleSet() ? 
Ops[2]->getType() : Ty); + Value *Idx = ConstantInt::get(CGM.Int64Ty, + I * SingleVecTy->getMinNumElements()); + + if (TypeFlags.isTupleSet()) + return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx); + return Builder.CreateExtractVector(Ty, Ops[0], Idx); +} + +Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, + llvm::Type *Ty, + ArrayRef<Value *> Ops) { + assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()"); + + auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType()); + unsigned MinElts = SrcTy->getMinNumElements(); + Value *Call = llvm::PoisonValue::get(Ty); + for (unsigned I = 0; I < Ops.size(); I++) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx); + } + + return Call; +} + Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { // Find out if any arguments are required to be integer constant expressions. @@ -9101,7 +9409,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - Optional<llvm::APSInt> Result = + std::optional<llvm::APSInt> Result = E->getArg(i)->getIntegerConstantExpr(getContext()); assert(Result && "Expected argument to be a constant"); @@ -9133,6 +9441,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isStructStore()) return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) + return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops); + else if (TypeFlags.isTupleCreate()) + return EmitSVETupleCreate(TypeFlags, Ty, Ops); else if (TypeFlags.isUndef()) return UndefValue::get(Ty); else if (Builtin->LLVMIntrinsic != 0) { @@ -9171,8 +9483,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) { llvm::Type *OpndTy = Ops[1]->getType(); auto *SplatZero = Constant::getNullValue(OpndTy); - Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy); - Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero}); + Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero); } Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, @@ -9285,12 +9596,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy)); Value *Vec = BuildVector(VecOps); - SVETypeFlags TypeFlags(Builtin->TypeModifier); - Value *Pred = EmitSVEAllTruePred(TypeFlags); - llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy); Value *InsertSubVec = Builder.CreateInsertVector( - OverloadedTy, UndefValue::get(OverloadedTy), Vec, Builder.getInt64(0)); + OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0)); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy); @@ -9300,6 +9608,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, if (!IsBoolTy) return DupQLane; + SVETypeFlags TypeFlags(Builtin->TypeModifier); + Value *Pred = EmitSVEAllTruePred(TypeFlags); + // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. F = CGM.getIntrinsic(NumOpnds == 2 ?
Intrinsic::aarch64_sve_cmpne : Intrinsic::aarch64_sve_cmpne_wide, @@ -9346,12 +9657,12 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svtbl2_f32: case SVE::BI__builtin_sve_svtbl2_f64: { SVETypeFlags TF(Builtin->TypeModifier); - auto VTy = cast<llvm::VectorType>(getSVEType(TF)); - auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy); - Function *FExtr = - CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy}); - Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)}); - Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)}); + auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF)); + Value *V0 = Builder.CreateExtractVector(VTy, Ops[0], + ConstantInt::get(CGM.Int64Ty, 0)); + unsigned MinElts = VTy->getMinNumElements(); + Value *V1 = Builder.CreateExtractVector( + VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts)); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy); return Builder.CreateCall(F, {V0, V1, Ops[1]}); } @@ -9398,7 +9709,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svdup_neonq_f32: case SVE::BI__builtin_sve_svdup_neonq_f64: case SVE::BI__builtin_sve_svdup_neonq_bf16: { - Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0], + Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0], Builder.getInt64(0)); return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty}, {Insert, Builder.getInt64(0)}); @@ -9449,29 +9760,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); } - if (BuiltinID == clang::AArch64::BI__builtin_arm_prefetch) { - Value *Address = EmitScalarExpr(E->getArg(0)); - Value *RW = EmitScalarExpr(E->getArg(1)); - Value *CacheLevel = EmitScalarExpr(E->getArg(2)); - Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); - Value *IsData = EmitScalarExpr(E->getArg(4)); - - Value *Locality = nullptr; - if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { - // Temporal fetch, needs to convert cache level to locality. - Locality = llvm::ConstantInt::get(Int32Ty, - -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); - } else { - // Streaming fetch. - Locality = llvm::ConstantInt::get(Int32Ty, 0); - } - - // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify - // PLDL3STRM or PLDL2STRM. 
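
// The __builtin_arm_prefetch lowering deleted in the hunk below folded the
// ACLE (CacheLevel, RetentionPolicy) pair into llvm.prefetch's single
// locality operand. A sketch of that mapping, assuming zero-based cache
// levels (L1 == 0) as in the removed code; the helper name is illustrative:

// Temporal fetches invert the cache level (L1 -> locality 3, L3 -> 1);
// streaming fetches always get locality 0, which is why the FIXME notes
// that PLDL2STRM/PLDL3STRM cannot be expressed through the generic intrinsic.
inline unsigned prefetchLocality(unsigned CacheLevel, bool IsStream) {
  return IsStream ? 0u : 3u - CacheLevel;
}
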
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); - return Builder.CreateCall(F, {Address, RW, Locality, IsData}); - } - if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { assert((getContext().getTypeSize(E->getType()) == 32) && "rbit of unusual size!"); @@ -9498,32 +9786,32 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, "cls"); } - if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32zf || - BuiltinID == clang::AArch64::BI__builtin_arm_frint32z) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty), Arg, "frint32z"); } - if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64zf || - BuiltinID == clang::AArch64::BI__builtin_arm_frint64z) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty), Arg, "frint64z"); } - if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32xf || - BuiltinID == clang::AArch64::BI__builtin_arm_frint32x) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty), Arg, "frint32x"); } - if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64xf || - BuiltinID == clang::AArch64::BI__builtin_arm_frint64x) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty), @@ -9875,32 +10163,43 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || BuiltinID == clang::AArch64::BI__builtin_arm_wsr || BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) { SpecialRegisterAccessKind AccessKind = Write; if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_rsrp) AccessKind = VolatileRead; bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || BuiltinID == clang::AArch64::BI__builtin_arm_wsrp; - bool Is64Bit = BuiltinID != clang::AArch64::BI__builtin_arm_rsr && - BuiltinID != clang::AArch64::BI__builtin_arm_wsr; + bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr; + + bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr128; llvm::Type *ValueType; llvm::Type *RegisterType = Int64Ty; - if (IsPointerBuiltin) { + if (Is32Bit) { + ValueType = Int32Ty; + } else if (Is128Bit) { + llvm::Type *Int128Ty = + 
llvm::IntegerType::getInt128Ty(CGM.getLLVMContext()); + ValueType = Int128Ty; + RegisterType = Int128Ty; + } else if (IsPointerBuiltin) { ValueType = VoidPtrTy; - } else if (Is64Bit) { - ValueType = Int64Ty; } else { - ValueType = Int32Ty; - } + ValueType = Int64Ty; + }; return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, AccessKind); @@ -10026,9 +10325,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. - if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) + if (std::optional<MSVCIntrin> MsvcIntId = + translateAarch64ToMsvcIntrin(BuiltinID)) return EmitMSVCBuiltinExpr(*MsvcIntId, E); + // Some intrinsics are equivalent - if they are use the base intrinsic ID. + auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { + return P.first == BuiltinID; + }); + if (It != end(NEONEquivalentIntrinsicMap)) + BuiltinID = It->second; + // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; @@ -10069,7 +10376,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } } - auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); + auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); @@ -10082,7 +10389,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, const Expr *Arg = E->getArg(E->getNumArgs()-1); NeonTypeFlags Type(0); - if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext())) + if (std::optional<llvm::APSInt> Result = + Arg->getIntegerConstantExpr(getContext())) // Determine the type of this overloaded NEON intrinsic. 
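
// The rsr128/wsr128 hunk above widens RegisterType and ValueType to i128
// for the new 128-bit system-register builtins. A hedged usage sketch; the
// register name is purely illustrative and assumes a target and system
// register for which 128-bit accesses are architecturally defined:

__uint128_t readSysReg128(void) {
  return __builtin_arm_rsr128("par_el1"); // single 128-bit read
}

void writeSysReg128(__uint128_t V) {
  __builtin_arm_wsr128("par_el1", V); // single 128-bit write
}
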
Type = NeonTypeFlags(Result->getZExtValue()); @@ -10119,7 +10427,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvts_f32_u32: case NEON::BI__builtin_neon_vcvtd_f64_u64: usgn = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vcvts_f32_s32: case NEON::BI__builtin_neon_vcvtd_f64_s64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); @@ -10135,7 +10443,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvth_f16_u32: case NEON::BI__builtin_neon_vcvth_f16_u64: usgn = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vcvth_f16_s16: case NEON::BI__builtin_neon_vcvth_f16_s32: case NEON::BI__builtin_neon_vcvth_f16_s64: { @@ -11068,26 +11376,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::trunc; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); } - case NEON::BI__builtin_neon_vrnd32x_v: - case NEON::BI__builtin_neon_vrnd32xq_v: { + case NEON::BI__builtin_neon_vrnd32x_f32: + case NEON::BI__builtin_neon_vrnd32xq_f32: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x"); } - case NEON::BI__builtin_neon_vrnd32z_v: - case NEON::BI__builtin_neon_vrnd32zq_v: { + case NEON::BI__builtin_neon_vrnd32z_f32: + case NEON::BI__builtin_neon_vrnd32zq_f32: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z"); } - case NEON::BI__builtin_neon_vrnd64x_v: - case NEON::BI__builtin_neon_vrnd64xq_v: { + case NEON::BI__builtin_neon_vrnd64x_f32: + case NEON::BI__builtin_neon_vrnd64xq_f32: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x"); } - case NEON::BI__builtin_neon_vrnd64z_v: - case NEON::BI__builtin_neon_vrnd64zq_v: { + case NEON::BI__builtin_neon_vrnd64z_f32: + case NEON::BI__builtin_neon_vrnd64zq_f32: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z"); @@ -11125,26 +11433,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: - case NEON::BI__builtin_neon_vcvt_s16_v: - case NEON::BI__builtin_neon_vcvt_u16_v: + case NEON::BI__builtin_neon_vcvt_s16_f16: + case NEON::BI__builtin_neon_vcvt_u16_f16: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: - case NEON::BI__builtin_neon_vcvtq_s16_v: - case NEON::BI__builtin_neon_vcvtq_u16_v: { + case NEON::BI__builtin_neon_vcvtq_s16_f16: + case NEON::BI__builtin_neon_vcvtq_u16_f16: { Int = usgn ? 
Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs; llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); } - case NEON::BI__builtin_neon_vcvta_s16_v: - case NEON::BI__builtin_neon_vcvta_u16_v: + case NEON::BI__builtin_neon_vcvta_s16_f16: + case NEON::BI__builtin_neon_vcvta_u16_f16: case NEON::BI__builtin_neon_vcvta_s32_v: - case NEON::BI__builtin_neon_vcvtaq_s16_v: + case NEON::BI__builtin_neon_vcvtaq_s16_f16: case NEON::BI__builtin_neon_vcvtaq_s32_v: case NEON::BI__builtin_neon_vcvta_u32_v: - case NEON::BI__builtin_neon_vcvtaq_u16_v: + case NEON::BI__builtin_neon_vcvtaq_u16_f16: case NEON::BI__builtin_neon_vcvtaq_u32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvtaq_s64_v: @@ -11154,13 +11462,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); } - case NEON::BI__builtin_neon_vcvtm_s16_v: + case NEON::BI__builtin_neon_vcvtm_s16_f16: case NEON::BI__builtin_neon_vcvtm_s32_v: - case NEON::BI__builtin_neon_vcvtmq_s16_v: + case NEON::BI__builtin_neon_vcvtmq_s16_f16: case NEON::BI__builtin_neon_vcvtmq_s32_v: - case NEON::BI__builtin_neon_vcvtm_u16_v: + case NEON::BI__builtin_neon_vcvtm_u16_f16: case NEON::BI__builtin_neon_vcvtm_u32_v: - case NEON::BI__builtin_neon_vcvtmq_u16_v: + case NEON::BI__builtin_neon_vcvtmq_u16_f16: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtm_s64_v: case NEON::BI__builtin_neon_vcvtmq_s64_v: @@ -11170,13 +11478,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); } - case NEON::BI__builtin_neon_vcvtn_s16_v: + case NEON::BI__builtin_neon_vcvtn_s16_f16: case NEON::BI__builtin_neon_vcvtn_s32_v: - case NEON::BI__builtin_neon_vcvtnq_s16_v: + case NEON::BI__builtin_neon_vcvtnq_s16_f16: case NEON::BI__builtin_neon_vcvtnq_s32_v: - case NEON::BI__builtin_neon_vcvtn_u16_v: + case NEON::BI__builtin_neon_vcvtn_u16_f16: case NEON::BI__builtin_neon_vcvtn_u32_v: - case NEON::BI__builtin_neon_vcvtnq_u16_v: + case NEON::BI__builtin_neon_vcvtnq_u16_f16: case NEON::BI__builtin_neon_vcvtnq_u32_v: case NEON::BI__builtin_neon_vcvtn_s64_v: case NEON::BI__builtin_neon_vcvtnq_s64_v: @@ -11186,13 +11494,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); } - case NEON::BI__builtin_neon_vcvtp_s16_v: + case NEON::BI__builtin_neon_vcvtp_s16_f16: case NEON::BI__builtin_neon_vcvtp_s32_v: - case NEON::BI__builtin_neon_vcvtpq_s16_v: + case NEON::BI__builtin_neon_vcvtpq_s16_f16: case NEON::BI__builtin_neon_vcvtpq_s32_v: - case NEON::BI__builtin_neon_vcvtp_u16_v: + case NEON::BI__builtin_neon_vcvtp_u16_f16: case NEON::BI__builtin_neon_vcvtp_u32_v: - case NEON::BI__builtin_neon_vcvtpq_u16_v: + case NEON::BI__builtin_neon_vcvtpq_u16_f16: case NEON::BI__builtin_neon_vcvtpq_u32_v: case NEON::BI__builtin_neon_vcvtp_s64_v: case NEON::BI__builtin_neon_vcvtpq_s64_v: @@ -11268,7 +11576,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddv_u8: // FIXME: These are handled by the AArch64 scalar code. usgn = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vaddv_s8: { Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; @@ -11280,7 +11588,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vaddv_u16: usgn = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vaddv_s16: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; @@ -11292,7 +11600,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vaddvq_u8: usgn = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vaddvq_s8: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; @@ -11304,7 +11612,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vaddvq_u16: usgn = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NEON::BI__builtin_neon_vaddvq_s16: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; @@ -11652,7 +11960,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { - Value *V = UndefValue::get(Ty); + Value *V = PoisonValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], @@ -11745,7 +12053,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); - Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); + Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); @@ -11759,7 +12067,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); - Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); + Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); @@ -11774,7 +12082,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateBitCast(Ops[4], Ty); Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); - Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); + Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); @@ -11934,7 +12242,7 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, "unexpected BPF builtin"); // A sequence number, injected into IR builtin functions, to - // prevent CSE given the only difference of the funciton + // prevent CSE given the only difference of the function // may just be the debuginfo metadata. static uint32_t BuiltinSeqNum; @@ -12050,11 +12358,11 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { } // Otherwise, insertelement the values to build the vector. 
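
// Several hunks in this patch swap undef for poison as the base value of
// vectors built up lane by lane; every lane is overwritten before use, and
// poison is the stronger "no value" placeholder, so nothing is lost and
// optimizers get more freedom. A minimal IRBuilder sketch of the pattern,
// as a free-standing helper (the surrounding CodeGen plumbing is assumed):

#include "llvm/IR/IRBuilder.h"

llvm::Value *buildFromScalars(llvm::IRBuilder<> &B,
                              llvm::ArrayRef<llvm::Value *> Ops) {
  auto *VTy = llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size());
  llvm::Value *V = llvm::PoisonValue::get(VTy); // poison, not undef
  for (unsigned I = 0, E = Ops.size(); I != E; ++I)
    V = B.CreateInsertElement(V, Ops[I], B.getInt64(I)); // i64 lane index
  return V;
}
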
- Value *Result = llvm::UndefValue::get( + Value *Result = llvm::PoisonValue::get( llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size())); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); + Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i)); return Result; } @@ -12074,9 +12382,8 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, int Indices[4]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; - MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, - makeArrayRef(Indices, NumElts), - "extract"); + MaskVec = CGF.Builder.CreateShuffleVector( + MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract"); } return MaskVec; } @@ -12347,7 +12654,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, default: break; case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: Subtract = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddph512_mask: case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: @@ -12355,7 +12662,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, break; case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: Subtract = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: @@ -12363,21 +12670,21 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, break; case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: Subtract = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddps512_mask: case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: Subtract = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: Subtract = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: @@ -12385,7 +12692,7 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, break; case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: Subtract = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: @@ -12628,18 +12935,6 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, return Res; } -// Convert a BF16 to a float. 
-static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, - const CallExpr *E, - ArrayRef<Value *> Ops) { - llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); - Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty); - Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16); - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType); - return BitCast; -} - Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { llvm::Type *Int32Ty = Builder.getInt32Ty(); @@ -12668,9 +12963,11 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) #define X86_CPU_TYPE(ENUM, STR) \ .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) +#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \ + .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) #define X86_CPU_SUBTYPE(ENUM, STR) \ .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) -#include "llvm/Support/X86TargetParser.def" +#include "llvm/TargetParser/X86TargetParser.def" .Default({0, 0}); assert(Value != 0 && "Invalid CPUStr passed to CpuIs"); @@ -12749,6 +13046,16 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { return Result; } +Value *CodeGenFunction::EmitAArch64CpuInit() { + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver"); + cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); + cast<llvm::GlobalValue>(Func.getCallee()) + ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + return Builder.CreateCall(Func); +} + Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); @@ -12760,6 +13067,32 @@ Value *CodeGenFunction::EmitX86CpuInit() { return Builder.CreateCall(Func); } +llvm::Value * +CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) { + uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs); + Value *Result = Builder.getTrue(); + if (FeaturesMask != 0) { + // Get features from structure in runtime library + // struct { + // unsigned long long features; + // } __aarch64_cpu_features; + llvm::Type *STy = llvm::StructType::get(Int64Ty); + llvm::Constant *AArch64CPUFeatures = + CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features"); + cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true); + llvm::Value *CpuFeatures = Builder.CreateGEP( + STy, AArch64CPUFeatures, + {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)}); + Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures, + CharUnits::fromQuantity(8)); + Value *Mask = Builder.getInt64(FeaturesMask); + Value *Bitset = Builder.CreateAnd(Features, Mask); + Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); + Result = Builder.CreateAnd(Result, Cmp); + } + return Result; +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_cpu_is) @@ -12771,7 +13104,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
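
// EmitAArch64CpuSupports above lowers __builtin_cpu_supports to a load of
// the runtime feature word followed by a mask-and-compare. Its rough C++
// equivalent, assuming the compiler-rt layout sketched in the comment in
// that hunk (the helper name is illustrative):

struct AArch64CpuFeatures {
  unsigned long long features; // set by the runtime's feature resolver
};
extern "C" AArch64CpuFeatures __aarch64_cpu_features;

bool cpuSupports(unsigned long long Mask) {
  // Every requested feature bit must be set for the check to succeed.
  return (__aarch64_cpu_features.features & Mask) == Mask;
}
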
- if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID)) + if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID)) return EmitMSVCBuiltinExpr(*MsvcIntId, E); SmallVector<Value*, 4> Ops; @@ -13512,8 +13845,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + Index; - Value *Res = Builder.CreateShuffleVector(Ops[0], - makeArrayRef(Indices, NumElts), + Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "extract"); if (Ops.size() == 4) @@ -13551,9 +13883,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0; i != DstNumElts; ++i) Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i; - Value *Op1 = Builder.CreateShuffleVector(Ops[1], - makeArrayRef(Indices, DstNumElts), - "widen"); + Value *Op1 = Builder.CreateShuffleVector( + Ops[1], ArrayRef(Indices, DstNumElts), "widen"); for (unsigned i = 0; i != DstNumElts; ++i) { if (i >= Index && i < (Index + SrcNumElts)) @@ -13563,8 +13894,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateShuffleVector(Ops[0], Op1, - makeArrayRef(Indices, DstNumElts), - "insert"); + ArrayRef(Indices, DstNumElts), "insert"); } case X86::BI__builtin_ia32_pmovqd512_mask: case X86::BI__builtin_ia32_pmovwb512_mask: { @@ -13614,8 +13944,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i; return Builder.CreateShuffleVector(Ops[0], Ops[1], - makeArrayRef(Indices, NumElts), - "blend"); + ArrayRef(Indices, NumElts), "blend"); } case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: @@ -13637,7 +13966,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[l + i] = l + i; } - return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "pshuflw"); } case X86::BI__builtin_ia32_pshufhw: @@ -13660,7 +13989,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "pshufhw"); } case X86::BI__builtin_ia32_pshufd: @@ -13689,7 +14018,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "permil"); } case X86::BI__builtin_ia32_shufpd: @@ -13719,8 +14048,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateShuffleVector(Ops[0], Ops[1], - makeArrayRef(Indices, NumElts), - "shufp"); + ArrayRef(Indices, NumElts), "shufp"); } case X86::BI__builtin_ia32_permdi256: case X86::BI__builtin_ia32_permdf256: @@ -13736,7 +14064,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); - return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "perm"); } case X86::BI__builtin_ia32_palignr128: @@ -13773,8 +14101,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateShuffleVector(Ops[1], Ops[0], - makeArrayRef(Indices, NumElts), - "palignr"); + ArrayRef(Indices, NumElts), "palignr"); } case 
X86::BI__builtin_ia32_alignd128: case X86::BI__builtin_ia32_alignd256: @@ -13794,8 +14121,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[i] = i + ShiftVal; return Builder.CreateShuffleVector(Ops[1], Ops[0], - makeArrayRef(Indices, NumElts), - "valign"); + ArrayRef(Indices, NumElts), "valign"); } case X86::BI__builtin_ia32_shuf_f32x4_256: case X86::BI__builtin_ia32_shuf_f64x2_256: @@ -13823,8 +14149,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateShuffleVector(Ops[0], Ops[1], - makeArrayRef(Indices, NumElts), - "shuf"); + ArrayRef(Indices, NumElts), "shuf"); } case X86::BI__builtin_ia32_vperm2f128_pd256: @@ -13863,8 +14188,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateShuffleVector(OutOps[0], OutOps[1], - makeArrayRef(Indices, NumElts), - "vperm"); + ArrayRef(Indices, NumElts), "vperm"); } case X86::BI__builtin_ia32_pslldqi128_byteshift: @@ -13892,9 +14216,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); - Value *SV = Builder.CreateShuffleVector(Zero, Cast, - makeArrayRef(Indices, NumElts), - "pslldq"); + Value *SV = Builder.CreateShuffleVector( + Zero, Cast, ArrayRef(Indices, NumElts), "pslldq"); return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast"); } case X86::BI__builtin_ia32_psrldqi128_byteshift: @@ -13922,9 +14245,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); - Value *SV = Builder.CreateShuffleVector(Cast, Zero, - makeArrayRef(Indices, NumElts), - "psrldq"); + Value *SV = Builder.CreateShuffleVector( + Cast, Zero, ArrayRef(Indices, NumElts), "psrldq"); return Builder.CreateBitCast(SV, ResultType, "cast"); } case X86::BI__builtin_ia32_kshiftliqi: @@ -13944,9 +14266,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[i] = NumElts + i - ShiftVal; Value *Zero = llvm::Constant::getNullValue(In->getType()); - Value *SV = Builder.CreateShuffleVector(Zero, In, - makeArrayRef(Indices, NumElts), - "kshiftl"); + Value *SV = Builder.CreateShuffleVector( + Zero, In, ArrayRef(Indices, NumElts), "kshiftl"); return Builder.CreateBitCast(SV, Ops[0]->getType()); } case X86::BI__builtin_ia32_kshiftriqi: @@ -13966,9 +14287,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[i] = i + ShiftVal; Value *Zero = llvm::Constant::getNullValue(In->getType()); - Value *SV = Builder.CreateShuffleVector(In, Zero, - makeArrayRef(Indices, NumElts), - "kshiftr"); + Value *SV = Builder.CreateShuffleVector( + In, Zero, ArrayRef(Indices, NumElts), "kshiftr"); return Builder.CreateBitCast(SV, Ops[0]->getType()); } case X86::BI__builtin_ia32_movnti: @@ -14046,6 +14366,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_selectph_128: case X86::BI__builtin_ia32_selectph_256: case X86::BI__builtin_ia32_selectph_512: + case X86::BI__builtin_ia32_selectpbf_128: + case X86::BI__builtin_ia32_selectpbf_256: + case X86::BI__builtin_ia32_selectpbf_512: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: @@ -14054,6 +14377,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned 
BuiltinID, case X86::BI__builtin_ia32_selectpd_512: return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); case X86::BI__builtin_ia32_selectsh_128: + case X86::BI__builtin_ia32_selectsbf_128: case X86::BI__builtin_ia32_selectss_128: case X86::BI__builtin_ia32_selectsd_128: { Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); @@ -14251,14 +14575,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // First extract half of each vector. This gives better codegen than // doing it in a single shuffle. - LHS = Builder.CreateShuffleVector(LHS, LHS, - makeArrayRef(Indices, NumElts / 2)); - RHS = Builder.CreateShuffleVector(RHS, RHS, - makeArrayRef(Indices, NumElts / 2)); + LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2)); + RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2)); // Concat the vectors. // NOTE: Operands are swapped to match the intrinsic definition. - Value *Res = Builder.CreateShuffleVector(RHS, LHS, - makeArrayRef(Indices, NumElts)); + Value *Res = + Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts)); return Builder.CreateBitCast(Res, Ops[0]->getType()); } @@ -14448,6 +14770,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_reduce_fadd_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } @@ -14458,6 +14781,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_reduce_fmul_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } @@ -14468,6 +14792,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_reduce_fmax_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType()); + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } @@ -14478,6 +14803,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_reduce_fmin_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType()); + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } @@ -14725,7 +15051,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmppd256_mask: case X86::BI__builtin_ia32_cmppd512_mask: IsMaskFCmp = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case X86::BI__builtin_ia32_cmpps: case X86::BI__builtin_ia32_cmpps256: case X86::BI__builtin_ia32_cmppd: @@ -14890,7 +15216,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); } -// AVX512 bf16 intrinsics + // AVX512 bf16 intrinsics case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { Ops[2] = getMaskVecValue( *this, Ops[2], @@ -14899,7 +15225,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } case X86::BI__builtin_ia32_cvtsbf162ss_32: - return EmitX86CvtBF16ToFloatExpr(*this, E, Ops); + return Builder.CreateFPExt(Ops[0], Builder.getFloatTy()); case 
X86::BI__builtin_ia32_cvtneps2bf16_256_mask: case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { @@ -15214,7 +15540,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_vfcmaddcph512_mask: IsConjFMA = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case X86::BI__builtin_ia32_vfmaddcph512_mask: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512 @@ -15224,7 +15550,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: IsConjFMA = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case X86::BI__builtin_ia32_vfmaddcsh_round_mask: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; @@ -15234,7 +15560,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: IsConjFMA = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; @@ -15242,6 +15568,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, static constexpr int Mask[] = {0, 5, 6, 7}; return Builder.CreateShuffleVector(Call, Ops[2], Mask); } + case X86::BI__builtin_ia32_prefetchi: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()), + {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1], + llvm::ConstantInt::get(Int32Ty, 0)}); } } @@ -15976,7 +16307,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, assert(ArgCI && "Third arg to xxinsertw intrinsic must be constant integer"); const int64_t MaxIndex = 12; - int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); // The builtin semantics don't exactly match the xxinsertw instructions // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the @@ -16018,7 +16349,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, assert(ArgCI && "Second Arg to xxextractuw intrinsic must be a constant integer!"); const int64_t MaxIndex = 12; - int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); if (getTarget().isLittleEndian()) { // Reverse the index. @@ -16103,10 +16434,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); bool isLittleEndian = getTarget().isLittleEndian(); - Value *UndefValue = - llvm::UndefValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); + Value *PoisonValue = + llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); Value *Res = Builder.CreateInsertElement( - UndefValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); + PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); Res = Builder.CreateInsertElement(Res, Op1, (uint64_t)(isLittleEndian ? 
0 : 1)); return Builder.CreateBitCast(Res, ConvertType(E->getType())); @@ -16235,7 +16566,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, auto Pair = EmitAtomicCompareExchange( LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(), llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); - // Unlike c11's atomic_compare_exchange, accroding to + // Unlike c11's atomic_compare_exchange, according to // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp // > In either case, the contents of the memory location specified by addr // > are copied into the memory location specified by old_val_addr. @@ -16353,16 +16684,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_ppc_test_data_class: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *ArgType = Op0->getType(); - unsigned IntrinsicID; - if (ArgType->isDoubleTy()) - IntrinsicID = Intrinsic::ppc_test_data_class_d; - else if (ArgType->isFloatTy()) - IntrinsicID = Intrinsic::ppc_test_data_class_f; - else - llvm_unreachable("Invalid Argument Type"); - return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), {Op0, Op1}, - "test_data_class"); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()), + {Op0, Op1}, "test_data_class"); } case PPC::BI__builtin_ppc_maxfe: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -16475,8 +16799,10 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); LD->setMetadata(llvm::LLVMContext::MD_range, RNode); + LD->setMetadata(llvm::LLVMContext::MD_noundef, + llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, - llvm::MDNode::get(CGF.getLLVMContext(), None)); + llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; } @@ -16493,7 +16819,7 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { auto *LD = CGF.Builder.CreateLoad( Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4))); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, - llvm::MDNode::get(CGF.getLLVMContext(), None)); + llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; } } // namespace @@ -16504,39 +16830,35 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { // it into LLVM's memory ordering specifier using atomic C ABI, and writes // to \p AO. \p Scope takes a const char * and converts it into AMDGCN // specific SyncScopeID and writes it to \p SSID. 
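
// ProcessOrderScopeAMDGCN becomes infallible below: the ordering operand is
// expected to be a constant by the time CodeGen runs (the new body casts to
// ConstantInt unconditionally), so the bool result and the callers'
// fallthrough-on-failure paths are dropped. A usage sketch of the builtin
// that feeds it, with an assumed (illustrative) sync-scope string:

void releaseFenceWorkgroup(void) {
  // __ATOMIC_RELEASE maps to AtomicOrdering::Release above; "workgroup" is
  // resolved via getOrInsertSyncScopeID.
  __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
}
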
-bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, +void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID) { - if (isa<llvm::ConstantInt>(Order)) { - int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - - // Map C11/C++11 memory ordering to LLVM memory ordering - assert(llvm::isValidAtomicOrderingCABI(ord)); - switch (static_cast<llvm::AtomicOrderingCABI>(ord)) { - case llvm::AtomicOrderingCABI::acquire: - case llvm::AtomicOrderingCABI::consume: - AO = llvm::AtomicOrdering::Acquire; - break; - case llvm::AtomicOrderingCABI::release: - AO = llvm::AtomicOrdering::Release; - break; - case llvm::AtomicOrderingCABI::acq_rel: - AO = llvm::AtomicOrdering::AcquireRelease; - break; - case llvm::AtomicOrderingCABI::seq_cst: - AO = llvm::AtomicOrdering::SequentiallyConsistent; - break; - case llvm::AtomicOrderingCABI::relaxed: - AO = llvm::AtomicOrdering::Monotonic; - break; - } + int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - StringRef scp; - llvm::getConstantStringInfo(Scope, scp); - SSID = getLLVMContext().getOrInsertSyncScopeID(scp); - return true; + // Map C11/C++11 memory ordering to LLVM memory ordering + assert(llvm::isValidAtomicOrderingCABI(ord)); + switch (static_cast<llvm::AtomicOrderingCABI>(ord)) { + case llvm::AtomicOrderingCABI::acquire: + case llvm::AtomicOrderingCABI::consume: + AO = llvm::AtomicOrdering::Acquire; + break; + case llvm::AtomicOrderingCABI::release: + AO = llvm::AtomicOrdering::Release; + break; + case llvm::AtomicOrderingCABI::acq_rel: + AO = llvm::AtomicOrdering::AcquireRelease; + break; + case llvm::AtomicOrderingCABI::seq_cst: + AO = llvm::AtomicOrdering::SequentiallyConsistent; + break; + case llvm::AtomicOrderingCABI::relaxed: + AO = llvm::AtomicOrdering::Monotonic; + break; } - return false; + + StringRef scp; + llvm::getConstantStringInfo(Scope, scp); + SSID = getLLVMContext().getOrInsertSyncScopeID(scp); } Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, @@ -16593,7 +16915,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Args.push_back(EmitScalarExpr(E->getArg(I))); assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) - Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); + Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType())); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); @@ -16661,6 +16983,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); case AMDGPU::BI__builtin_amdgcn_sbfe: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); + case AMDGPU::BI__builtin_amdgcn_ballot_w32: + case AMDGPU::BI__builtin_amdgcn_ballot_w64: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); + return Builder.CreateCall(F, { Src }); + } case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case AMDGPU::BI__builtin_amdgcn_sicmp: @@ -16862,6 +17191,21 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, RayInverseDir, TextureDescr}); } + case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: { + SmallVector<Value *, 4> Args; + for (int i = 0, e = E->getNumArgs(); i != e; ++i) + Args.push_back(EmitScalarExpr(E->getArg(i))); + + Function *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn); + Value *Call = Builder.CreateCall(F, Args); + Value *Rtn = Builder.CreateExtractValue(Call, 0); + Value *A = Builder.CreateExtractValue(Call, 1); + llvm::Type *RetTy = ConvertType(E->getType()); + Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn, + (uint64_t)0); + return Builder.CreateInsertElement(I0, A, 1); + } + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: @@ -16966,12 +17310,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } - case AMDGPU::BI__builtin_amdgcn_fence: { - if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(1)), AO, SSID)) - return Builder.CreateFence(AO, SSID); - LLVM_FALLTHROUGH; + ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(1)), AO, SSID); + return Builder.CreateFence(AO, SSID); } case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: @@ -16997,22 +17339,29 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()}); - if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(3)), AO, SSID)) { + ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(3)), AO, SSID); - // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expects ordering and - // scope as unsigned values - Value *MemOrder = Builder.getInt32(static_cast<int>(AO)); - Value *MemScope = Builder.getInt32(static_cast<int>(SSID)); + // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expects ordering and + // scope as unsigned values + Value *MemOrder = Builder.getInt32(static_cast<int>(AO)); + Value *MemScope = Builder.getInt32(static_cast<int>(SSID)); - QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); - bool Volatile = - PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); - Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile)); + QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); + bool Volatile = + PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); + Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile)); - return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile}); - } - LLVM_FALLTHROUGH; + return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile}); + } + case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: + case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResultType = ConvertType(E->getType()); + // s_sendmsg_rtn is mangled using return type only. 
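
// The ds_bvh_stack_rtn case above splits the intrinsic's two aggregate
// results and repacks them into the builtin's two-element vector return.
// A sketch of just that repacking step; the helper name and signature are
// illustrative:

#include "llvm/IR/IRBuilder.h"
#include <cstdint>

llvm::Value *packPairIntoVector(llvm::IRBuilder<> &B, llvm::Value *StructRet,
                                llvm::Type *VecTy) {
  llvm::Value *Rtn = B.CreateExtractValue(StructRet, 0);
  llvm::Value *Addr = B.CreateExtractValue(StructRet, 1);
  llvm::Value *V =
      B.CreateInsertElement(llvm::PoisonValue::get(VecTy), Rtn, uint64_t(0));
  return B.CreateInsertElement(V, Addr, uint64_t(1));
}
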
+ Function *F = + CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); + return Builder.CreateCall(F, {Arg}); } default: return nullptr; @@ -18035,7 +18384,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - Optional<llvm::APSInt> isColMajorArg = + std::optional<llvm::APSInt> isColMajorArg = E->getArg(3)->getIntegerConstantExpr(getContext()); if (!isColMajorArg) return nullptr; @@ -18082,7 +18431,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - Optional<llvm::APSInt> isColMajorArg = + std::optional<llvm::APSInt> isColMajorArg = E->getArg(3)->getIntegerConstantExpr(getContext()); if (!isColMajorArg) return nullptr; @@ -18141,7 +18490,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); Address SrcC = EmitPointerWithAlignment(E->getArg(3)); - Optional<llvm::APSInt> LayoutArg = + std::optional<llvm::APSInt> LayoutArg = E->getArg(4)->getIntegerConstantExpr(getContext()); if (!LayoutArg) return nullptr; @@ -18152,7 +18501,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 || BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1) SatfArg = 0; // .b1 does not have satf argument. - else if (Optional<llvm::APSInt> OptSatfArg = + else if (std::optional<llvm::APSInt> OptSatfArg = E->getArg(5)->getIntegerConstantExpr(getContext())) SatfArg = *OptSatfArg; else @@ -18595,7 +18944,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, IntNo = Intrinsic::wasm_extadd_pairwise_unsigned; break; default: - llvm_unreachable("unexptected builtin ID"); + llvm_unreachable("unexpected builtin ID"); } Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); @@ -18715,7 +19064,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); while (OpIdx < 18) { - Optional<llvm::APSInt> LaneConst = + std::optional<llvm::APSInt> LaneConst = E->getArg(OpIdx)->getIntegerConstantExpr(getContext()); assert(LaneConst && "Constant arg isn't actually constant?"); Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst); @@ -18723,22 +19072,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); return Builder.CreateCall(Callee, Ops); } - case WebAssembly::BI__builtin_wasm_fma_f32x4: - case WebAssembly::BI__builtin_wasm_fms_f32x4: - case WebAssembly::BI__builtin_wasm_fma_f64x2: - case WebAssembly::BI__builtin_wasm_fms_f64x2: { + case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: + case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: { Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); unsigned IntNo; switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_fma_f32x4: - case WebAssembly::BI__builtin_wasm_fma_f64x2: - IntNo = 
Intrinsic::wasm_fma; + case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: + IntNo = Intrinsic::wasm_relaxed_madd; break; - case WebAssembly::BI__builtin_wasm_fms_f32x4: - case WebAssembly::BI__builtin_wasm_fms_f64x2: - IntNo = Intrinsic::wasm_fms; + case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: + IntNo = Intrinsic::wasm_relaxed_nmadd; break; default: llvm_unreachable("unexpected builtin ID"); @@ -18746,15 +19095,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); return Builder.CreateCall(Callee, {A, B, C}); } - case WebAssembly::BI__builtin_wasm_laneselect_i8x16: - case WebAssembly::BI__builtin_wasm_laneselect_i16x8: - case WebAssembly::BI__builtin_wasm_laneselect_i32x4: - case WebAssembly::BI__builtin_wasm_laneselect_i64x2: { + case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16: + case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8: + case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4: + case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: { Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); Function *Callee = - CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType()); + CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType()); return Builder.CreateCall(Callee, {A, B, C}); } case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: { @@ -18816,18 +19165,27 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } - case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_s_i16x8: { + case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_signed); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } - case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_add_s_i32x4: { + case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Acc = EmitScalarExpr(E->getArg(2)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed); + return Builder.CreateCall(Callee, {LHS, RHS, Acc}); + } + case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Value *Acc = EmitScalarExpr(E->getArg(2)); Function *Callee = - CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_add_signed); + CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32); return Builder.CreateCall(Callee, {LHS, RHS, Acc}); } default: @@ -18842,7 +19200,7 @@ getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) { Intrinsic::ID IntrinsicID; unsigned VecLen; }; - Info Infos[] = { + static Info Infos[] = { #define CUSTOM_BUILTIN_MAPPING(x,s) \ { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s }, CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0) @@ -18884,8 +19242,7 @@ getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) { static const bool SortOnce = (llvm::sort(Infos, CmpInfo), 
true); (void)SortOnce; - const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos), - Info{BuiltinID, 0, 0}, CmpInfo); + const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo); if (F == std::end(Infos) || F->BuiltinID != BuiltinID) return {Intrinsic::not_intrinsic, 0}; @@ -19004,6 +19361,25 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, PredAddr.getAlignment()); return Builder.CreateExtractValue(Result, 0); } + // These are identical to the builtins above, except they don't consume + // input carry, only generate carry-out. Since they still produce two + // outputs, generate the store of the predicate, but no load. + case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo: + case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo: + case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: { + // Get the type from the 0-th argument. + llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); + Address PredAddr = Builder.CreateElementBitCast( + EmitPointerWithAlignment(E->getArg(2)), VecType); + llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), + {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + + llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); + Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(), + PredAddr.getAlignment()); + return Builder.CreateExtractValue(Result, 0); + } case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq: @@ -19104,7 +19480,12 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID = Intrinsic::not_intrinsic; unsigned NF = 1; - constexpr unsigned TAIL_UNDISTURBED = 0; + // The 0th bit simulates the `vta` of RVV + // The 1st bit simulates the `vma` of RVV + constexpr unsigned RVV_VTA = 0x1; + constexpr unsigned RVV_VMA = 0x2; + int PolicyAttrs = 0; + bool IsMasked = false; // Required for overloaded intrinsics. 
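A note on the RISC-V hunk above: the new constants replace the old single TAIL_UNDISTURBED value by packing the RVV tail/mask policies into one bitmask. A short sketch of how the bits are meant to be read back; the helper names are invented for illustration:

    // Bit 0 models vta (tail agnostic), bit 1 models vma (mask agnostic),
    // matching the RVV_VTA/RVV_VMA constants in the hunk.
    constexpr unsigned RVV_VTA = 0x1;
    constexpr unsigned RVV_VMA = 0x2;

    constexpr bool isTailAgnostic(unsigned PolicyAttrs) { return PolicyAttrs & RVV_VTA; }
    constexpr bool isMaskAgnostic(unsigned PolicyAttrs) { return PolicyAttrs & RVV_VMA; }

    // "TAMA" (tail agnostic, mask agnostic) sets both bits.
    static_assert(isTailAgnostic(RVV_VTA | RVV_VMA), "tama implies vta");
    static_assert(isMaskAgnostic(RVV_VTA | RVV_VMA), "tama implies vma");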
llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes; @@ -19119,38 +19500,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_clmul: case RISCV::BI__builtin_riscv_clmulh: case RISCV::BI__builtin_riscv_clmulr: - case RISCV::BI__builtin_riscv_bcompress_32: - case RISCV::BI__builtin_riscv_bcompress_64: - case RISCV::BI__builtin_riscv_bdecompress_32: - case RISCV::BI__builtin_riscv_bdecompress_64: - case RISCV::BI__builtin_riscv_bfp_32: - case RISCV::BI__builtin_riscv_bfp_64: - case RISCV::BI__builtin_riscv_grev_32: - case RISCV::BI__builtin_riscv_grev_64: - case RISCV::BI__builtin_riscv_gorc_32: - case RISCV::BI__builtin_riscv_gorc_64: - case RISCV::BI__builtin_riscv_shfl_32: - case RISCV::BI__builtin_riscv_shfl_64: - case RISCV::BI__builtin_riscv_unshfl_32: - case RISCV::BI__builtin_riscv_unshfl_64: case RISCV::BI__builtin_riscv_xperm4: case RISCV::BI__builtin_riscv_xperm8: - case RISCV::BI__builtin_riscv_xperm_n: - case RISCV::BI__builtin_riscv_xperm_b: - case RISCV::BI__builtin_riscv_xperm_h: - case RISCV::BI__builtin_riscv_xperm_w: - case RISCV::BI__builtin_riscv_crc32_b: - case RISCV::BI__builtin_riscv_crc32_h: - case RISCV::BI__builtin_riscv_crc32_w: - case RISCV::BI__builtin_riscv_crc32_d: - case RISCV::BI__builtin_riscv_crc32c_b: - case RISCV::BI__builtin_riscv_crc32c_h: - case RISCV::BI__builtin_riscv_crc32c_w: - case RISCV::BI__builtin_riscv_crc32c_d: - case RISCV::BI__builtin_riscv_fsl_32: - case RISCV::BI__builtin_riscv_fsr_32: - case RISCV::BI__builtin_riscv_fsl_64: - case RISCV::BI__builtin_riscv_fsr_64: case RISCV::BI__builtin_riscv_brev8: case RISCV::BI__builtin_riscv_zip_32: case RISCV::BI__builtin_riscv_unzip_32: { @@ -19183,88 +19534,6 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, ID = Intrinsic::riscv_clmulr; break; - // Zbe - case RISCV::BI__builtin_riscv_bcompress_32: - case RISCV::BI__builtin_riscv_bcompress_64: - ID = Intrinsic::riscv_bcompress; - break; - case RISCV::BI__builtin_riscv_bdecompress_32: - case RISCV::BI__builtin_riscv_bdecompress_64: - ID = Intrinsic::riscv_bdecompress; - break; - - // Zbf - case RISCV::BI__builtin_riscv_bfp_32: - case RISCV::BI__builtin_riscv_bfp_64: - ID = Intrinsic::riscv_bfp; - break; - - // Zbp - case RISCV::BI__builtin_riscv_grev_32: - case RISCV::BI__builtin_riscv_grev_64: - ID = Intrinsic::riscv_grev; - break; - case RISCV::BI__builtin_riscv_gorc_32: - case RISCV::BI__builtin_riscv_gorc_64: - ID = Intrinsic::riscv_gorc; - break; - case RISCV::BI__builtin_riscv_shfl_32: - case RISCV::BI__builtin_riscv_shfl_64: - ID = Intrinsic::riscv_shfl; - break; - case RISCV::BI__builtin_riscv_unshfl_32: - case RISCV::BI__builtin_riscv_unshfl_64: - ID = Intrinsic::riscv_unshfl; - break; - case RISCV::BI__builtin_riscv_xperm_n: - ID = Intrinsic::riscv_xperm_n; - break; - case RISCV::BI__builtin_riscv_xperm_b: - ID = Intrinsic::riscv_xperm_b; - break; - case RISCV::BI__builtin_riscv_xperm_h: - ID = Intrinsic::riscv_xperm_h; - break; - case RISCV::BI__builtin_riscv_xperm_w: - ID = Intrinsic::riscv_xperm_w; - break; - - // Zbr - case RISCV::BI__builtin_riscv_crc32_b: - ID = Intrinsic::riscv_crc32_b; - break; - case RISCV::BI__builtin_riscv_crc32_h: - ID = Intrinsic::riscv_crc32_h; - break; - case RISCV::BI__builtin_riscv_crc32_w: - ID = Intrinsic::riscv_crc32_w; - break; - case RISCV::BI__builtin_riscv_crc32_d: - ID = Intrinsic::riscv_crc32_d; - break; - case RISCV::BI__builtin_riscv_crc32c_b: - ID = Intrinsic::riscv_crc32c_b; - break; - case RISCV::BI__builtin_riscv_crc32c_h: - ID 
= Intrinsic::riscv_crc32c_h; - break; - case RISCV::BI__builtin_riscv_crc32c_w: - ID = Intrinsic::riscv_crc32c_w; - break; - case RISCV::BI__builtin_riscv_crc32c_d: - ID = Intrinsic::riscv_crc32c_d; - break; - - // Zbt - case RISCV::BI__builtin_riscv_fsl_32: - case RISCV::BI__builtin_riscv_fsl_64: - ID = Intrinsic::riscv_fsl; - break; - case RISCV::BI__builtin_riscv_fsr_32: - case RISCV::BI__builtin_riscv_fsr_64: - ID = Intrinsic::riscv_fsr; - break; - // Zbkx case RISCV::BI__builtin_riscv_xperm8: ID = Intrinsic::riscv_xperm8; @@ -19407,3 +19676,129 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } + +Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + SmallVector<Value *, 4> Ops; + + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + default: + llvm_unreachable("unexpected builtin ID."); + case LoongArch::BI__builtin_loongarch_cacop_d: + ID = Intrinsic::loongarch_cacop_d; + break; + case LoongArch::BI__builtin_loongarch_cacop_w: + ID = Intrinsic::loongarch_cacop_w; + break; + case LoongArch::BI__builtin_loongarch_dbar: + ID = Intrinsic::loongarch_dbar; + break; + case LoongArch::BI__builtin_loongarch_break: + ID = Intrinsic::loongarch_break; + break; + case LoongArch::BI__builtin_loongarch_ibar: + ID = Intrinsic::loongarch_ibar; + break; + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + ID = Intrinsic::loongarch_movfcsr2gr; + break; + case LoongArch::BI__builtin_loongarch_movgr2fcsr: + ID = Intrinsic::loongarch_movgr2fcsr; + break; + case LoongArch::BI__builtin_loongarch_syscall: + ID = Intrinsic::loongarch_syscall; + break; + case LoongArch::BI__builtin_loongarch_crc_w_b_w: + ID = Intrinsic::loongarch_crc_w_b_w; + break; + case LoongArch::BI__builtin_loongarch_crc_w_h_w: + ID = Intrinsic::loongarch_crc_w_h_w; + break; + case LoongArch::BI__builtin_loongarch_crc_w_w_w: + ID = Intrinsic::loongarch_crc_w_w_w; + break; + case LoongArch::BI__builtin_loongarch_crc_w_d_w: + ID = Intrinsic::loongarch_crc_w_d_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_b_w: + ID = Intrinsic::loongarch_crcc_w_b_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_h_w: + ID = Intrinsic::loongarch_crcc_w_h_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_w_w: + ID = Intrinsic::loongarch_crcc_w_w_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_d_w: + ID = Intrinsic::loongarch_crcc_w_d_w; + break; + case LoongArch::BI__builtin_loongarch_csrrd_w: + ID = Intrinsic::loongarch_csrrd_w; + break; + case LoongArch::BI__builtin_loongarch_csrwr_w: + ID = Intrinsic::loongarch_csrwr_w; + break; + case LoongArch::BI__builtin_loongarch_csrxchg_w: + ID = Intrinsic::loongarch_csrxchg_w; + break; + case LoongArch::BI__builtin_loongarch_csrrd_d: + ID = Intrinsic::loongarch_csrrd_d; + break; + case LoongArch::BI__builtin_loongarch_csrwr_d: + ID = Intrinsic::loongarch_csrwr_d; + break; + case LoongArch::BI__builtin_loongarch_csrxchg_d: + ID = Intrinsic::loongarch_csrxchg_d; + break; + case LoongArch::BI__builtin_loongarch_iocsrrd_b: + ID = Intrinsic::loongarch_iocsrrd_b; + break; + case LoongArch::BI__builtin_loongarch_iocsrrd_h: + ID = Intrinsic::loongarch_iocsrrd_h; + break; + case LoongArch::BI__builtin_loongarch_iocsrrd_w: + ID = Intrinsic::loongarch_iocsrrd_w; + break; + case 
LoongArch::BI__builtin_loongarch_iocsrrd_d: + ID = Intrinsic::loongarch_iocsrrd_d; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_b: + ID = Intrinsic::loongarch_iocsrwr_b; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_h: + ID = Intrinsic::loongarch_iocsrwr_h; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_w: + ID = Intrinsic::loongarch_iocsrwr_w; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_d: + ID = Intrinsic::loongarch_iocsrwr_d; + break; + case LoongArch::BI__builtin_loongarch_cpucfg: + ID = Intrinsic::loongarch_cpucfg; + break; + case LoongArch::BI__builtin_loongarch_asrtle_d: + ID = Intrinsic::loongarch_asrtle_d; + break; + case LoongArch::BI__builtin_loongarch_asrtgt_d: + ID = Intrinsic::loongarch_asrtgt_d; + break; + case LoongArch::BI__builtin_loongarch_lddir_d: + ID = Intrinsic::loongarch_lddir_d; + break; + case LoongArch::BI__builtin_loongarch_ldpte_d: + ID = Intrinsic::loongarch_ldpte_d; + break; + // TODO: Support more Intrinsics. + } + + assert(ID != Intrinsic::not_intrinsic); + + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops); +} diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index a8bb0dd65d1a..bb887df3e4e0 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -49,10 +49,10 @@ private: const Decl *D; }; llvm::SmallVector<KernelInfo, 16> EmittedKernels; - // Map a device stub function to a symbol for identifying kernel in host code. + // Map a kernel mangled name to a symbol for identifying kernel in host code // For CUDA, the symbol for identifying the kernel is the same as the device // stub function. For HIP, they are different. - llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles; + llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles; // Map a kernel handle to the kernel stub. llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs; struct VarInfo { @@ -69,6 +69,8 @@ private: bool RelocatableDeviceCode; /// Mangle context for device. std::unique_ptr<MangleContext> DeviceMC; + /// Some zeros used for GEPs. + llvm::Constant *Zeros[2]; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -86,14 +88,25 @@ private: /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. llvm::Constant *makeConstantString(const std::string &Str, - const std::string &Name = "", - const std::string &SectionName = "", - unsigned Alignment = 0) { llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0), - llvm::ConstantInt::get(SizeTy, 0)}; + const std::string &Name = "") { auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str()); - llvm::GlobalVariable *GV = - cast<llvm::GlobalVariable>(ConstStr.getPointer()); + return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), + ConstStr.getPointer(), Zeros); + } + + /// Helper function which generates an initialized constant array from Str, + /// and optionally sets section name and alignment. AddNull specifies whether + /// the array should have NUL termination.
+ llvm::Constant *makeConstantArray(StringRef Str, + StringRef Name = "", + StringRef SectionName = "", + unsigned Alignment = 0, + bool AddNull = false) { + llvm::Constant *Value = + llvm::ConstantDataArray::getString(Context, Str, AddNull); + auto *GV = new llvm::GlobalVariable( + TheModule, Value->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Value, Name); if (!SectionName.empty()) { GV->setSection(SectionName); // Mark the address as used which make sure that this section isn't @@ -102,9 +115,7 @@ private: } if (Alignment) GV->setAlignment(llvm::Align(Alignment)); - - return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), - ConstStr.getPointer(), Zeros); + return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros); } /// Helper function that generates an empty dummy function returning void. @@ -220,6 +231,8 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) IntTy = CGM.IntTy; SizeTy = CGM.SizeTy; VoidTy = CGM.VoidTy; + Zeros[0] = llvm::ConstantInt::get(SizeTy, 0); + Zeros[1] = Zeros[0]; CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); @@ -297,7 +310,8 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); - if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) { + if (auto *GV = + dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn->getName()])) { GV->setLinkage(CGF.CurFn->getLinkage()); GV->setInitializer(CGF.CurFn); } @@ -387,8 +401,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch - llvm::Value *Kernel = - CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy); + llvm::Value *Kernel = CGF.Builder.CreatePointerCast( + KernelHandles[CGF.CurFn->getName()], VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); @@ -443,8 +457,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, // Emit the call to cudaLaunch llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); - llvm::Value *Arg = - CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy); + llvm::Value *Arg = CGF.Builder.CreatePointerCast( + KernelHandles[CGF.CurFn->getName()], CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -538,7 +552,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy), + Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), @@ -744,9 +758,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // If fatbin is available from early finalization, create a string // literal containing the fat binary loaded from the given file. 
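The makeConstantArray helper above emits a private constant array and hands back a constant GEP through the newly cached Zeros pair. A self-contained sketch of that pattern against the LLVM C++ API; emitBytes is an invented name and the i64 index type stands in for the runtime's SizeTy:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    llvm::Constant *emitBytes(llvm::Module &M, llvm::StringRef Str, bool AddNull) {
      llvm::LLVMContext &Ctx = M.getContext();
      llvm::Constant *Value = llvm::ConstantDataArray::getString(Ctx, Str, AddNull);
      auto *GV = new llvm::GlobalVariable(M, Value->getType(), /*isConstant=*/true,
                                          llvm::GlobalValue::PrivateLinkage, Value);
      // Two zero indices: step through the pointer to the global, then to
      // element 0, yielding a constant pointer to the array's first byte.
      llvm::Type *IdxTy = llvm::Type::getInt64Ty(Ctx);
      llvm::Constant *Zeros[] = {llvm::ConstantInt::get(IdxTy, 0),
                                 llvm::ConstantInt::get(IdxTy, 0)};
      return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
    }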
const unsigned HIPCodeObjectAlign = 4096; - FatBinStr = - makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", - FatbinConstantName, HIPCodeObjectAlign); + FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, HIPCodeObjectAlign); } else { // If fatbin is not available, create an external symbol // __hip_fatbin in section .hip_fatbin. The external symbol is supposed @@ -780,8 +793,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // For CUDA, create a string literal containing the fat binary loaded from // the given file. - FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", - FatbinConstantName, 8); + FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, 8); FatMagic = CudaFatMagic; } @@ -888,8 +901,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { SmallString<64> ModuleID; llvm::raw_svector_ostream OS(ModuleID); OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID()); - llvm::Constant *ModuleIDConstant = makeConstantString( - std::string(ModuleID.str()), "", ModuleIDSectionName, 32); + llvm::Constant *ModuleIDConstant = makeConstantArray( + std::string(ModuleID.str()), "", ModuleIDSectionName, 32, /*AddNull=*/true); // Create an alias for the FatbinWrapper that nvcc will look for. llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage, @@ -1118,7 +1131,7 @@ void CGNVCUDARuntime::createOffloadingEntries() { StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries" : "cuda_offloading_entries"; for (KernelInfo &I : EmittedKernels) - OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel], + OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()], getDeviceSideName(cast<NamedDecl>(I.D)), 0, DeviceVarFlags::OffloadGlobalEntry, Section); @@ -1181,12 +1194,12 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, GlobalDecl GD) { - auto Loc = KernelHandles.find(F); + auto Loc = KernelHandles.find(F->getName()); if (Loc != KernelHandles.end()) return Loc->second; if (!CGM.getLangOpts().HIP) { - KernelHandles[F] = F; + KernelHandles[F->getName()] = F; KernelStubs[F] = F; return F; } @@ -1200,7 +1213,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); - KernelHandles[F] = Var; + KernelHandles[F->getName()] = Var; KernelStubs[Var] = F; return Var; } diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h index 73c7ca7bc15f..9a9c6d26cc63 100644 --- a/clang/lib/CodeGen/CGCUDARuntime.h +++ b/clang/lib/CodeGen/CGCUDARuntime.h @@ -55,7 +55,7 @@ public: /// The kind flag for an offloading entry. enum OffloadEntryKindFlag : uint32_t { /// Mark the entry as a global entry. This indicates the presense of a - /// kernel if the size size field is zero and a variable otherwise. + /// kernel if the size field is zero and a variable otherwise. OffloadGlobalEntry = 0x0, /// Mark the entry as a managed global variable. OffloadGlobalManagedEntry = 0x1, diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index 0768e6581acb..a600768b2074 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -105,6 +105,10 @@ protected: /// final class will have been taken care of by the caller. 
virtual bool isThisCompleteObject(GlobalDecl GD) const = 0; + virtual bool constructorsAndDestructorsReturnThis() const { + return CGM.getCodeGenOpts().CtorDtorReturnThis; + } + public: virtual ~CGCXXABI(); @@ -120,7 +124,13 @@ public: /// /// There currently is no way to indicate if a destructor returns 'this' /// when called virtually, and code generation does not support the case. - virtual bool HasThisReturn(GlobalDecl GD) const { return false; } + virtual bool HasThisReturn(GlobalDecl GD) const { + if (isa<CXXConstructorDecl>(GD.getDecl()) || + (isa<CXXDestructorDecl>(GD.getDecl()) && + GD.getDtorType() != Dtor_Deleting)) + return constructorsAndDestructorsReturnThis(); + return false; + } virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index dfa78bf59c65..dfa552161d7c 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/Transforms/Utils/Local.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -112,7 +113,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { // variadic type. return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, + /*chainCall=*/false, std::nullopt, FTNP->getExtInfo(), {}, RequiredArgs(0)); } @@ -459,7 +460,8 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) { return arrangeLLVMFunctionInfo( noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); + /*chainCall=*/false, std::nullopt, noProto->getExtInfo(), {}, + RequiredArgs::All); } return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>()); @@ -484,9 +486,11 @@ const CGFunctionInfo & CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, QualType receiverType) { SmallVector<CanQualType, 16> argTys; - SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos(2); + SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos( + MD->isDirectMethod() ? 1 : 2); argTys.push_back(Context.getCanonicalParamType(receiverType)); - argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); + if (!MD->isDirectMethod()) + argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? for (const auto *I : MD->parameters()) { argTys.push_back(Context.getCanonicalParamType(I->getType())); @@ -708,7 +712,7 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { return arrangeLLVMFunctionInfo( getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - None, FunctionType::ExtInfo(), {}, RequiredArgs::All); + std::nullopt, FunctionType::ExtInfo(), {}, RequiredArgs::All); } const CGFunctionInfo & @@ -1144,7 +1148,7 @@ static Address CreateTempAllocaForCoercion(CodeGenFunction &CGF, llvm::Type *Ty, CharUnits MinAlign, const Twine &Name = "tmp") { // Don't use an alignment that's worse than what LLVM would prefer. 
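The CGCXXABI change above turns HasThisReturn from a constant false into a default driven by the new CtorDtorReturnThis codegen option. A reduced sketch of the predicate's logic with stand-in types (DeclInfo is invented; clang uses GlobalDecl):

    enum DtorKind { Dtor_Deleting, Dtor_Complete, Dtor_Base };

    struct DeclInfo {
      bool IsConstructor = false, IsDestructor = false;
      DtorKind Kind = Dtor_Complete;
    };

    bool hasThisReturn(const DeclInfo &GD, bool CtorDtorReturnThis) {
      // Deleting destructors keep returning void; constructors and the other
      // destructor variants follow the option.
      if (GD.IsConstructor || (GD.IsDestructor && GD.Kind != Dtor_Deleting))
        return CtorDtorReturnThis;
      return false;
    }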
- auto PrefAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(Ty); + auto PrefAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(Ty); CharUnits Align = std::max(MinAlign, CharUnits::fromQuantity(PrefAlign)); return CGF.CreateTempAlloca(Ty, Align, Name + ".coerce"); @@ -1257,7 +1261,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) { Src = EnterStructPointerForCoercedAccess(Src, SrcSTy, - DstSize.getFixedSize(), CGF); + DstSize.getFixedValue(), CGF); SrcTy = Src.getElementType(); } @@ -1273,7 +1277,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // If load is legal, just bitcast the src pointer. if (!SrcSize.isScalable() && !DstSize.isScalable() && - SrcSize.getFixedSize() >= DstSize.getFixedSize()) { + SrcSize.getFixedValue() >= DstSize.getFixedValue()) { // Generally SrcSize is never greater than DstSize, since this means we are // losing bits. However, this can happen in cases where the structure has // additional padding, for example due to a user specified alignment. @@ -1319,7 +1323,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, CGF.Builder.CreateMemCpy( Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), Src.getPointer(), Src.getAlignment().getAsAlign(), - llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize.getKnownMinSize())); + llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize.getKnownMinValue())); return CGF.Builder.CreateLoad(Tmp); } @@ -1362,7 +1366,7 @@ static void CreateCoercedStore(llvm::Value *Src, if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) { Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy, - SrcSize.getFixedSize(), CGF); + SrcSize.getFixedValue(), CGF); DstTy = Dst.getElementType(); } @@ -1389,7 +1393,7 @@ static void CreateCoercedStore(llvm::Value *Src, // If store is legal, just bitcast the src pointer. if (isa<llvm::ScalableVectorType>(SrcTy) || isa<llvm::ScalableVectorType>(DstTy) || - SrcSize.getFixedSize() <= DstSize.getFixedSize()) { + SrcSize.getFixedValue() <= DstSize.getFixedValue()) { Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); CGF.EmitAggregateStore(Src, Dst, DstIsVolatile); } else { @@ -1407,7 +1411,7 @@ static void CreateCoercedStore(llvm::Value *Src, CGF.Builder.CreateMemCpy( Dst.getPointer(), Dst.getAlignment().getAsAlign(), Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), - llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedSize())); + llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue())); } } @@ -1633,7 +1637,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // sret things on win32 aren't void, they return the sret pointer. 
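The getFixedSize/getKnownMinSize edits above are mechanical renames tracking LLVM's TypeSize API (likewise getPrefTypeAlignment becomes getPrefTypeAlign, returning llvm::Align rather than a raw integer). A sketch of the renamed accessors, assuming the TypeSize factory names of this LLVM vintage:

    #include "llvm/Support/TypeSize.h"

    void typeSizeRenames() {
      llvm::TypeSize Fixed = llvm::TypeSize::Fixed(16);
      auto FixedBytes = Fixed.getFixedValue();      // was getFixedSize(); 16
      llvm::TypeSize Scalable = llvm::TypeSize::Scalable(4);
      auto MinBytes = Scalable.getKnownMinValue();  // was getKnownMinSize(); 4 per vscale
      (void)FixedBytes;
      (void)MinBytes;
    }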
QualType ret = FI.getReturnType(); llvm::Type *ty = ConvertType(ret); - unsigned addressSpace = Context.getTargetAddressSpace(ret); + unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret); resultType = llvm::PointerType::get(ty, addressSpace); } else { resultType = llvm::Type::getVoidTy(getLLVMContext()); @@ -1657,7 +1661,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { if (IRFunctionArgs.hasSRetArg()) { QualType Ret = FI.getReturnType(); llvm::Type *Ty = ConvertType(Ret); - unsigned AddressSpace = Context.getTargetAddressSpace(Ret); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret); ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get(Ty, AddressSpace); } @@ -1723,7 +1727,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { case ABIArgInfo::CoerceAndExpand: { auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; - for (auto EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) { + for (auto *EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) { *ArgTypesIter++ = EltTy; } assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs); @@ -1781,7 +1785,7 @@ static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs, } bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context, - QualType ReturnType) { + QualType ReturnType) const { // We can't just discard the return value for a record type with a // complex destructor or a non-trivially copyable type. if (const RecordType *RT = @@ -1792,6 +1796,38 @@ bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context, return ReturnType.isTriviallyCopyableType(Context); } +static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy, + const Decl *TargetDecl) { + // As-is msan can not tolerate noundef mismatch between caller and + // implementation. Mismatch is possible for e.g. indirect calls from C-caller + // into C++. Such mismatches lead to confusing false reports. To avoid + // expensive workaround on msan we enforce initialization event in uncommon + // cases where it's allowed. + if (Module.getLangOpts().Sanitize.has(SanitizerKind::Memory)) + return true; + // C++ explicitly makes returning undefined values UB. C's rule only applies + // to used values, so we never mark them noundef for now. + if (!Module.getLangOpts().CPlusPlus) + return false; + if (TargetDecl) { + if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl)) { + if (FDecl->isExternC()) + return false; + } else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl)) { + // Function pointer. + if (VDecl->isExternC()) + return false; + } + } + + // We don't want to be too aggressive with the return checking, unless + // it's explicit in the code opts or we're using an appropriate sanitizer. + // Try to respect what the programmer intended. + return Module.getCodeGenOpts().StrictReturn || + !Module.MayDropFunctionReturn(Module.getContext(), RetTy) || + Module.getLangOpts().Sanitize.has(SanitizerKind::Return); +} + void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, bool HasOptnone, bool AttrOnCallSite, @@ -1820,19 +1856,16 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, if (!CodeGenOpts.TrapFuncName.empty()) FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); } else { - StringRef FpKind; switch (CodeGenOpts.getFramePointer()) { case CodeGenOptions::FramePointerKind::None: - FpKind = "none"; + // This is the default behavior. 
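The HasStrictReturn helper above centralizes when a return value may be marked noundef: always under MemorySanitizer, never in plain C, and in C++ only when the declaration is not extern "C". A hypothetical source-level consequence, assuming noundef attributes are enabled (CodeGenOpts.EnableNoundefAttrs):

    int cxx_fn();          // C++ linkage: returning an indeterminate value is
                           // UB, so the return is emitted with `noundef`.
    extern "C" int c_fn(); // C linkage: C only penalizes *uses* of the value,
                           // so no `noundef` is attached.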
break; case CodeGenOptions::FramePointerKind::NonLeaf: - FpKind = "non-leaf"; - break; case CodeGenOptions::FramePointerKind::All: - FpKind = "all"; - break; + FuncAttrs.addAttribute("frame-pointer", + CodeGenOptions::getFramePointerKindName( + CodeGenOpts.getFramePointer())); } - FuncAttrs.addAttribute("frame-pointer", FpKind); if (CodeGenOpts.LessPreciseFPMAD) FuncAttrs.addAttribute("less-precise-fpmad", "true"); @@ -1860,7 +1893,12 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, FuncAttrs.addAttribute("no-nans-fp-math", "true"); if (LangOpts.ApproxFunc) FuncAttrs.addAttribute("approx-func-fp-math", "true"); - if (LangOpts.UnsafeFPMath) + if (LangOpts.AllowFPReassoc && LangOpts.AllowRecip && + LangOpts.NoSignedZero && LangOpts.ApproxFunc && + (LangOpts.getDefaultFPContractMode() == + LangOptions::FPModeKind::FPM_Fast || + LangOpts.getDefaultFPContractMode() == + LangOptions::FPModeKind::FPM_FastHonorPragmas)) FuncAttrs.addAttribute("unsafe-fp-math", "true"); if (CodeGenOpts.SoftFloat) FuncAttrs.addAttribute("use-soft-float", "true"); @@ -1931,11 +1969,11 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, FuncAttrs.addAttribute(llvm::Attribute::Convergent); } - // TODO: NoUnwind attribute should be added for other GPU modes OpenCL, HIP, + // TODO: NoUnwind attribute should be added for other GPU modes HIP, // SYCL, OpenMP offload. AFAIK, none of them support exceptions in device // code. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Exceptions aren't supported in CUDA device code. + if ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice) || + getLangOpts().OpenCL) { FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } @@ -2046,6 +2084,27 @@ static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types, return false; } +/// Check if the argument of a function has maybe_undef attribute. +static bool IsArgumentMaybeUndef(const Decl *TargetDecl, + unsigned NumRequiredArgs, unsigned ArgNo) { + const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl); + if (!FD) + return false; + + // Assume variadic arguments do not have maybe_undef attribute. + if (ArgNo >= NumRequiredArgs) + return false; + + // Check if argument has maybe_undef attribute. + if (ArgNo < FD->getNumParams()) { + const ParmVarDecl *Param = FD->getParamDecl(ArgNo); + if (Param && Param->hasAttr<MaybeUndefAttr>()) + return true; + } + + return false; +} + /// Construct the IR attribute list of a function or call. /// /// When adding an attribute, please consider where it should be handled: @@ -2094,6 +2153,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // The NoBuiltinAttr attached to the target FunctionDecl. const NoBuiltinAttr *NBA = nullptr; + // Some ABIs may result in additional accesses to arguments that may + // otherwise not be present. + auto AddPotentialArgAccess = [&]() { + llvm::Attribute A = FuncAttrs.getAttribute(llvm::Attribute::Memory); + if (A.isValid()) + FuncAttrs.addMemoryAttr(A.getMemoryEffects() | + llvm::MemoryEffects::argMemOnly()); + }; + // Collect function IR attributes based on declaration-specific // information. // FIXME: handle sseregparm someday... @@ -2140,18 +2208,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // 'const', 'pure' and 'noalias' attributed functions are also nounwind. 
if (TargetDecl->hasAttr<ConstAttr>()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadNone); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::none()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'const' functions have greater restrictions than // 'pure' functions, so they also cannot have infinite loops. FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr<PureAttr>()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadOnly); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'pure' functions cannot have infinite loops. FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr<NoAliasAttr>()) { - FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } if (TargetDecl->hasAttr<RestrictAttr>()) @@ -2168,7 +2236,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { - Optional<unsigned> NumElemsParam; + std::optional<unsigned> NumElemsParam; if (AllocSize->getNumElemsParam().isValid()) NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex(); FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(), @@ -2237,9 +2305,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // Add "sample-profile-suffix-elision-policy" attribute for internal linkage // functions with -funique-internal-linkage-names. if (TargetDecl && CodeGenOpts.UniqueInternalLinkageNames) { - if (isa<FunctionDecl>(TargetDecl)) { - if (this->getFunctionLinkage(CalleeInfo.getCalleeDecl()) == - llvm::GlobalValue::InternalLinkage) + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { + if (!FD->isExternallyVisible()) FuncAttrs.addAttribute("sample-profile-suffix-elision-policy", "selected"); } @@ -2287,27 +2354,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, const ABIArgInfo &RetAI = FI.getReturnInfo(); const llvm::DataLayout &DL = getDataLayout(); - // C++ explicitly makes returning undefined values UB. C's rule only applies - // to used values, so we never mark them noundef for now. - bool HasStrictReturn = getLangOpts().CPlusPlus; - if (TargetDecl && HasStrictReturn) { - if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl)) - HasStrictReturn &= !FDecl->isExternC(); - else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl)) - // Function pointer - HasStrictReturn &= !VDecl->isExternC(); - } - - // We don't want to be too aggressive with the return checking, unless - // it's explicit in the code opts or we're using an appropriate sanitizer. - // Try to respect what the programmer intended. 
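The attribute lowering above migrates from the retired readnone/readonly/argmemonly enum attributes to LLVM's unified memory(...) effects. In source terms, with illustrative declarations (the noalias case uses the __declspec spelling, which needs -fms-extensions or -fdeclspec):

    //   memory(none) + nounwind + willreturn
    __attribute__((const)) int square(int x);
    //   memory(read) + nounwind + willreturn
    __attribute__((pure)) int count_nonzero(const int *p, int n);
    //   memory(argmem: readwrite) + nounwind
    __declspec(noalias) void scale(int *p, int n);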
- HasStrictReturn &= getCodeGenOpts().StrictReturn || - !MayDropFunctionReturn(getContext(), RetTy) || - getLangOpts().Sanitize.has(SanitizerKind::Memory) || - getLangOpts().Sanitize.has(SanitizerKind::Return); - // Determine if the return type could be partially undef - if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) { + if (CodeGenOpts.EnableNoundefAttrs && + HasStrictReturn(*this, RetTy, TargetDecl)) { if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect && DetermineNoUndef(RetTy, getTypes(), DL, RetAI)) RetAttrs.addAttribute(llvm::Attribute::NoUndef); @@ -2319,7 +2368,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, RetAttrs.addAttribute(llvm::Attribute::SExt); else RetAttrs.addAttribute(llvm::Attribute::ZExt); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ABIArgInfo::Direct: if (RetAI.getInReg()) RetAttrs.addAttribute(llvm::Attribute::InReg); @@ -2330,8 +2379,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { // inalloca and sret disable readnone and readonly - FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); + AddPotentialArgAccess(); break; } @@ -2350,7 +2398,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) RetAttrs.addDereferenceableAttr( getMinimumObjectSize(PTy).getQuantity()); - if (getContext().getTargetAddressSpace(PTy) == 0 && + if (getTypes().getTargetAddressSpace(PTy) == 0 && !CodeGenOpts.NullPointerIsValid) RetAttrs.addAttribute(llvm::Attribute::NonNull); if (PTy->isObjectType()) { @@ -2399,7 +2447,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, FI.arg_begin()->type.castAs<PointerType>()->getPointeeType(); if (!CodeGenOpts.NullPointerIsValid && - getContext().getTargetAddressSpace(FI.arg_begin()->type) == 0) { + getTypes().getTargetAddressSpace(FI.arg_begin()->type) == 0) { Attrs.addAttribute(llvm::Attribute::NonNull); Attrs.addDereferenceableAttr(getMinimumObjectSize(ThisTy).getQuantity()); } else { @@ -2455,7 +2503,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, Attrs.addAttribute(llvm::Attribute::SExt); else Attrs.addAttribute(llvm::Attribute::ZExt); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ABIArgInfo::Direct: if (ArgNo == 0 && FI.isChainCall()) Attrs.addAttribute(llvm::Attribute::Nest); @@ -2501,9 +2549,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, Attrs.addAlignmentAttr(Align.getQuantity()); // byval disables readnone and readonly. - FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); - + AddPotentialArgAccess(); break; } case ABIArgInfo::IndirectAliased: { @@ -2519,8 +2565,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::InAlloca: // inalloca disables readnone and readonly. 
- FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); + AddPotentialArgAccess(); continue; } @@ -2529,7 +2574,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) Attrs.addDereferenceableAttr( getMinimumObjectSize(PTy).getQuantity()); - if (getContext().getTargetAddressSpace(PTy) == 0 && + if (getTypes().getTargetAddressSpace(PTy) == 0 && !CodeGenOpts.NullPointerIsValid) Attrs.addAttribute(llvm::Attribute::NonNull); if (PTy->isObjectType()) { @@ -2851,7 +2896,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Align Alignment = CGM.getNaturalTypeAlignment(ETy).getAsAlign(); AI->addAttrs(llvm::AttrBuilder(getLLVMContext()).addAlignmentAttr(Alignment)); - if (!getContext().getTargetAddressSpace(ETy) && + if (!getTypes().getTargetAddressSpace(ETy) && !CGM.getCodeGenOpts().NullPointerIsValid) AI->addAttr(llvm::Attribute::NonNull); } @@ -2860,7 +2905,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Set `align` attribute if any. const auto *AVAttr = PVD->getAttr<AlignValueAttr>(); if (!AVAttr) - if (const auto *TOTy = dyn_cast<TypedefType>(OTy)) + if (const auto *TOTy = OTy->getAs<TypedefType>()) AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>(); if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) { // If alignment-assumption sanitizer is enabled, we do *not* add @@ -3509,7 +3554,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, switch (RetAI.getKind()) { case ABIArgInfo::InAlloca: - // Aggregrates get evaluated directly into the destination. Sometimes we + // Aggregates get evaluated directly into the destination. Sometimes we // need to return the sret value in a register, though. assert(hasAggregateEvaluationKind(RetTy)); if (RetAI.getInAllocaSRet()) { @@ -3537,7 +3582,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, break; } case TEK_Aggregate: - // Do nothing; aggregrates get evaluated directly into the destination. + // Do nothing; aggregates get evaluated directly into the destination. break; case TEK_Scalar: { LValueBaseInfo BaseInfo; @@ -4078,7 +4123,7 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, bool CanCheckNullability = false; if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) { - auto Nullability = PVD->getType()->getNullability(getContext()); + auto Nullability = PVD->getType()->getNullability(); CanCheckNullability = Nullability && *Nullability == NullabilityKind::NonNull && PVD->getTypeSourceInfo(); @@ -4106,7 +4151,7 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc), llvm::ConstantInt::get(Int32Ty, ArgNo + 1), }; - EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None); + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, std::nullopt); } // Check if the call is going to use the inalloca convention. 
This needs to @@ -4426,7 +4471,7 @@ QualType CodeGenFunction::getVarArgType(const Expr *Arg) { if (Arg->getType()->isIntegerType() && getContext().getTypeSize(Arg->getType()) < - getContext().getTargetInfo().getPointerWidth(0) && + getContext().getTargetInfo().getPointerWidth(LangAS::Default) && Arg->isNullPointerConstant(getContext(), Expr::NPC_ValueDependentIsNotNull)) { return getContext().getIntPtrType(); @@ -4449,7 +4494,7 @@ CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) { llvm::CallInst * CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { - return EmitNounwindRuntimeCall(callee, None, name); + return EmitNounwindRuntimeCall(callee, std::nullopt, name); } /// Emits a call to the given nounwind runtime function. @@ -4466,7 +4511,7 @@ CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, /// runtime function. llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { - return EmitRuntimeCall(callee, None, name); + return EmitRuntimeCall(callee, std::nullopt, name); } // Calls which may throw must have operand bundles indicating which funclet @@ -4530,7 +4575,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke( llvm::CallBase * CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, const Twine &name) { - return EmitRuntimeCallOrInvoke(callee, None, name); + return EmitRuntimeCallOrInvoke(callee, std::nullopt, name); } /// Emits a call or invoke instruction to the given runtime function. @@ -4580,7 +4625,7 @@ namespace { /// Specify given \p NewAlign as the alignment of return value attribute. If /// such attribute already exists, re-set it to the maximal one of two options. -LLVM_NODISCARD llvm::AttributeList +[[nodiscard]] llvm::AttributeList maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx, const llvm::AttributeList &Attrs, llvm::Align NewAlign) { @@ -4611,7 +4656,7 @@ protected: public: /// If we can, materialize the alignment as an attribute on return value. - LLVM_NODISCARD llvm::AttributeList + [[nodiscard]] llvm::AttributeList TryEmitAsCallSiteAttribute(const llvm::AttributeList &Attrs) { if (!AA || OffsetCI || CGF.SanOpts.has(SanitizerKind::Alignment)) return Attrs; @@ -4680,7 +4725,7 @@ public: static unsigned getMaxVectorWidth(const llvm::Type *Ty) { if (auto *VT = dyn_cast<llvm::VectorType>(Ty)) - return VT->getPrimitiveSizeInBits().getKnownMinSize(); + return VT->getPrimitiveSizeInBits().getKnownMinValue(); if (auto *AT = dyn_cast<llvm::ArrayType>(Ty)) return getMaxVectorWidth(AT->getElementType()); @@ -4821,6 +4866,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + bool ArgHasMaybeUndefAttr = + IsArgumentMaybeUndef(TargetDecl, CallInfo.getNumRequiredArgs(), ArgNo); + switch (ArgInfo.getKind()) { case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); @@ -4879,7 +4927,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Make a temporary alloca to pass the argument. 
Address Addr = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); - IRCallArgs[FirstIRArg] = Addr.getPointer(); + + llvm::Value *Val = Addr.getPointer(); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(Addr.getPointer()); + IRCallArgs[FirstIRArg] = Val; I->copyInto(*this, Addr); } else { @@ -4937,7 +4989,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Create an aligned temporary, and copy to it. Address AI = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); - IRCallArgs[FirstIRArg] = AI.getPointer(); + llvm::Value *Val = AI.getPointer(); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(AI.getPointer()); + IRCallArgs[FirstIRArg] = Val; // Emit lifetime markers for the temporary alloca. llvm::TypeSize ByvalTempElementSize = @@ -4956,9 +5011,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto *T = llvm::PointerType::getWithSamePointeeType( cast<llvm::PointerType>(V->getType()), CGM.getDataLayout().getAllocaAddrSpace()); - IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast( + + llvm::Value *Val = getTargetHooks().performAddrSpaceCast( *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(Val); + IRCallArgs[FirstIRArg] = Val; } } break; @@ -5012,6 +5071,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + if (ArgHasMaybeUndefAttr) + V = Builder.CreateFreeze(V); IRCallArgs[FirstIRArg] = V; break; } @@ -5056,6 +5117,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Address EltPtr = Builder.CreateStructGEP(Src, i); llvm::Value *LI = Builder.CreateLoad(EltPtr); + if (ArgHasMaybeUndefAttr) + LI = Builder.CreateFreeze(LI); IRCallArgs[FirstIRArg + i] = LI; } } else { @@ -5072,6 +5135,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType())) Load = EmitCMSEClearRecord(Load, ATy, I->Ty); } + + if (ArgHasMaybeUndefAttr) + Load = Builder.CreateFreeze(Load); IRCallArgs[FirstIRArg] = Load; } @@ -5095,15 +5161,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::Type *scalarType = RV.getScalarVal()->getType(); auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); - auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); + auto scalarAlign = CGM.getDataLayout().getPrefTypeAlign(scalarType); // Materialize to a temporary. 
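Arguments whose parameter carries clang's maybe_undef attribute (detected by IsArgumentMaybeUndef earlier in this file's changes) are frozen at each of the IRCallArgs sites above and below. The essence as a small free-standing sketch; maybeFreeze and the commented user declaration are invented for illustration:

    #include "llvm/IR/IRBuilder.h"

    // `freeze` pins an undef/poison input to an arbitrary but fixed value, so
    // later branches on it are no longer undefined behavior.
    llvm::Value *maybeFreeze(llvm::IRBuilder<> &Builder, llvm::Value *Val,
                             bool ArgHasMaybeUndefAttr) {
      return ArgHasMaybeUndefAttr ? Builder.CreateFreeze(Val) : Val;
    }

    // User-side view: such a parameter may legally be passed uninitialized.
    //   void callee(int x, int flag __attribute__((maybe_undef)));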
- addr = - CreateTempAlloca(RV.getScalarVal()->getType(), - CharUnits::fromQuantity(std::max( - layout->getAlignment().value(), scalarAlign)), - "tmp", - /*ArraySize=*/nullptr, &AllocaAddr); + addr = CreateTempAlloca( + RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max(layout->getAlignment(), scalarAlign)), + "tmp", + /*ArraySize=*/nullptr, &AllocaAddr); tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); @@ -5117,6 +5182,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = Builder.CreateLoad(eltAddr); + if (ArgHasMaybeUndefAttr) + elt = Builder.CreateFreeze(elt); IRCallArgs[IRArgPos++] = elt; } assert(IRArgPos == FirstIRArg + NumIRArgs); @@ -5324,6 +5391,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SmallVector<llvm::OperandBundleDef, 1> BundleList = getBundlesForFunclet(CalleePtr); + if (SanOpts.has(SanitizerKind::KCFI) && + !isa_and_nonnull<FunctionDecl>(TargetDecl)) + EmitKCFIOperandBundle(ConcreteCallee, BundleList); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) if (FD->hasAttr<StrictFPAttr>()) // All calls within a strictfp function are marked strictfp @@ -5506,7 +5577,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateStore(elt, eltAddr); } // FALLTHROUGH - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ABIArgInfo::InAlloca: diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index c6696c4df775..0795ea598411 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" #include "llvm/Transforms/Utils/SanitizerStats.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -1505,7 +1506,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { } // Fallthrough: act like we're in the base variant. 
- LLVM_FALLTHROUGH; + [[fallthrough]]; case Dtor_Base: assert(Body); @@ -1649,23 +1650,58 @@ namespace { } }; - static void EmitSanitizerDtorCallback(CodeGenFunction &CGF, llvm::Value *Ptr, - CharUnits::QuantityType PoisonSize) { + class DeclAsInlineDebugLocation { + CGDebugInfo *DI; + llvm::MDNode *InlinedAt; + std::optional<ApplyDebugLocation> Location; + + public: + DeclAsInlineDebugLocation(CodeGenFunction &CGF, const NamedDecl &Decl) + : DI(CGF.getDebugInfo()) { + if (!DI) + return; + InlinedAt = DI->getInlinedAt(); + DI->setInlinedAt(CGF.Builder.getCurrentDebugLocation()); + Location.emplace(CGF, Decl.getLocation()); + } + + ~DeclAsInlineDebugLocation() { + if (!DI) + return; + Location.reset(); + DI->setInlinedAt(InlinedAt); + } + }; + + static void EmitSanitizerDtorCallback( + CodeGenFunction &CGF, StringRef Name, llvm::Value *Ptr, + std::optional<CharUnits::QuantityType> PoisonSize = {}) { CodeGenFunction::SanitizerScope SanScope(&CGF); // Pass in void pointer and size of region as arguments to runtime // function - llvm::Value *Args[] = {CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy), - llvm::ConstantInt::get(CGF.SizeTy, PoisonSize)}; + SmallVector<llvm::Value *, 2> Args = { + CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy)}; + SmallVector<llvm::Type *, 2> ArgTypes = {CGF.VoidPtrTy}; - llvm::Type *ArgTypes[] = {CGF.VoidPtrTy, CGF.SizeTy}; + if (PoisonSize.has_value()) { + Args.emplace_back(llvm::ConstantInt::get(CGF.SizeTy, *PoisonSize)); + ArgTypes.emplace_back(CGF.SizeTy); + } llvm::FunctionType *FnType = llvm::FunctionType::get(CGF.VoidTy, ArgTypes, false); - llvm::FunctionCallee Fn = - CGF.CGM.CreateRuntimeFunction(FnType, "__sanitizer_dtor_callback"); + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FnType, Name); + CGF.EmitNounwindRuntimeCall(Fn, Args); } + static void + EmitSanitizerDtorFieldsCallback(CodeGenFunction &CGF, llvm::Value *Ptr, + CharUnits::QuantityType PoisonSize) { + EmitSanitizerDtorCallback(CGF, "__sanitizer_dtor_callback_fields", Ptr, + PoisonSize); + } + /// Poison base class with a trivial destructor. struct SanitizeDtorTrivialBase final : EHScopeStack::Cleanup { const CXXRecordDecl *BaseClass; @@ -1687,7 +1723,11 @@ namespace { if (!BaseSize.isPositive()) return; - EmitSanitizerDtorCallback(CGF, Addr.getPointer(), BaseSize.getQuantity()); + // Use the base class declaration location as inline DebugLocation. All + // fields of the class are destroyed. + DeclAsInlineDebugLocation InlineHere(CGF, *BaseClass); + EmitSanitizerDtorFieldsCallback(CGF, Addr.getPointer(), + BaseSize.getQuantity()); // Prevent the current stack frame from disappearing from the stack trace. CGF.CurFn->addFnAttr("disable-tail-calls", "true"); @@ -1735,7 +1775,10 @@ namespace { if (!PoisonSize.isPositive()) return; - EmitSanitizerDtorCallback(CGF, OffsetPtr, PoisonSize.getQuantity()); + // Use the top field declaration location as inline DebugLocation. + DeclAsInlineDebugLocation InlineHere( + CGF, **std::next(Dtor->getParent()->field_begin(), StartIndex)); + EmitSanitizerDtorFieldsCallback(CGF, OffsetPtr, PoisonSize.getQuantity()); // Prevent the current stack frame from disappearing from the stack trace. CGF.CurFn->addFnAttr("disable-tail-calls", "true"); @@ -1752,15 +1795,13 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { assert(Dtor->getParent()->isDynamicClass()); (void)Dtor; - ASTContext &Context = CGF.getContext(); // Poison vtable and vtable ptr if they exist for this class. 
llvm::Value *VTablePtr = CGF.LoadCXXThis(); - CharUnits::QuantityType PoisonSize = - Context.toCharUnitsFromBits(CGF.PointerWidthInBits).getQuantity(); // Pass in void pointer and size of region as arguments to runtime // function - EmitSanitizerDtorCallback(CGF, VTablePtr, PoisonSize); + EmitSanitizerDtorCallback(CGF, "__sanitizer_dtor_callback_vptr", + VTablePtr); } }; @@ -1768,12 +1809,12 @@ namespace { ASTContext &Context; EHScopeStack &EHStack; const CXXDestructorDecl *DD; - llvm::Optional<unsigned> StartIndex; + std::optional<unsigned> StartIndex; public: SanitizeDtorCleanupBuilder(ASTContext &Context, EHScopeStack &EHStack, const CXXDestructorDecl *DD) - : Context(Context), EHStack(EHStack), DD(DD), StartIndex(llvm::None) {} + : Context(Context), EHStack(EHStack), DD(DD), StartIndex(std::nullopt) {} void PushCleanupForField(const FieldDecl *Field) { if (Field->isZeroSize(Context)) return; @@ -1782,15 +1823,15 @@ namespace { if (!StartIndex) StartIndex = FieldIndex; } else if (StartIndex) { - EHStack.pushCleanup<SanitizeDtorFieldRange>( - NormalAndEHCleanup, DD, StartIndex.value(), FieldIndex); - StartIndex = None; + EHStack.pushCleanup<SanitizeDtorFieldRange>(NormalAndEHCleanup, DD, + *StartIndex, FieldIndex); + StartIndex = std::nullopt; } } void End() { if (StartIndex) EHStack.pushCleanup<SanitizeDtorFieldRange>(NormalAndEHCleanup, DD, - StartIndex.value(), -1); + *StartIndex, -1); } }; } // end anonymous namespace @@ -2543,7 +2584,7 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true) ->getPointerTo(ProgAS) ->getPointerTo(GlobalsAS); - // vtable field is is derived from `this` pointer, therefore they should be in + // vtable field is derived from `this` pointer, therefore they should be in // the same addr space. Note that this might not be LLVM address space 0. VTableField = Builder.CreateElementBitCast(VTableField, VTablePtrTy); VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); @@ -2955,7 +2996,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. - for (auto param : BD->parameters()) + for (auto *param : BD->parameters()) EmitDelegateCallArg(CallArgs, param, param->getBeginLoc()); assert(!Lambda->isGenericLambda() && @@ -2969,12 +3010,13 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { // Start building arguments for forwarding call CallArgList CallArgs; - QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda)); - llvm::Value *ThisPtr = llvm::UndefValue::get(getTypes().ConvertType(ThisType)); - CallArgs.add(RValue::get(ThisPtr), ThisType); + QualType LambdaType = getContext().getRecordType(Lambda); + QualType ThisType = getContext().getPointerType(LambdaType); + Address ThisPtr = CreateMemTemp(LambdaType, "unused.capture"); + CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. 
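The refactor above splits the single __sanitizer_dtor_callback entry point into named variants and makes the size argument optional. Runtime signatures as implied by the FunctionType the code now builds (reconstructed here, not quoted from compiler-rt):

    #include <cstddef>

    // Poisons the [ptr, ptr + size) range of destroyed fields.
    extern "C" void __sanitizer_dtor_callback_fields(void *ptr, std::size_t size);
    // Vptr variant: no size argument; the pointer width is implied.
    extern "C" void __sanitizer_dtor_callback_vptr(void *ptr);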
- for (auto Param : MD->parameters()) + for (auto *Param : MD->parameters()) EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc()); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 5035ed34358d..43758ac27e43 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -556,7 +556,7 @@ static llvm::BasicBlock *SimplifyCleanupEntry(CodeGenFunction &CGF, Entry->replaceAllUsesWith(Pred); // Merge the blocks. - Pred->getInstList().splice(Pred->end(), Entry->getInstList()); + Pred->splice(Pred->end(), Entry); // Kill the entry block. Entry->eraseFromParent(); @@ -942,7 +942,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // Append the prepared cleanup prologue from above. llvm::BasicBlock *NormalExit = Builder.GetInsertBlock(); for (unsigned I = 0, E = InstsToAppend.size(); I != E; ++I) - NormalExit->getInstList().push_back(InstsToAppend[I]); + InstsToAppend[I]->insertInto(NormalExit, NormalExit->end()); // Optimistically hope that any fixups will continue falling through. for (unsigned I = FixupDepth, E = EHStack.getNumBranchFixups(); @@ -1016,8 +1016,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // throwing cleanups. For funclet EH personalities, the cleanupendpad models // program termination when cleanups throw. bool PushedTerminate = false; - SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); llvm::CleanupPadInst *CPI = nullptr; const EHPersonality &Personality = EHPersonality::get(*this); @@ -1336,7 +1335,8 @@ static void EmitSehScope(CodeGenFunction &CGF, CGF.getBundlesForFunclet(SehCppScope.getCallee()); if (CGF.CurrentFuncletPad) BundleList.emplace_back("funclet", CGF.CurrentFuncletPad); - CGF.Builder.CreateInvoke(SehCppScope, Cont, InvokeDest, None, BundleList); + CGF.Builder.CreateInvoke(SehCppScope, Cont, InvokeDest, std::nullopt, + BundleList); CGF.EmitBlock(Cont); } diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 594c7d49df3c..775a4341558a 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -539,7 +539,7 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate()); // Create mapping between parameters and copy-params for coroutine function. 
- auto ParamMoves = S.getParamMoves(); + llvm::ArrayRef<const Stmt *> ParamMoves = S.getParamMoves(); assert( (ParamMoves.size() == 0 || (ParamMoves.size() == FnArgs.size())) && "ParamMoves and FnArgs should be the same size for coroutine function"); @@ -673,9 +673,23 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, } CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin " "has been used earlier in this function"); - auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); return RValue::get(NullPtr); } + case llvm::Intrinsic::coro_size: { + auto &Context = getContext(); + CanQualType SizeTy = Context.getSizeType(); + llvm::IntegerType *T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::coro_size, T); + return RValue::get(Builder.CreateCall(F)); + } + case llvm::Intrinsic::coro_align: { + auto &Context = getContext(); + CanQualType SizeTy = Context.getSizeType(); + llvm::IntegerType *T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::coro_align, T); + return RValue::get(Builder.CreateCall(F)); + } // The following three intrinsics take a token parameter referring to a token // returned by earlier call to @llvm.coro.id. Since we cannot represent it in // builtins, we patch it up here. @@ -689,7 +703,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_id has" " been used earlier in this function"); // Fallthrough to the next case to add TokenNone as the first argument. - LLVM_FALLTHROUGH; + [[fallthrough]]; } // @llvm.coro.suspend takes a token parameter. Add token 'none' as the first // argument. 
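The two new coro_size and coro_align cases above instantiate the overloaded intrinsic at the bit width of the target's size_t. A minimal sketch of the lowering (the IR shown is our assumption for a typical 64-bit target, not output captured from this patch):

    // C++ source, e.g. inside a coroutine frame allocator:
    //   __SIZE_TYPE__ Size  = __builtin_coro_size();
    //   __SIZE_TYPE__ Align = __builtin_coro_align();
    //
    // Expected IR shape when size_t is 64 bits wide:
    //   %size  = call i64 @llvm.coro.size.i64()
    //   %align = call i64 @llvm.coro.align.i64()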
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 94c48316add7..3bde43cc1db3 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -26,6 +26,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/VTableBuilder.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" @@ -47,7 +48,10 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/SHA256.h" #include "llvm/Support/TimeProfiler.h" +#include <optional> using namespace clang; using namespace clang::CodeGen; @@ -342,35 +346,44 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { return StringRef(); } -Optional<llvm::DIFile::ChecksumKind> -CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { +std::optional<llvm::DIFile::ChecksumKind> +CGDebugInfo::computeChecksum(FileID FID, SmallString<64> &Checksum) const { Checksum.clear(); if (!CGM.getCodeGenOpts().EmitCodeView && CGM.getCodeGenOpts().DwarfVersion < 5) - return None; + return std::nullopt; SourceManager &SM = CGM.getContext().getSourceManager(); - Optional<llvm::MemoryBufferRef> MemBuffer = SM.getBufferOrNone(FID); + std::optional<llvm::MemoryBufferRef> MemBuffer = SM.getBufferOrNone(FID); if (!MemBuffer) - return None; + return std::nullopt; - llvm::toHex( - llvm::MD5::hash(llvm::arrayRefFromStringRef(MemBuffer->getBuffer())), - /*LowerCase*/ true, Checksum); - return llvm::DIFile::CSK_MD5; + auto Data = llvm::arrayRefFromStringRef(MemBuffer->getBuffer()); + switch (CGM.getCodeGenOpts().getDebugSrcHash()) { + case clang::CodeGenOptions::DSH_MD5: + llvm::toHex(llvm::MD5::hash(Data), /*LowerCase=*/true, Checksum); + return llvm::DIFile::CSK_MD5; + case clang::CodeGenOptions::DSH_SHA1: + llvm::toHex(llvm::SHA1::hash(Data), /*LowerCase=*/true, Checksum); + return llvm::DIFile::CSK_SHA1; + case clang::CodeGenOptions::DSH_SHA256: + llvm::toHex(llvm::SHA256::hash(Data), /*LowerCase=*/true, Checksum); + return llvm::DIFile::CSK_SHA256; + } + llvm_unreachable("Unhandled DebugSrcHashKind enum"); } -Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM, - FileID FID) { +std::optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM, + FileID FID) { if (!CGM.getCodeGenOpts().EmbedSource) - return None; + return std::nullopt; bool SourceInvalid = false; StringRef Source = SM.getBufferData(FID, &SourceInvalid); if (SourceInvalid) - return None; + return std::nullopt; return Source; } @@ -405,19 +418,20 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { return cast<llvm::DIFile>(V); } - SmallString<32> Checksum; + SmallString<64> Checksum; - Optional<llvm::DIFile::ChecksumKind> CSKind = computeChecksum(FID, Checksum); - Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; + std::optional<llvm::DIFile::ChecksumKind> CSKind = + computeChecksum(FID, Checksum); + std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; if (CSKind) CSInfo.emplace(*CSKind, Checksum); return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); } -llvm::DIFile * -CGDebugInfo::createFile(StringRef FileName, - Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, - Optional<StringRef> Source) { +llvm::DIFile *CGDebugInfo::createFile( + StringRef FileName, + std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, + 
std::optional<StringRef> Source) { StringRef Dir; StringRef File; std::string RemappedFile = remapDIPath(FileName); @@ -499,9 +513,9 @@ StringRef CGDebugInfo::getCurrentDirname() { } void CGDebugInfo::CreateCompileUnit() { - SmallString<32> Checksum; - Optional<llvm::DIFile::ChecksumKind> CSKind; - Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; + SmallString<64> Checksum; + std::optional<llvm::DIFile::ChecksumKind> CSKind; + std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; // Should we be asking the SourceManager for the main file name, instead of // accepting it as an argument? This just causes the main file name to @@ -512,7 +526,8 @@ void CGDebugInfo::CreateCompileUnit() { // Get absolute path name. SourceManager &SM = CGM.getContext().getSourceManager(); - std::string MainFileName = CGM.getCodeGenOpts().MainFileName; + auto &CGO = CGM.getCodeGenOpts(); + std::string MainFileName = CGO.MainFileName; if (MainFileName.empty()) MainFileName = "<stdin>"; @@ -521,7 +536,7 @@ void CGDebugInfo::CreateCompileUnit() { // a relative path, so we look into the actual file entry for the main // file to determine the real absolute path for the file. std::string MainFileDir; - if (Optional<FileEntryRef> MainFile = + if (OptionalFileEntryRef MainFile = SM.getFileEntryRefForID(SM.getMainFileID())) { MainFileDir = std::string(MainFile->getDir().getName()); if (!llvm::sys::path::is_absolute(MainFileName)) { @@ -548,11 +563,11 @@ void CGDebugInfo::CreateCompileUnit() { if (LO.CPlusPlus) { if (LO.ObjC) LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus; - else if (LO.CPlusPlus14 && (!CGM.getCodeGenOpts().DebugStrictDwarf || - CGM.getCodeGenOpts().DwarfVersion >= 5)) + else if (CGO.DebugStrictDwarf && CGO.DwarfVersion < 5) + LangTag = llvm::dwarf::DW_LANG_C_plus_plus; + else if (LO.CPlusPlus14) LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14; - else if (LO.CPlusPlus11 && (!CGM.getCodeGenOpts().DebugStrictDwarf || - CGM.getCodeGenOpts().DwarfVersion >= 5)) + else if (LO.CPlusPlus11) LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11; else LangTag = llvm::dwarf::DW_LANG_C_plus_plus; @@ -563,6 +578,8 @@ void CGDebugInfo::CreateCompileUnit() { LangTag = llvm::dwarf::DW_LANG_OpenCL; } else if (LO.RenderScript) { LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; + } else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) { + LangTag = llvm::dwarf::DW_LANG_C11; } else if (LO.C99) { LangTag = llvm::dwarf::DW_LANG_C99; } else { @@ -883,10 +900,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return DBuilder.createBasicType(BTName, Size, Encoding); } -llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) { - return DBuilder.createUnspecifiedType("auto"); -} - llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) { StringRef Name = Ty->isUnsigned() ? "unsigned _BitInt" : "_BitInt"; @@ -1137,13 +1150,12 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, QualType PointeeTy, llvm::DIFile *Unit) { // Bit size, align and offset of the type. - // Size is always the size of a pointer. We can't use getTypeSize here - // because that does not return the correct value for references. - unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy); - uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace); + // Size is always the size of a pointer. 
+ uint64_t Size = CGM.getContext().getTypeSize(Ty); auto Align = getTypeAlignIfRequired(Ty, CGM.getContext()); - Optional<unsigned> DWARFAddressSpace = - CGM.getTarget().getDWARFAddressSpace(AddressSpace); + std::optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace( + CGM.getTypes().getTargetAddressSpace(PointeeTy)); SmallVector<llvm::Metadata *, 4> Annots; auto *BTFAttrTy = dyn_cast<BTFTagAttributedType>(PointeeTy); @@ -1266,18 +1278,31 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, assert(Ty->isTypeAlias()); llvm::DIType *Src = getOrCreateType(Ty->getAliasedType(), Unit); - auto *AliasDecl = - cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) - ->getTemplatedDecl(); + const TemplateDecl *TD = Ty->getTemplateName().getAsTemplateDecl(); + if (isa<BuiltinTemplateDecl>(TD)) + return Src; + const auto *AliasDecl = cast<TypeAliasTemplateDecl>(TD)->getTemplatedDecl(); if (AliasDecl->hasAttr<NoDebugAttr>()) return Src; SmallString<128> NS; llvm::raw_svector_ostream OS(NS); - Ty->getTemplateName().print(OS, getPrintingPolicy(), - TemplateName::Qualified::None); - printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy()); + + auto PP = getPrintingPolicy(); + Ty->getTemplateName().print(OS, PP, TemplateName::Qualified::None); + + // Disable PrintCanonicalTypes here because we want + // the DW_AT_name to benefit from the TypePrinter's ability + // to skip defaulted template arguments. + // + // FIXME: Once -gsimple-template-names is enabled by default + // and we attach template parameters to alias template DIEs + // we don't need to worry about customizing the PrintingPolicy + // here anymore. + PP.PrintCanonicalTypes = false; + printTemplateArgumentList(OS, Ty->template_arguments(), PP, + TD->getTemplateParameters()); SourceLocation Loc = AliasDecl->getLocation(); return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc), @@ -1285,6 +1310,33 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, getDeclContextDescriptor(AliasDecl)); } +/// Convert an AccessSpecifier into the corresponding DINode flag. +/// As an optimization, return 0 if the access specifier equals the +/// default for the containing type. +static llvm::DINode::DIFlags getAccessFlag(AccessSpecifier Access, + const RecordDecl *RD) { + AccessSpecifier Default = clang::AS_none; + if (RD && RD->isClass()) + Default = clang::AS_private; + else if (RD && (RD->isStruct() || RD->isUnion())) + Default = clang::AS_public; + + if (Access == Default) + return llvm::DINode::FlagZero; + + switch (Access) { + case clang::AS_private: + return llvm::DINode::FlagPrivate; + case clang::AS_protected: + return llvm::DINode::FlagProtected; + case clang::AS_public: + return llvm::DINode::FlagPublic; + case clang::AS_none: + return llvm::DINode::FlagZero; + } + llvm_unreachable("unexpected access enumerator"); +} + llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile *Unit) { llvm::DIType *Underlying = @@ -1300,10 +1352,16 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, uint32_t Align = getDeclAlignIfRequired(Ty->getDecl(), CGM.getContext()); // Typedefs are derived from some other type. 
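// A hedged illustration of the typedef access flags computed just below (the
// example type is ours, not from the patch): getAccessFlag elides the
// container's default access as FlagZero, so only non-default typedefs are
// tagged with DW_AT_accessibility.
//
//   class C {
//     typedef int Priv;  // private == class default -> FlagZero (no attribute)
//   public:
//     typedef int Pub;   // public != default -> llvm::DINode::FlagPublic
//   };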
llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(Ty->getDecl()); + + llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + const DeclContext *DC = Ty->getDecl()->getDeclContext(); + if (isa<RecordDecl>(DC)) + Flags = getAccessFlag(Ty->getDecl()->getAccess(), cast<RecordDecl>(DC)); + return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc), getDeclContextDescriptor(Ty->getDecl()), Align, - Annotations); + Flags, Annotations); } static unsigned getDwarfCC(CallingConv CC) { @@ -1397,33 +1455,6 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, return F; } -/// Convert an AccessSpecifier into the corresponding DINode flag. -/// As an optimization, return 0 if the access specifier equals the -/// default for the containing type. -static llvm::DINode::DIFlags getAccessFlag(AccessSpecifier Access, - const RecordDecl *RD) { - AccessSpecifier Default = clang::AS_none; - if (RD && RD->isClass()) - Default = clang::AS_private; - else if (RD && (RD->isStruct() || RD->isUnion())) - Default = clang::AS_public; - - if (Access == Default) - return llvm::DINode::FlagZero; - - switch (Access) { - case clang::AS_private: - return llvm::DINode::FlagPrivate; - case clang::AS_protected: - return llvm::DINode::FlagProtected; - case clang::AS_public: - return llvm::DINode::FlagPublic; - case clang::AS_none: - return llvm::DINode::FlagZero; - } - llvm_unreachable("unexpected access enumerator"); -} - llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, llvm::DIScope *RecordTy, const RecordDecl *RD) { @@ -1497,7 +1528,7 @@ void CGDebugInfo::CollectRecordLambdaFields( if (C.capturesVariable()) { SourceLocation Loc = C.getLocation(); assert(!Field->isBitField() && "lambdas don't have bitfield members!"); - VarDecl *V = C.getCapturedVar(); + ValueDecl *V = C.getCapturedVar(); StringRef VName = V->getName(); llvm::DIFile *VUnit = getOrCreateFile(Loc); auto Align = getDeclAlignIfRequired(V, CGM.getContext()); @@ -1637,28 +1668,31 @@ void CGDebugInfo::CollectRecordFields( } else if (CGM.getCodeGenOpts().EmitCodeView) { // Debug info for nested types is included in the member list only for // CodeView. - if (const auto *nestedType = dyn_cast<TypeDecl>(I)) + if (const auto *nestedType = dyn_cast<TypeDecl>(I)) { + // MSVC doesn't generate nested type for anonymous struct/union. 
+ if (isa<RecordDecl>(I) && + cast<RecordDecl>(I)->isAnonymousStructOrUnion()) + continue; if (!nestedType->isImplicit() && nestedType->getDeclContext() == record) CollectRecordNestedType(nestedType, elements); + } } } } llvm::DISubroutineType * CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method, - llvm::DIFile *Unit, bool decl) { - const auto *Func = Method->getType()->castAs<FunctionProtoType>(); + llvm::DIFile *Unit) { + const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>(); if (Method->isStatic()) return cast_or_null<llvm::DISubroutineType>( getOrCreateType(QualType(Func, 0), Unit)); - return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit, decl); + return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit); } -llvm::DISubroutineType * -CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr, - const FunctionProtoType *Func, - llvm::DIFile *Unit, bool decl) { +llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( + QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) { FunctionProtoType::ExtProtoInfo EPI = Func->getExtProtoInfo(); Qualifiers &Qc = EPI.TypeQuals; Qc.removeConst(); @@ -1681,31 +1715,19 @@ CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr, assert(Args.size() && "Invalid number of arguments!"); SmallVector<llvm::Metadata *, 16> Elts; - // First element is always return type. For 'void' functions it is NULL. - QualType temp = Func->getReturnType(); - if (temp->getTypeClass() == Type::Auto && decl) { - const AutoType *AT = cast<AutoType>(temp); - // It may be tricky in some cases to link the specification back the lambda - // call operator and so we skip emitting "auto" for lambdas. This is - // consistent with gcc as well. - if (AT->isDeduced() && ThisPtr->getPointeeCXXRecordDecl()->isLambda()) - Elts.push_back(getOrCreateType(AT->getDeducedType(), Unit)); - else - Elts.push_back(CreateType(AT)); - } else - Elts.push_back(Args[0]); + // First element is always return type. For 'void' functions it is NULL. + Elts.push_back(Args[0]); // "this" pointer is always first argument. const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl(); if (isa<ClassTemplateSpecializationDecl>(RD)) { // Create pointer type directly in this case. const PointerType *ThisPtrTy = cast<PointerType>(ThisPtr); - QualType PointeeTy = ThisPtrTy->getPointeeType(); - unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy); - uint64_t Size = CGM.getTarget().getPointerWidth(AS); + uint64_t Size = CGM.getContext().getTypeSize(ThisPtrTy); auto Align = getTypeAlignIfRequired(ThisPtrTy, CGM.getContext()); - llvm::DIType *PointeeType = getOrCreateType(PointeeTy, Unit); + llvm::DIType *PointeeType = + getOrCreateType(ThisPtrTy->getPointeeType(), Unit); llvm::DIType *ThisPtrType = DBuilder.createPointerType(PointeeType, Size, Align); TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); @@ -1747,7 +1769,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method); StringRef MethodName = getFunctionName(Method); - llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit, true); + llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit); // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. 
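For orientation, the element list getOrCreateMethodType now builds unconditionally, with the AutoType special case above removed, looks roughly like this (the example declaration is ours, not from the patch):

    // struct S { int f(bool); };
    // DISubroutineType elements for S::f, in order:
    //   [0] int   -- return type (null for void)
    //   [1] S*    -- implicit 'this', now sized via ASTContext::getTypeSize
    //   [2] bool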
@@ -1775,7 +1797,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; int ThisAdjustment = 0; - if (Method->isVirtual()) { + if (VTableContextBase::hasVtableSlot(Method)) { if (Method->isPure()) SPFlags |= llvm::DISubprogram::SPFlagPureVirtual; else @@ -1979,7 +2001,7 @@ void CGDebugInfo::CollectCXXBasesAux( } llvm::DINodeArray -CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs, +CGDebugInfo::CollectTemplateParams(std::optional<TemplateArgs> OArgs, llvm::DIFile *Unit) { if (!OArgs) return llvm::DINodeArray(); @@ -1989,35 +2011,23 @@ CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs, const TemplateArgument &TA = Args.Args[i]; StringRef Name; bool defaultParameter = false; - if (Args.TList) + if (Args.TList) { Name = Args.TList->getParam(i)->getName(); + + NamedDecl const *ND = Args.TList->getParam(i); + defaultParameter = clang::isSubstitutedDefaultArgument( + CGM.getContext(), TA, ND, Args.Args, Args.TList->getDepth()); + } + switch (TA.getKind()) { case TemplateArgument::Type: { llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit); - - if (Args.TList) - if (auto *templateType = - dyn_cast_or_null<TemplateTypeParmDecl>(Args.TList->getParam(i))) - if (templateType->hasDefaultArgument()) - defaultParameter = - templateType->getDefaultArgument() == TA.getAsType(); - TemplateParams.push_back(DBuilder.createTemplateTypeParameter( TheCU, Name, TTy, defaultParameter)); } break; case TemplateArgument::Integral: { llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit); - if (Args.TList && CGM.getCodeGenOpts().DwarfVersion >= 5) - if (auto *templateType = dyn_cast_or_null<NonTypeTemplateParmDecl>( - Args.TList->getParam(i))) - if (templateType->hasDefaultArgument() && - !templateType->getDefaultArgument()->isValueDependent()) - defaultParameter = llvm::APSInt::isSameValue( - templateType->getDefaultArgument()->EvaluateKnownConstInt( - CGM.getContext()), - TA.getAsIntegral()); - TemplateParams.push_back(DBuilder.createTemplateValueParameter( TheCU, Name, TTy, defaultParameter, llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral()))); @@ -2093,7 +2103,7 @@ CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs, TA.getAsTemplate().getAsTemplateDecl()->printQualifiedName( OS, getPrintingPolicy()); TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( - TheCU, Name, nullptr, OS.str())); + TheCU, Name, nullptr, OS.str(), defaultParameter)); break; } case TemplateArgument::Pack: @@ -2122,7 +2132,7 @@ CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs, return DBuilder.getOrCreateArray(TemplateParams); } -Optional<CGDebugInfo::TemplateArgs> +std::optional<CGDebugInfo::TemplateArgs> CGDebugInfo::GetTemplateArgs(const FunctionDecl *FD) const { if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplateSpecialization) { @@ -2131,22 +2141,22 @@ CGDebugInfo::GetTemplateArgs(const FunctionDecl *FD) const { ->getTemplateParameters(); return {{TList, FD->getTemplateSpecializationArgs()->asArray()}}; } - return None; + return std::nullopt; } -Optional<CGDebugInfo::TemplateArgs> +std::optional<CGDebugInfo::TemplateArgs> CGDebugInfo::GetTemplateArgs(const VarDecl *VD) const { // Always get the full list of parameters, not just the ones from the // specialization. A partial specialization may have fewer parameters than // there are arguments. 
auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VD); if (!TS) - return None; + return std::nullopt; VarTemplateDecl *T = TS->getSpecializedTemplate(); const TemplateParameterList *TList = T->getTemplateParameters(); auto TA = TS->getTemplateArgs().asArray(); return {{TList, TA}}; } -Optional<CGDebugInfo::TemplateArgs> +std::optional<CGDebugInfo::TemplateArgs> CGDebugInfo::GetTemplateArgs(const RecordDecl *RD) const { if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) { // Always get the full list of parameters, not just the ones from the @@ -2157,7 +2167,7 @@ CGDebugInfo::GetTemplateArgs(const RecordDecl *RD) const { const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); return {{TPList, TAList.asArray()}}; } - return None; + return std::nullopt; } llvm::DINodeArray @@ -2202,7 +2212,7 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements); unsigned Size = Context.getTypeSize(Context.VoidPtrTy); unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); - Optional<unsigned> DWARFAddressSpace = + std::optional<unsigned> DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); llvm::DIType *vtbl_ptr_type = DBuilder.createPointerType( @@ -2299,7 +2309,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData; unsigned VTableWidth = PtrWidth * VSlotCount; unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); - Optional<unsigned> DWARFAddressSpace = + std::optional<unsigned> DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); // Create a very wide void* type and insert it directly in the element list. 
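As a reading aid for the vtable hunks above, a rough sketch under the Itanium layout (the type name is the one getOrCreateVTablePtrType emits; the DWARF detail is our summary):

    // Debuggers see the vtable pointer as:  int (*__vtbl_ptr_type)();
    // DW_AT_address_class is attached only when
    // TargetInfo::getDWARFAddressSpace(VtblPtrAddressSpace) returns a value.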
@@ -2356,7 +2366,7 @@ void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI, return; llvm::MDNode *node; if (AllocatedTy->isVoidType()) - node = llvm::MDNode::get(CGM.getLLVMContext(), None); + node = llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt); else node = getOrCreateType(AllocatedTy, getOrCreateFile(Loc)); @@ -2766,8 +2776,12 @@ llvm::DIModule *CGDebugInfo::getOrCreateModuleRef(ASTSourceDescriptor Mod, llvm::DIBuilder DIB(CGM.getModule()); SmallString<0> PCM; - if (!llvm::sys::path::is_absolute(Mod.getASTFile())) - PCM = Mod.getPath(); + if (!llvm::sys::path::is_absolute(Mod.getASTFile())) { + if (CGM.getHeaderSearchOpts().ModuleFileHomeIsCwd) + PCM = getCurrentDirname(); + else + PCM = Mod.getPath(); + } llvm::sys::path::append(PCM, Mod.getASTFile()); DIB.createCompileUnit( TheCU->getSourceLanguage(), @@ -2928,6 +2942,9 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, else if (Field->getAccessControl() == ObjCIvarDecl::Public) Flags = llvm::DINode::FlagPublic; + if (Field->isBitField()) + Flags |= llvm::DINode::FlagBitField; + llvm::MDNode *PropertyNode = nullptr; if (ObjCImplementationDecl *ImpD = ID->getImplementation()) { if (ObjCPropertyImplDecl *PImpD = @@ -3160,7 +3177,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, return DBuilder.createMemberPointerType( getOrCreateInstanceMethodType( CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()), - FPT, U, false), + FPT, U), ClassType, Size, /*Align=*/0, Flags); } @@ -3285,7 +3302,7 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType(); break; case Type::TypeOf: - T = cast<TypeOfType>(T)->getUnderlyingType(); + T = cast<TypeOfType>(T)->getUnmodifiedType(); break; case Type::Decltype: T = cast<DecltypeType>(T)->getUnderlyingType(); @@ -3351,7 +3368,7 @@ void CGDebugInfo::completeTemplateDefinition( } void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { - if (DebugKind <= codegenoptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly || D.isDynamicClass()) return; completeClassData(&D); @@ -3613,7 +3630,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { // them distinct if they are ODR-uniqued. if (Identifier.empty()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case llvm::dwarf::DW_TAG_structure_type: case llvm::dwarf::DW_TAG_union_type: @@ -3910,7 +3927,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { return SP; } - for (auto NextFD : FD->redecls()) { + for (auto *NextFD : FD->redecls()) { auto MI = SPCache.find(NextFD->getCanonicalDecl()); if (MI != SPCache.end()) { auto *SP = dyn_cast_or_null<llvm::DISubprogram>(MI->second); @@ -3968,10 +3985,11 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, !CGM.getCodeGenOpts().EmitCodeView)) // Create fake but valid subroutine type. Otherwise -verify would fail, and // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields. - return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); + return DBuilder.createSubroutineType( + DBuilder.getOrCreateTypeArray(std::nullopt)); if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) - return getOrCreateMethodType(Method, F, false); + return getOrCreateMethodType(Method, F); const auto *FTy = FnType->getAs<FunctionType>(); CallingConv CC = FTy ? 
FTy->getCallConv() : CallingConv::CC_C; @@ -4097,8 +4115,12 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (Name.startswith("\01")) Name = Name.substr(1); + assert((!D || !isa<VarDecl>(D) || + GD.getDynamicInitKind() != DynamicInitKind::NoStub) && + "Unexpected DynamicInitKind !"); + if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>() || - (isa<VarDecl>(D) && GD.getDynamicInitKind() != DynamicInitKind::NoStub)) { + isa<VarDecl>(D) || isa<CapturedDecl>(D)) { Flags |= llvm::DINode::FlagArtificial; // Artificial functions should not silently reuse CurLoc. CurLoc = SourceLocation(); @@ -4196,10 +4218,28 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, SPFlags |= llvm::DISubprogram::SPFlagOptimized; llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D); - llvm::DISubprogram *SP = DBuilder.createFunction( - FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, - TParamsArray.get(), getFunctionDeclaration(D), nullptr, Annotations); + llvm::DISubroutineType *STy = getOrCreateFunctionType(D, FnType, Unit); + llvm::DISubprogram *SP = + DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, STy, + ScopeLine, Flags, SPFlags, TParamsArray.get(), + getFunctionDeclaration(D), nullptr, Annotations); + + // Preserve btf_decl_tag attributes for parameters of extern functions + // for BPF target. The parameters created in this loop are attached as + // DISubprogram's retainedNodes in the subsequent finalizeSubprogram call. + if (IsDeclForCallSite && CGM.getTarget().getTriple().isBPF()) { + if (auto *FD = dyn_cast<FunctionDecl>(D)) { + llvm::DITypeRefArray ParamTypes = STy->getTypeArray(); + unsigned ArgNo = 1; + for (ParmVarDecl *PD : FD->parameters()) { + llvm::DINodeArray ParamAnnotations = CollectBTFDeclTagAnnotations(PD); + DBuilder.createParameterVariable( + SP, PD->getName(), ArgNo, Unit, LineNo, ParamTypes[ArgNo], true, + llvm::DINode::FlagZero, ParamAnnotations); + ++ArgNo; + } + } + } if (IsDeclForCallSite) Fn->setSubprogram(SP); @@ -4218,17 +4258,11 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, if (Func->getSubprogram()) return; - // Do not emit a declaration subprogram for a builtin, a function with nodebug - // attribute, or if call site info isn't required. Also, elide declarations - // for functions with reserved names, as call site-related features aren't - // interesting in this case (& also, the compiler may emit calls to these - // functions without debug locations, which makes the verifier complain). - if (CalleeDecl->getBuiltinID() != 0 || CalleeDecl->hasAttr<NoDebugAttr>() || + // Do not emit a declaration subprogram for a function with nodebug + // attribute, or if call site info isn't required. 
+ if (CalleeDecl->hasAttr<NoDebugAttr>() || getCallSiteRelatedAttrs() == llvm::DINode::FlagZero) return; - if (CalleeDecl->isReserved(CGM.getLangOpts()) != - ReservedIdentifierStatus::NotReserved) - return; // If there is no DISubprogram attached to the function being called, // create the one describing the function in order to have complete @@ -4282,7 +4316,7 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { void CGDebugInfo::AppendAddressSpaceXDeref( unsigned AddressSpace, SmallVectorImpl<uint64_t> &Expr) const { - Optional<unsigned> DWARFAddressSpace = + std::optional<unsigned> DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(AddressSpace); if (!DWARFAddressSpace) return; @@ -4379,7 +4413,7 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, CharUnits Align = CGM.getContext().getDeclAlign(VD); if (Align > CGM.getContext().toCharUnitsFromBits( - CGM.getTarget().getPointerAlign(0))) { + CGM.getTarget().getPointerAlign(LangAS::Default))) { CharUnits FieldOffsetInBytes = CGM.getContext().toCharUnitsFromBits(FieldOffset); CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align); @@ -4413,7 +4447,7 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, - llvm::Optional<unsigned> ArgNo, + std::optional<unsigned> ArgNo, CGBuilderTy &Builder, const bool UsePointerValue) { assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); @@ -4453,7 +4487,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); - unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType()); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(VD->getType()); AppendAddressSpaceXDeref(AddressSpace, Expr); // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an @@ -4479,7 +4513,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of __forwarding field offset = CGM.getContext().toCharUnitsFromBits( - CGM.getTarget().getPointerWidth(0)); + CGM.getTarget().getPointerWidth(LangAS::Default)); Expr.push_back(offset.getQuantity()); Expr.push_back(llvm::dwarf::DW_OP_deref); Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); @@ -4593,7 +4627,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD, llvm::Value *Storage, - llvm::Optional<unsigned> ArgNo, + std::optional<unsigned> ArgNo, CGBuilderTy &Builder, const bool UsePointerValue) { assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); @@ -4614,7 +4648,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD, return nullptr; auto Align = getDeclAlignIfRequired(BD, CGM.getContext()); - unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(BD->getType()); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(BD->getType()); SmallVector<uint64_t, 3> Expr; AppendAddressSpaceXDeref(AddressSpace, Expr); @@ -4684,11 +4718,11 @@ CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, if (auto *DD = dyn_cast<DecompositionDecl>(VD)) for (auto *B : DD->bindings()) { - EmitDeclare(B, Storage, llvm::None, Builder, + EmitDeclare(B, Storage, std::nullopt, Builder, VD->getType()->isReferenceType()); } - return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue); + return EmitDeclare(VD, Storage, std::nullopt, Builder, 
UsePointerValue); } void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) { @@ -5139,7 +5173,7 @@ std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const { if (!CGM.getCodeGenOpts().hasReducedDebugInfo()) TemplateNamesKind = codegenoptions::DebugTemplateNamesKind::Full; - Optional<TemplateArgs> Args; + std::optional<TemplateArgs> Args; bool IsOperatorOverload = false; // isa<CXXConversionDecl>(ND); if (auto *RD = dyn_cast<CXXRecordDecl>(ND)) { @@ -5293,8 +5327,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, auto Align = getDeclAlignIfRequired(D, CGM.getContext()); SmallVector<uint64_t, 4> Expr; - unsigned AddressSpace = - CGM.getContext().getTargetAddressSpace(D->getType()); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(D->getType()); if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { if (D->hasAttr<CUDASharedAttr>()) AddressSpace = @@ -5380,10 +5413,18 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { llvm::DIExpression *InitExpr = nullptr; if (CGM.getContext().getTypeSize(VD->getType()) <= 64) { // FIXME: Add a representation for integer constants wider than 64 bits. - if (Init.isInt()) - InitExpr = - DBuilder.createConstantValueExpression(Init.getInt().getExtValue()); - else if (Init.isFloat()) + if (Init.isInt()) { + const llvm::APSInt &InitInt = Init.getInt(); + std::optional<uint64_t> InitIntOpt; + if (InitInt.isUnsigned()) + InitIntOpt = InitInt.tryZExtValue(); + else if (auto tmp = InitInt.trySExtValue(); tmp.has_value()) + // Transform a signed optional to unsigned optional. When cpp 23 comes, + // use std::optional::transform + InitIntOpt = (uint64_t)tmp.value(); + if (InitIntOpt) + InitExpr = DBuilder.createConstantValueExpression(InitIntOpt.value()); + } else if (Init.isFloat()) InitExpr = DBuilder.createConstantValueExpression( Init.getFloat().bitcastToAPInt().getZExtValue()); } diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 38e3fa5b2fa9..95484a060cd8 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -25,11 +25,11 @@ #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Optional.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Allocator.h" +#include <optional> namespace llvm { class MDNode; @@ -177,7 +177,6 @@ class CGDebugInfo { /// ivars and property accessors. llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); - llvm::DIType *CreateType(const AutoType *Ty); llvm::DIType *CreateType(const BitIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty, @@ -231,10 +230,10 @@ class CGDebugInfo { /// not updated to include implicit \c this pointer. Use this routine /// to get a method type which includes \c this pointer. llvm::DISubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method, - llvm::DIFile *F, bool decl); + llvm::DIFile *F); llvm::DISubroutineType * getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func, - llvm::DIFile *Unit, bool decl); + llvm::DIFile *Unit); llvm::DISubroutineType * getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F); /// \return debug info descriptor for vtable. 
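Most of the churn in this header mirrors the LLVM-wide migration from llvm::Optional to std::optional; the mechanical rewrite pattern (our summary, not code from the patch) is:

    llvm::Optional<unsigned> ArgNo = llvm::None;   // before
    std::optional<unsigned> ArgNo = std::nullopt;  // after
    // ...with ArgNo.value() becoming *ArgNo where the optional is known engaged.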
@@ -280,7 +279,7 @@ class CGDebugInfo { llvm::ArrayRef<TemplateArgument> Args; }; /// A helper function to collect template parameters. - llvm::DINodeArray CollectTemplateParams(Optional<TemplateArgs> Args, + llvm::DINodeArray CollectTemplateParams(std::optional<TemplateArgs> Args, llvm::DIFile *Unit); /// A helper function to collect debug info for function template /// parameters. @@ -292,9 +291,9 @@ class CGDebugInfo { llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD, llvm::DIFile *Unit); - Optional<TemplateArgs> GetTemplateArgs(const VarDecl *) const; - Optional<TemplateArgs> GetTemplateArgs(const RecordDecl *) const; - Optional<TemplateArgs> GetTemplateArgs(const FunctionDecl *) const; + std::optional<TemplateArgs> GetTemplateArgs(const VarDecl *) const; + std::optional<TemplateArgs> GetTemplateArgs(const RecordDecl *) const; + std::optional<TemplateArgs> GetTemplateArgs(const FunctionDecl *) const; /// A helper function to collect debug info for template /// parameters. @@ -587,7 +586,7 @@ private: /// Returns a pointer to the DILocalVariable associated with the /// llvm.dbg.declare, or nullptr otherwise. llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI, - llvm::Optional<unsigned> ArgNo, + std::optional<unsigned> ArgNo, CGBuilderTy &Builder, const bool UsePointerValue = false); @@ -595,7 +594,7 @@ private: /// Returns a pointer to the DILocalVariable associated with the /// llvm.dbg.declare, or nullptr otherwise. llvm::DILocalVariable *EmitDeclare(const BindingDecl *decl, llvm::Value *AI, - llvm::Optional<unsigned> ArgNo, + std::optional<unsigned> ArgNo, CGBuilderTy &Builder, const bool UsePointerValue = false); @@ -631,11 +630,11 @@ private: void CreateCompileUnit(); /// Compute the file checksum debug info for input file ID. - Optional<llvm::DIFile::ChecksumKind> - computeChecksum(FileID FID, SmallString<32> &Checksum) const; + std::optional<llvm::DIFile::ChecksumKind> + computeChecksum(FileID FID, SmallString<64> &Checksum) const; /// Get the source of the given file ID. - Optional<StringRef> getSource(const SourceManager &SM, FileID FID); + std::optional<StringRef> getSource(const SourceManager &SM, FileID FID); /// Convenience function to get the file debug info descriptor for the input /// location. @@ -644,8 +643,8 @@ private: /// Create a file debug info descriptor for a source file. llvm::DIFile * createFile(StringRef FileName, - Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, - Optional<StringRef> Source); + std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, + std::optional<StringRef> Source); /// Get the type from the cache or create a new type if necessary. 
llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg); diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index f04af0d2cdf8..ceaddc4e694a 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -90,6 +91,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Export: case Decl::ObjCPropertyImpl: case Decl::FileScopeAsm: + case Decl::TopLevelStmt: case Decl::Friend: case Decl::FriendTemplate: case Decl::Block: @@ -100,6 +102,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::ObjCTypeParam: case Decl::Binding: case Decl::UnresolvedUsingIfExists: + case Decl::HLSLBuffer: llvm_unreachable("Declaration should not be in declstmts!"); case Decl::Record: // struct/union/class X; case Decl::CXXRecord: // struct/union/class X; [C++] @@ -126,6 +129,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::OMPRequires: case Decl::Empty: case Decl::Concept: + case Decl::ImplicitConceptSpecialization: case Decl::LifetimeExtendedTemporary: case Decl::RequiresExprBody: // None of these decls require codegen support. @@ -755,7 +759,7 @@ void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, if (!SanOpts.has(SanitizerKind::NullabilityAssign)) return; - auto Nullability = LHS.getType()->getNullability(getContext()); + auto Nullability = LHS.getType()->getNullability(); if (!Nullability || *Nullability != NullabilityKind::NonNull) return; @@ -839,7 +843,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, // If D is pseudo-strong, treat it like __unsafe_unretained here. This means // that we omit the retain, and causes non-autoreleased return values to be // immediately released. - LLVM_FALLTHROUGH; + [[fallthrough]]; } case Qualifiers::OCL_ExplicitNone: @@ -2612,7 +2616,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // function satisfy their nullability preconditions. This makes it necessary // to emit null checks for args in the function body itself. 
if (requiresReturnValueNullabilityCheck()) { - auto Nullability = Ty->getNullability(getContext()); + auto Nullability = Ty->getNullability(); if (Nullability && *Nullability == NullabilityKind::NonNull) { SanitizerScope SanScope(this); RetValNullabilityPrecondition = @@ -2695,7 +2699,7 @@ void CodeGenModule::EmitOMPAllocateDecl(const OMPAllocateDecl *D) { } } -llvm::Optional<CharUnits> +std::optional<CharUnits> CodeGenModule::getOMPAllocateAlignment(const VarDecl *VD) { if (const auto *AA = VD->getAttr<OMPAllocateDeclAttr>()) { if (Expr *Alignment = AA->getAlignment()) { @@ -2711,5 +2715,5 @@ CodeGenModule::getOMPAllocateAlignment(const VarDecl *VD) { std::max<unsigned>(UserAlign, NaturalAlign.getQuantity())); } } - return llvm::None; + return std::nullopt; } diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 949112c63cc9..dcd811ea257b 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CGCXXABI.h" +#include "CGHLSLRuntime.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" @@ -194,7 +195,7 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, // For example, in the above CUDA code, the static local variable s has a // "shared" address space qualifier, but the constructor of StructWithCtor // expects "this" in the "generic" address space. - unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(T); + unsigned ExpectedAddrSpace = getTypes().getTargetAddressSpace(T); unsigned ActualAddrSpace = GV->getAddressSpace(); llvm::Constant *DeclPtr = GV; if (ActualAddrSpace != ExpectedAddrSpace) { @@ -552,7 +553,18 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, CXXThreadLocalInits.push_back(Fn); CXXThreadLocalInitVars.push_back(D); } else if (PerformInit && ISA) { - EmitPointerToInitFunc(D, Addr, Fn, ISA); + // Contract with backend that "init_seg(compiler)" corresponds to priority + // 200 and "init_seg(lib)" corresponds to priority 400. + int Priority = -1; + if (ISA->getSection() == ".CRT$XCC") + Priority = 200; + else if (ISA->getSection() == ".CRT$XCL") + Priority = 400; + + if (Priority != -1) + AddGlobalCtor(Fn, Priority, ~0U, COMDATKey); + else + EmitPointerToInitFunc(D, Addr, Fn, ISA); } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) { OrderGlobalInitsOrStermFinalizers Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size()); @@ -576,8 +588,16 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // SelectAny globals will be comdat-folded. Put the initializer into a // COMDAT group associated with the global, so the initializers get folded // too. - - AddGlobalCtor(Fn, 65535, COMDATKey); + I = DelayedCXXInitPosition.find(D); + // CXXGlobalInits.size() is the lex order number for the next deferred + // VarDecl. Use it when the current VarDecl is non-deferred. Although this + // lex order number is shared between current VarDecl and some following + // VarDecls, their order of insertion into `llvm.global_ctors` is the same + // as the lexing order and the following stable sort would preserve such + // order. + unsigned LexOrder = + I == DelayedCXXInitPosition.end() ? 
CXXGlobalInits.size() : I->second; + AddGlobalCtor(Fn, 65535, LexOrder, COMDATKey); if (COMDATKey && (getTriple().isOSBinFormatELF() || getTarget().getCXXABI().isMicrosoft())) { // When COMDAT is used on ELF or in the MS C++ ABI, the key must be in @@ -620,7 +640,12 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() { /* Build the initializer for a C++20 module: This is arranged to be run only once regardless of how many times the module - might be included transitively. This arranged by using a control variable. + might be included transitively. This is arranged by using a guard variable. + + If there are no initializers at all (and also no imported modules) we reduce + this to an empty function (since the Itanium ABI requires that this function + be available to a caller, which might be produced by a different + implementation). First we call any initializers for imported modules. We then call initializers for the Global Module Fragment (if present) @@ -632,13 +657,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) CXXGlobalInits.pop_back(); - // We create the function, even if it is empty, since an importer of this - // module will refer to it unconditionally (for the current implementation - // there is no way for the importer to know that an importee does not need - // an initializer to be run). - + // As noted above, we create the function, even if it is empty. // Module initializers for imported modules are emitted first. - // Collect the modules that we import + + // Collect all the modules that we import SmallVector<Module *> AllImports; // Ones that we export for (auto I : Primary->Exports) @@ -649,8 +671,8 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { SmallVector<llvm::Function *, 8> ModuleInits; for (Module *M : AllImports) { - // No Itanium initializer in module map modules. - if (M->isModuleMapModule()) + // No Itanium initializer in header-like modules. + if (M->isHeaderLikeModule()) continue; // TODO: warn of mixed use of module map modules and C++20? llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); SmallString<256> FnName; @@ -665,7 +687,6 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { FTy, llvm::Function::ExternalLinkage, FnName.str(), &getModule()); ModuleInits.push_back(Fn); } - AllImports.clear(); // Add any initializers with specified priority; this uses the same approach // as EmitCXXGlobalInitFunc(). @@ -683,13 +704,11 @@ for (; I < PrioE; ++I) ModuleInits.push_back(I->second); } - PrioritizedCXXGlobalInits.clear(); } // Now append the ones without specified priority. - for (auto F : CXXGlobalInits) + for (auto *F : CXXGlobalInits) ModuleInits.push_back(F); - CXXGlobalInits.clear(); llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction(); @@ -699,7 +718,6 @@ // each init is run just once (even though a module might be imported // multiple times via nested use).
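// A rough sketch of the module initializer this block emits (ours; the
// mangled names are invented, and the guard test is actually generated later
// by GenerateCXXGlobalInitFunc from GuardAddr):
//
//   void _ZGIW3Foo() {            // initializer for module 'Foo'
//     if (Foo__in_chrg) return;   // i8 guard created below, when needed
//     Foo__in_chrg = 1;
//     _ZGIW3Bar();                // initializers of imported modules first
//     /* ...then this module's own dynamic initializers... */
//   }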
llvm::Function *Fn; - llvm::GlobalVariable *Guard = nullptr; { SmallString<256> InitFnName; llvm::raw_svector_ostream Out(InitFnName); @@ -709,18 +727,26 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { FTy, llvm::Twine(InitFnName), FI, SourceLocation(), false, llvm::GlobalVariable::ExternalLinkage); - Guard = new llvm::GlobalVariable(getModule(), Int8Ty, /*isConstant=*/false, - llvm::GlobalVariable::InternalLinkage, - llvm::ConstantInt::get(Int8Ty, 0), - InitFnName.str() + "__in_chrg"); + // If we have a completely empty initializer then we do not want to create + // the guard variable. + ConstantAddress GuardAddr = ConstantAddress::invalid(); + if (!AllImports.empty() || !PrioritizedCXXGlobalInits.empty() || + !CXXGlobalInits.empty()) { + // Create the guard var. + llvm::GlobalVariable *Guard = new llvm::GlobalVariable( + getModule(), Int8Ty, /*isConstant=*/false, + llvm::GlobalVariable::InternalLinkage, + llvm::ConstantInt::get(Int8Ty, 0), InitFnName.str() + "__in_chrg"); + CharUnits GuardAlign = CharUnits::One(); + Guard->setAlignment(GuardAlign.getAsAlign()); + GuardAddr = ConstantAddress(Guard, Int8Ty, GuardAlign); + } + CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits, + GuardAddr); } - CharUnits GuardAlign = CharUnits::One(); - Guard->setAlignment(GuardAlign.getAsAlign()); - CodeGenFunction(*this).GenerateCXXGlobalInitFunc( - Fn, ModuleInits, ConstantAddress(Guard, Int8Ty, GuardAlign)); - // We allow for the case that a module object is added to a linked binary - without a specific call to the the initializer. This also ensure that + // We allow for the case that a module object is added to a linked binary + without a specific call to the initializer. This also ensures that // implementation partition initializers are called when the partition // is not imported as an interface. AddGlobalCtor(Fn); @@ -739,6 +765,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { Fn->addFnAttr("device-init"); } + // We are done with the inits. + AllImports.clear(); + PrioritizedCXXGlobalInits.clear(); + CXXGlobalInits.clear(); ModuleInits.clear(); } @@ -778,8 +808,8 @@ CodeGenModule::EmitCXXGlobalInitFunc() { SmallVector<llvm::Function *, 8> ModuleInits; if (CXX20ModuleInits) for (Module *M : ImportedModules) { - // No Itanium initializer in module map modules. + // No Itanium initializer in header-like modules. - if (M->isModuleMapModule()) + if (M->isHeaderLikeModule()) continue; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); SmallString<256> FnName; @@ -824,7 +854,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() { // Prepend the module inits to the highest priority set.
if (!ModuleInits.empty()) { - for (auto F : ModuleInits) + for (auto *F : ModuleInits) LocalCXXGlobalInits.push_back(F); ModuleInits.clear(); } @@ -842,7 +872,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() { CXXGlobalInits.empty()) return; - for (auto F : CXXGlobalInits) + for (auto *F : CXXGlobalInits) ModuleInits.push_back(F); CXXGlobalInits.clear(); @@ -977,6 +1007,9 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit); } + if (getLangOpts().HLSL) + CGM.getHLSLRuntime().annotateHLSLResource(D, Addr); + FinishFunction(); } diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 76c6beb090a9..6fa7871588f7 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -158,7 +158,7 @@ static const EHPersonality &getObjCPersonality(const TargetInfo &Target, case ObjCRuntime::GNUstep: if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7)) return EHPersonality::GNUstep_ObjC; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: if (L.hasSjLjExceptions()) @@ -249,7 +249,7 @@ const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { // For outlined finallys and filters, use the SEH personality in case they // contain more SEH. This mostly only affects finallys. Filters could // hypothetically use gnu statement expressions to sneak in nested SEH. - FD = FD ? FD : CGF.CurSEHParent; + FD = FD ? FD : CGF.CurSEHParent.getDecl(); return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD)); } @@ -1223,8 +1223,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // Wasm uses Windows-style EH instructions, but merges all catch clauses into // one big catchpad. So we save the old funclet pad here before we traverse // each catch handler. - SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); llvm::BasicBlock *WasmCatchStartBlock = nullptr; if (EHPersonality::get(*this).isWasmPersonality()) { auto *CatchSwitch = @@ -1257,8 +1256,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { RunCleanupsScope CatchScope(*this); // Initialize the catch variable and set up the cleanups. - SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); CGM.getCXXABI().emitBeginCatch(*this, C); // Emit the PGO counter increment. @@ -1582,8 +1580,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() { // Create the cleanuppad using the current parent pad as its token. Use 'none' // if this is a top-level terminate scope, which is the common case. 
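// The repeated SaveAndRestore edits in this file lean on C++17 class template
// argument deduction; a minimal sketch (our example, not code from the patch):
//
//   llvm::Instruction *CurrentFuncletPad = nullptr;
//   llvm::SaveAndRestore Restore(CurrentFuncletPad);
//   // Deduces llvm::SaveAndRestore<llvm::Instruction *>; the saved value is
//   // written back when 'Restore' goes out of scope.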
- SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); llvm::Value *ParentPad = CurrentFuncletPad; if (!ParentPad) ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); @@ -1628,7 +1625,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { llvm::Value *Sel = getSelectorFromSlot(); llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), Sel->getType()); - llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); + llvm::Value *LPadVal = llvm::PoisonValue::get(LPadType); LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); @@ -2005,7 +2002,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, SmallString<128> Name; { llvm::raw_svector_ostream OS(Name); - const NamedDecl *ParentSEHFn = ParentCGF.CurSEHParent; + GlobalDecl ParentSEHFn = ParentCGF.CurSEHParent; assert(ParentSEHFn && "No CurSEHParent!"); MangleContext &Mangler = CGM.getCXXABI().getMangleContext(); if (IsFilter) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index bf3dd812b9e8..c26dd1b23321 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/SaveAndRestore.h" #include "llvm/Transforms/Utils/SanitizerStats.h" +#include <optional> #include <string> using namespace clang; @@ -123,7 +124,7 @@ llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name) { CharUnits Align = - CharUnits::fromQuantity(CGM.getDataLayout().getPrefTypeAlignment(Ty)); + CharUnits::fromQuantity(CGM.getDataLayout().getPrefTypeAlign(Ty)); return CreateTempAlloca(Ty, Align, Name); } @@ -875,52 +876,6 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, } } -/// Determine whether this expression refers to a flexible array member in a -/// struct. We disable array bounds checks for such members. -static bool isFlexibleArrayMemberExpr(const Expr *E, - unsigned StrictFlexArraysLevel) { - // For compatibility with existing code, we treat arrays of length 0 or - // 1 as flexible array members. - // FIXME: This is inconsistent with the warning code in SemaChecking. Unify - // the two mechanisms. - const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe(); - if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) { - // FIXME: Sema doesn't treat [1] as a flexible array member if the bound - // was produced by macro expansion. - if (StrictFlexArraysLevel >= 2 && CAT->getSize().ugt(0)) - return false; - // FIXME: While the default -fstrict-flex-arrays=0 permits Size>1 trailing - // arrays to be treated as flexible-array-members, we still emit ubsan - // checks as if they are not. - if (CAT->getSize().ugt(1)) - return false; - } else if (!isa<IncompleteArrayType>(AT)) - return false; - - E = E->IgnoreParens(); - - // A flexible array member must be the last member in the class. - if (const auto *ME = dyn_cast<MemberExpr>(E)) { - // FIXME: If the base type of the member expr is not FD->getParent(), - // this should not be treated as a flexible array member access. - if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { - // FIXME: Sema doesn't treat a T[1] union member as a flexible array - // member, only a T[0] or T[] member gets that treatment. 
- // Under StrictFlexArraysLevel, obey c99+ that disallows FAM in union, see - // C11 6.7.2.1 §18 - if (FD->getParent()->isUnion()) - return StrictFlexArraysLevel < 2; - RecordDecl::field_iterator FI( - DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); - return ++FI == FD->getParent()->field_end(); - } - } else if (const auto *IRE = dyn_cast<ObjCIvarRefExpr>(E)) { - return IRE->getDecl()->getNextIvar() == nullptr; - } - - return false; -} - llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E, QualType EltTy) { ASTContext &C = getContext(); @@ -965,7 +920,8 @@ llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E, static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, const Expr *Base, QualType &IndexedType, - unsigned StrictFlexArraysLevel) { + LangOptions::StrictFlexArraysLevelKind + StrictFlexArraysLevel) { // For the vector indexing extension, the bound is the number of elements. if (const VectorType *VT = Base->getType()->getAs<VectorType>()) { IndexedType = Base->getType(); @@ -976,7 +932,8 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, if (const auto *CE = dyn_cast<CastExpr>(Base)) { if (CE->getCastKind() == CK_ArrayToPointerDecay && - !isFlexibleArrayMemberExpr(CE->getSubExpr(), StrictFlexArraysLevel)) { + !CE->getSubExpr()->isFlexibleArrayMemberLike(CGF.getContext(), + StrictFlexArraysLevel)) { IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) @@ -1003,7 +960,8 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, "should not be called unless adding bounds checks"); SanitizerScope SanScope(this); - const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays; + const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); QualType IndexedType; llvm::Value *Bound = @@ -1426,6 +1384,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return EmitOMPArraySectionExpr(cast<OMPArraySectionExpr>(E)); case Expr::ExtVectorElementExprClass: return EmitExtVectorElementExpr(cast<ExtVectorElementExpr>(E)); + case Expr::CXXThisExprClass: + return MakeAddrLValue(LoadCXXThisAddress(), E->getType()); case Expr::MemberExprClass: return EmitMemberExpr(cast<MemberExpr>(E)); case Expr::CompoundLiteralExprClass: @@ -1661,21 +1621,7 @@ static bool getRangeForType(CodeGenFunction &CGF, QualType Ty, End = llvm::APInt(CGF.getContext().getTypeSize(Ty), 2); } else { const EnumDecl *ED = ET->getDecl(); - llvm::Type *LTy = CGF.ConvertTypeForMem(ED->getIntegerType()); - unsigned Bitwidth = LTy->getScalarSizeInBits(); - unsigned NumNegativeBits = ED->getNumNegativeBits(); - unsigned NumPositiveBits = ED->getNumPositiveBits(); - - if (NumNegativeBits) { - unsigned NumBits = std::max(NumNegativeBits, NumPositiveBits + 1); - assert(NumBits <= Bitwidth); - End = llvm::APInt(Bitwidth, 1) << (NumBits - 1); - Min = -End; - } else { - assert(NumPositiveBits <= Bitwidth); - End = llvm::APInt(Bitwidth, 1) << NumPositiveBits; - Min = llvm::APInt::getZero(Bitwidth); - } + ED->getValueRange(End, Min); } return true; } @@ -1743,6 +1689,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, bool isNontemporal) { + if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr.getPointer())) + if (GV->isThreadLocal()) + Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV)); + if (const auto *ClangVecTy = 
Ty->getAs<VectorType>()) { // Boolean vectors use `iN` as storage type. if (ClangVecTy->isExtVectorBoolType()) { @@ -1802,8 +1752,11 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, // In order to prevent the optimizer from throwing away the check, don't // attach range metadata to the load. } else if (CGM.getCodeGenOpts().OptimizationLevel > 0) - if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) + if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) { Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); + Load->setMetadata(llvm::LLVMContext::MD_noundef, + llvm::MDNode::get(getLLVMContext(), std::nullopt)); + } return EmitFromMemory(Load, Ty); } @@ -1884,6 +1837,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, bool isInit, bool isNontemporal) { + if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr.getPointer())) + if (GV->isThreadLocal()) + Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV)); + llvm::Type *SrcTy = Value->getType(); if (const auto *ClangVecTy = Ty->getAs<VectorType>()) { auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy); @@ -2537,16 +2494,18 @@ static LValue EmitThreadPrivateVarDeclLValue( static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType T) { - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - // Return an invalid address if variable is MT_To and unified - // memory is not enabled. For all other cases: MT_Link and - // MT_To with unified memory, return a valid address. - if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To && + // Return an invalid address if variable is MT_To (or MT_Enter starting with + // OpenMP 5.2) and unified memory is not enabled. For all other cases: MT_Link + // and MT_To (or MT_Enter) with unified memory, return a valid address. + if (!Res || ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) return Address::invalid(); assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) && "Expected link clause OR to clause with unified memory enabled."); QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); @@ -2614,6 +2573,10 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, } llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); + + if (VD->getTLSKind() != VarDecl::TLS_None) + V = CGF.Builder.CreateThreadLocalAddress(V); + llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); @@ -2785,7 +2748,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { getContext().getDeclAlign(VD)); llvm::Type *VarTy = getTypes().ConvertTypeForMem(VD->getType()); auto *PTy = llvm::PointerType::get( - VarTy, getContext().getTargetAddressSpace(VD->getType())); + VarTy, getTypes().getTargetAddressSpace(VD->getType())); Addr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PTy, VarTy); } else { // Should we be using the alignment of the constant pointer we emitted? 
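[Editorial note] The hunks above reroute loads and stores of thread-local globals through IRBuilder::CreateThreadLocalAddress, which emits a call to the llvm.threadlocal.address intrinsic instead of using the global directly. A minimal sketch of the resulting emission pattern (standalone helper; the function name is mine, not part of the patch):

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/IRBuilder.h"

    // Load a global, routing TLS accesses through @llvm.threadlocal.address
    // so later passes can reason about where the per-thread address is taken.
    llvm::Value *emitLoadOfGlobal(llvm::IRBuilder<> &Builder,
                                  llvm::GlobalVariable *GV) {
      llvm::Value *Addr = GV;
      if (GV->isThreadLocal())
        Addr = Builder.CreateThreadLocalAddress(GV);
      return Builder.CreateLoad(GV->getValueType(), Addr);
    }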
@@ -2883,6 +2846,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); } + // Handle threadlocal function locals. + if (VD->getTLSKind() != VarDecl::TLS_None) + addr = + addr.withPointer(Builder.CreateThreadLocalAddress(addr.getPointer())); // Check for OpenMP threadprivate variables. if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && @@ -2940,8 +2907,13 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // FIXME: While we're emitting a binding from an enclosing scope, all other // DeclRefExprs we see should be implicitly treated as if they also refer to // an enclosing scope. - if (const auto *BD = dyn_cast<BindingDecl>(ND)) + if (const auto *BD = dyn_cast<BindingDecl>(ND)) { + if (E->refersToEnclosingVariableOrCapture()) { + auto *FD = LambdaCaptureFields.lookup(BD); + return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); + } return EmitLValue(BD->getBinding()); + } // We can form DeclRefExprs naming GUID declarations when reconstituting // non-type template parameters into expressions. @@ -3090,10 +3062,9 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { // Format the type name as if for a diagnostic, including quotes and // optionally an 'aka'. SmallString<32> Buffer; - CGM.getDiags().ConvertArgToString(DiagnosticsEngine::ak_qualtype, - (intptr_t)T.getAsOpaquePtr(), - StringRef(), StringRef(), None, Buffer, - None); + CGM.getDiags().ConvertArgToString( + DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(), + StringRef(), std::nullopt, Buffer, std::nullopt); llvm::Constant *Components[] = { Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo), @@ -3122,7 +3093,7 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { // Floating-point types which fit into intptr_t are bitcast to integers // and then passed directly (after zero-extension, if necessary). if (V->getType()->isFloatingPointTy()) { - unsigned Bits = V->getType()->getPrimitiveSizeInBits().getFixedSize(); + unsigned Bits = V->getType()->getPrimitiveSizeInBits().getFixedValue(); if (Bits <= TargetTy->getIntegerBitWidth()) V = Builder.CreateBitCast(V, llvm::Type::getIntNTy(getLLVMContext(), Bits)); @@ -3186,7 +3157,8 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { auto FilenameGV = CGM.GetAddrOfConstantCString(std::string(FilenameString), ".src"); CGM.getSanitizerMetadata()->disableSanitizerForGlobal( - cast<llvm::GlobalVariable>(FilenameGV.getPointer())); + cast<llvm::GlobalVariable>( + FilenameGV.getPointer()->stripPointerCasts())); Filename = FilenameGV.getPointer(); Line = PLoc.getLine(); Column = PLoc.getColumn(); @@ -3244,7 +3216,7 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, CheckRecoverableKind RecoverKind, bool IsFatal, llvm::BasicBlock *ContBB) { assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable); - Optional<ApplyDebugLocation> DL; + std::optional<ApplyDebugLocation> DL; if (!CGF.Builder.getCurrentDebugLocation()) { // Ensure that the call has at least an artificial debug location. 
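[Editorial note] The EmitDeclRefLValue hunk above adds a lookup through LambdaCaptureFields for a BindingDecl referenced from an enclosing scope. Illustrative C++20 source that reaches this path (my example, not taken from the patch):

    #include <utility>

    int use_bindings() {
      auto [a, b] = std::pair<int, int>(1, 2);
      // C++20 allows capturing structured bindings; the DeclRefExpr for `a`
      // then has refersToEnclosingVariableOrCapture() set and is emitted
      // through the captured field rather than through the binding itself.
      auto f = [=] { return a + b; };
      return f();
    }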
DL.emplace(CGF, SourceLocation()); @@ -3292,7 +3264,7 @@ void CodeGenFunction::EmitCheck( assert(IsSanitizerScope); assert(Checked.size() > 0); assert(CheckHandler >= 0 && - size_t(CheckHandler) < llvm::array_lengthof(SanitizerHandlers)); + size_t(CheckHandler) < std::size(SanitizerHandlers)); const StringRef CheckName = SanitizerHandlers[CheckHandler].Name; llvm::Value *FatalCond = nullptr; @@ -3354,13 +3326,15 @@ void CodeGenFunction::EmitCheck( // Emit handler arguments and create handler function type. if (!StaticArgs.empty()) { llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info); + auto *InfoPtr = new llvm::GlobalVariable( + CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info, "", nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGM.getDataLayout().getDefaultGlobalsAddressSpace()); InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); - ArgTypes.push_back(Int8PtrTy); + Args.push_back(EmitCastToVoidPtr(InfoPtr)); + ArgTypes.push_back(Args.back()->getType()); } for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { @@ -3561,7 +3535,7 @@ void CodeGenFunction::EmitUnreachable(SourceLocation Loc) { EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), SanitizerKind::Unreachable), SanitizerHandler::BuiltinUnreachable, - EmitCheckSourceLocation(Loc), None); + EmitCheckSourceLocation(Loc), std::nullopt); } Builder.CreateUnreachable(); } @@ -3576,7 +3550,8 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, TrapBBs.resize(CheckHandlerID + 1); llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID]; - if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB) { + if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB || + (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) { TrapBB = createBasicBlock("trap"); Builder.CreateCondBr(Checked, Cont, TrapBB); EmitBlock(TrapBB); @@ -3755,7 +3730,7 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. 
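[Editorial note] Two changes above are easy to miss: the UBSan handler data is now created in the data layout's default globals address space, and EmitTrapCheck stops reusing a cached trap block when the current function is marked optnone. A sketch of the resulting reuse policy (helper name and shape are mine; only the condition mirrors the patch):

    // Reuse one trap block per check handler only when optimizing and the
    // current function is not optnone; otherwise create a fresh block so
    // each check site keeps its own, separately debug-locatable trap.
    llvm::BasicBlock *getOrCreateTrapBB(CodeGenFunction &CGF,
                                        llvm::BasicBlock *&CachedBB) {
      bool MustBeFresh =
          !CGF.CGM.getCodeGenOpts().OptimizationLevel ||
          (CGF.CurCodeDecl && CGF.CurCodeDecl->hasAttr<OptimizeNoneAttr>());
      if (MustBeFresh || !CachedBB)
        CachedBB = CGF.createBasicBlock("trap");
      return CachedBB;
    }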
#ifndef NDEBUG - for (auto idx : indices.drop_back()) + for (auto *idx : indices.drop_back()) assert(isa<llvm::ConstantInt>(idx) && cast<llvm::ConstantInt>(idx)->isZero()); #endif @@ -4038,14 +4013,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, llvm::APSInt ConstLength; if (Length) { // Idx = LowerBound + Length - 1; - if (Optional<llvm::APSInt> CL = Length->getIntegerConstantExpr(C)) { + if (std::optional<llvm::APSInt> CL = Length->getIntegerConstantExpr(C)) { ConstLength = CL->zextOrTrunc(PointerWidthInBits); Length = nullptr; } auto *LowerBound = E->getLowerBound(); llvm::APSInt ConstLowerBound(PointerWidthInBits, /*isUnsigned=*/false); if (LowerBound) { - if (Optional<llvm::APSInt> LB = LowerBound->getIntegerConstantExpr(C)) { + if (std::optional<llvm::APSInt> LB = + LowerBound->getIntegerConstantExpr(C)) { ConstLowerBound = LB->zextOrTrunc(PointerWidthInBits); LowerBound = nullptr; } @@ -4085,7 +4061,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, : BaseTy; if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) { Length = VAT->getSizeExpr(); - if (Optional<llvm::APSInt> L = Length->getIntegerConstantExpr(C)) { + if (std::optional<llvm::APSInt> L = Length->getIntegerConstantExpr(C)) { ConstLength = *L; Length = nullptr; } @@ -4292,7 +4268,7 @@ unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex) { unsigned I = 0, Skipped = 0; - for (auto F : Rec->getDefinition()->fields()) { + for (auto *F : Rec->getDefinition()->fields()) { if (I == FieldIndex) break; if (F->isUnnamedBitfield()) @@ -4596,11 +4572,11 @@ LValue CodeGenFunction::EmitInitListLValue(const InitListExpr *E) { /// Emit the operand of a glvalue conditional operator. This is either a glvalue /// or a (possibly-parenthesized) throw-expression. If this is a throw, no /// LValue is returned and the current block has been terminated. -static Optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF, - const Expr *Operand) { +static std::optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF, + const Expr *Operand) { if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(Operand->IgnoreParens())) { CGF.EmitCXXThrowExpr(ThrowExpr, /*KeepInsertionPoint*/false); - return None; + return std::nullopt; } return CGF.EmitLValue(Operand); @@ -4609,7 +4585,7 @@ static Optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF, namespace { // Handle the case where the condition is a constant evaluatable simple integer, // which means we don't have to separately handle the true/false blocks. -llvm::Optional<LValue> HandleConditionalOperatorLValueSimpleCase( +std::optional<LValue> HandleConditionalOperatorLValueSimpleCase( CodeGenFunction &CGF, const AbstractConditionalOperator *E) { const Expr *condExpr = E->getCond(); bool CondExprBool; @@ -4635,11 +4611,11 @@ llvm::Optional<LValue> HandleConditionalOperatorLValueSimpleCase( return CGF.EmitLValue(Live); } } - return llvm::None; + return std::nullopt; } struct ConditionalInfo { llvm::BasicBlock *lhsBlock, *rhsBlock; - Optional<LValue> LHS, RHS; + std::optional<LValue> LHS, RHS; }; // Create and generate the 3 blocks for a conditional operator. 
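[Editorial note] HandleConditionalOperatorLValueSimpleCase, now returning std::optional<LValue>, lets the lvalue emitter skip the three-block scheme entirely when the condition folds to a constant. An illustrative source case (mine):

    int &pick() {
      static int x, y;
      constexpr bool UseX = true;
      // The condition folds to `true` and the dead arm has no side effects,
      // so only the lvalue for `x` is emitted; no cond.true/cond.false/
      // cond.end blocks are created.
      return UseX ? x : y;
    }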
@@ -4649,8 +4625,8 @@ ConditionalInfo EmitConditionalBlocks(CodeGenFunction &CGF, const AbstractConditionalOperator *E, const FuncTy &BranchGenFunc) { ConditionalInfo Info{CGF.createBasicBlock("cond.true"), - CGF.createBasicBlock("cond.false"), llvm::None, - llvm::None}; + CGF.createBasicBlock("cond.false"), std::nullopt, + std::nullopt}; llvm::BasicBlock *endBlock = CGF.createBasicBlock("cond.end"); CodeGenFunction::ConditionalEvaluation eval(CGF); @@ -4708,7 +4684,7 @@ LValue CodeGenFunction::EmitConditionalOperatorLValue( } OpaqueValueMapping binding(*this, expr); - if (llvm::Optional<LValue> Res = + if (std::optional<LValue> Res = HandleConditionalOperatorLValueSimpleCase(*this, expr)) return *Res; @@ -5046,7 +5022,9 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { if (auto builtinID = FD->getBuiltinID()) { std::string NoBuiltinFD = ("no-builtin-" + FD->getName()).str(); std::string NoBuiltins = "no-builtins"; - std::string FDInlineName = (FD->getName() + ".inline").str(); + + StringRef Ident = CGF.CGM.getMangledName(GD); + std::string FDInlineName = (Ident + ".inline").str(); bool IsPredefinedLibFunction = CGF.getContext().BuiltinInfo.isPredefinedLibFunction(builtinID); @@ -5271,6 +5249,15 @@ llvm::Value *CodeGenFunction::EmitIvarOffset(const ObjCInterfaceDecl *Interface, return CGM.getObjCRuntime().EmitIvarOffset(*this, Interface, Ivar); } +llvm::Value * +CodeGenFunction::EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) { + llvm::Value *OffsetValue = EmitIvarOffset(Interface, Ivar); + QualType PointerDiffType = getContext().getPointerDiffType(); + return Builder.CreateZExtOrTrunc(OffsetValue, + getTypes().ConvertType(PointerDiffType)); +} + LValue CodeGenFunction::EmitLValueForIvar(QualType ObjectTy, llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 73b05690537d..34e535a78dd6 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -87,8 +87,9 @@ public: void EmitMoveFromReturnSlot(const Expr *E, RValue Src); - void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, - QualType ArrayQTy, InitListExpr *E); + void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, QualType ArrayQTy, + Expr *ExprToVisit, ArrayRef<Expr *> Args, + Expr *ArrayFiller); AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) { if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T)) @@ -172,6 +173,9 @@ public: void VisitAbstractConditionalOperator(const AbstractConditionalOperator *CO); void VisitChooseExpr(const ChooseExpr *CE); void VisitInitListExpr(InitListExpr *E); + void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef<Expr *> Args, + FieldDecl *InitializedFieldInUnion, + Expr *ArrayFiller); void VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, llvm::Value *outerBegin = nullptr); void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E); @@ -201,10 +205,22 @@ public: return EmitFinalDestCopy(E->getType(), LV); } - CGF.EmitPseudoObjectRValue(E, EnsureSlot(E->getType())); + AggValueSlot Slot = EnsureSlot(E->getType()); + bool NeedsDestruction = + !Slot.isExternallyDestructed() && + E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct; + if (NeedsDestruction) + Slot.setExternallyDestructed(); + CGF.EmitPseudoObjectRValue(E, Slot); + if (NeedsDestruction) + CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Slot.getAddress(), + E->getType()); } void VisitVAArgExpr(VAArgExpr *E); + void 
VisitCXXParenListInitExpr(CXXParenListInitExpr *E); + void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef<Expr *> Args, + Expr *ArrayFiller); void EmitInitializationToLValue(Expr *E, LValue Address); void EmitNullInitializationToLValue(LValue Address); @@ -471,10 +487,12 @@ static bool isTrivialFiller(Expr *E) { return false; } -/// Emit initialization of an array from an initializer list. +/// Emit initialization of an array from an initializer list. ExprToVisit must +/// be either an InitListExpr or a CXXParenListInitExpr. void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, - QualType ArrayQTy, InitListExpr *E) { - uint64_t NumInitElements = E->getNumInits(); + QualType ArrayQTy, Expr *ExprToVisit, + ArrayRef<Expr *> Args, Expr *ArrayFiller) { + uint64_t NumInitElements = Args.size(); uint64_t NumArrayElements = AType->getNumElements(); assert(NumInitElements <= NumArrayElements); @@ -503,7 +521,8 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, CodeGen::CodeGenModule &CGM = CGF.CGM; ConstantEmitter Emitter(CGF); LangAS AS = ArrayQTy.getAddressSpace(); - if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) { + if (llvm::Constant *C = + Emitter.tryEmitForInitializer(ExprToVisit, AS, ArrayQTy)) { auto GV = new llvm::GlobalVariable( CGM.getModule(), C->getType(), CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true), @@ -568,12 +587,11 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, LValue elementLV = CGF.MakeAddrLValue( Address(element, llvmElementType, elementAlign), elementType); - EmitInitializationToLValue(E->getInit(i), elementLV); + EmitInitializationToLValue(Args[i], elementLV); } // Check whether there's a non-trivial array-fill expression. - Expr *filler = E->getArrayFiller(); - bool hasTrivialFiller = isTrivialFiller(filler); + bool hasTrivialFiller = isTrivialFiller(ArrayFiller); // Any remaining elements need to be zero-initialized, possibly // using the filler expression. We can skip this if the we're @@ -616,8 +634,8 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, CodeGenFunction::RunCleanupsScope CleanupsScope(CGF); LValue elementLV = CGF.MakeAddrLValue( Address(currentElement, llvmElementType, elementAlign), elementType); - if (filler) - EmitInitializationToLValue(filler, elementLV); + if (ArrayFiller) + EmitInitializationToLValue(ArrayFiller, elementLV); else EmitNullInitializationToLValue(elementLV); } @@ -850,7 +868,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { return; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case CK_NoOp: @@ -1591,46 +1609,64 @@ void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) { } } +void AggExprEmitter::VisitCXXParenListInitExpr(CXXParenListInitExpr *E) { + VisitCXXParenListOrInitListExpr(E, E->getInitExprs(), + E->getInitializedFieldInUnion(), + E->getArrayFiller()); +} + void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { + if (E->hadArrayRangeDesignator()) + CGF.ErrorUnsupported(E, "GNU array range designator extension"); + + if (E->isTransparent()) + return Visit(E->getInit(0)); + + VisitCXXParenListOrInitListExpr( + E, E->inits(), E->getInitializedFieldInUnion(), E->getArrayFiller()); +} + +void AggExprEmitter::VisitCXXParenListOrInitListExpr( + Expr *ExprToVisit, ArrayRef<Expr *> InitExprs, + FieldDecl *InitializedFieldInUnion, Expr *ArrayFiller) { #if 0 // FIXME: Assess perf here? Figure out what cases are worth optimizing here // (Length of globals? 
Chunks of zeroed-out space?). // // If we can, prefer a copy from a global; this is a lot less code for long // globals, and it's easier for the current optimizers to analyze. - if (llvm::Constant* C = CGF.CGM.EmitConstantExpr(E, E->getType(), &CGF)) { + if (llvm::Constant *C = + CGF.CGM.EmitConstantExpr(ExprToVisit, ExprToVisit->getType(), &CGF)) { llvm::GlobalVariable* GV = new llvm::GlobalVariable(CGF.CGM.getModule(), C->getType(), true, llvm::GlobalValue::InternalLinkage, C, ""); - EmitFinalDestCopy(E->getType(), CGF.MakeAddrLValue(GV, E->getType())); + EmitFinalDestCopy(ExprToVisit->getType(), + CGF.MakeAddrLValue(GV, ExprToVisit->getType())); return; } #endif - if (E->hadArrayRangeDesignator()) - CGF.ErrorUnsupported(E, "GNU array range designator extension"); - - if (E->isTransparent()) - return Visit(E->getInit(0)); - AggValueSlot Dest = EnsureSlot(E->getType()); + AggValueSlot Dest = EnsureSlot(ExprToVisit->getType()); - LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType()); + LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), ExprToVisit->getType()); // Handle initialization of an array. - if (E->getType()->isArrayType()) { + if (ExprToVisit->getType()->isArrayType()) { auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType()); - EmitArrayInit(Dest.getAddress(), AType, E->getType(), E); + EmitArrayInit(Dest.getAddress(), AType, ExprToVisit->getType(), ExprToVisit, + InitExprs, ArrayFiller); return; } - assert(E->getType()->isRecordType() && "Only support structs/unions here!"); + assert(ExprToVisit->getType()->isRecordType() && + "Only support structs/unions here!"); // Do struct initialization; this code just sets each individual member // to the approprate value. This makes bitfield support automatic; // the disadvantage is that the generated code is more difficult for // the optimizer, especially with bitfields. - unsigned NumInitElements = E->getNumInits(); - RecordDecl *record = E->getType()->castAs<RecordType>()->getDecl(); + unsigned NumInitElements = InitExprs.size(); + RecordDecl *record = ExprToVisit->getType()->castAs<RecordType>()->getDecl(); // We'll need to enter cleanup scopes in case any of the element // initializers throws an exception. @@ -1648,7 +1684,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Emit initialization of base classes. if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) { - assert(E->getNumInits() >= CXXRD->getNumBases() && + assert(NumInitElements >= CXXRD->getNumBases() && "missing initializer for base class"); for (auto &Base : CXXRD->bases()) { assert(!Base.isVirtual() && "should not see vbases here"); @@ -1662,7 +1698,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, CGF.getOverlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); - CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); + CGF.EmitAggExpr(InitExprs[curInitIndex++], AggSlot); if (QualType::DestructionKind dtorKind = Base.getType().isDestructedType()) { @@ -1678,25 +1714,25 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { if (record->isUnion()) { // Only initialize one field of a union. The field itself is // specified by the initializer list. - if (!E->getInitializedFieldInUnion()) { + if (!InitializedFieldInUnion) { // Empty union; we have nothing to do. #ifndef NDEBUG // Make sure that it's really an empty and not a failure of // semantic analysis. 
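[Editorial note] The refactoring above funnels both InitListExpr and the C++20 CXXParenListInitExpr through a single VisitCXXParenListOrInitListExpr, with the initializers, the initialized union field, and the array filler passed explicitly. Source forms that now share one emission path (illustrative example, mine):

    struct Point { int x, y; };

    void demo() {
      Point p1{1, 2};    // InitListExpr
      Point p2(1, 2);    // CXXParenListInitExpr (C++20 parenthesized
                         // aggregate initialization, P0960)
      int arr[4](1, 2);  // trailing elements come from the array filler
    }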
for (const auto *Field : record->fields()) - assert(Field->isUnnamedBitfield() && "Only unnamed bitfields allowed"); + assert((Field->isUnnamedBitfield() || Field->isAnonymousStructOrUnion()) && "Only unnamed bitfields or anonymous class allowed"); #endif return; } // FIXME: volatility - FieldDecl *Field = E->getInitializedFieldInUnion(); + FieldDecl *Field = InitializedFieldInUnion; LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestLV, Field); if (NumInitElements) { // Store the initializer into the field - EmitInitializationToLValue(E->getInit(0), FieldLoc); + EmitInitializationToLValue(InitExprs[0], FieldLoc); } else { // Default-initialize to null. EmitNullInitializationToLValue(FieldLoc); @@ -1720,7 +1756,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // have a zeroed object, and the rest of the fields are // zero-initializable. if (curInitIndex == NumInitElements && Dest.isZeroed() && - CGF.getTypes().isZeroInitializable(E->getType())) + CGF.getTypes().isZeroInitializable(ExprToVisit->getType())) break; @@ -1730,7 +1766,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { if (curInitIndex < NumInitElements) { // Store the initializer into the field. - EmitInitializationToLValue(E->getInit(curInitIndex++), LV); + EmitInitializationToLValue(InitExprs[curInitIndex++], LV); } else { // We're out of initializers; default-initialize to null EmitNullInitializationToLValue(LV); @@ -1926,7 +1962,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { // Reference values are always non-null and have the width of a pointer. if (Field->getType()->isReferenceType()) NumNonZeroBytes += CGF.getContext().toCharUnitsFromBits( - CGF.getTarget().getPointerWidth(0)); + CGF.getTarget().getPointerWidth(LangAS::Default)); else NumNonZeroBytes += GetNumNonZeroBytesInInit(E, CGF); } diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 3cc144361542..b889a4e05ee1 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -646,7 +646,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, case CXXConstructExpr::CK_VirtualBase: ForVirtualBase = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case CXXConstructExpr::CK_NonVirtualBase: Type = Ctor_Base; diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 5409e82d437e..7a14a418c7b6 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -206,12 +206,13 @@ public: return VisitPrePostIncDec(E, true, true); } ComplexPairTy VisitUnaryDeref(const Expr *E) { return EmitLoadOfLValue(E); } - ComplexPairTy VisitUnaryPlus (const UnaryOperator *E) { - TestAndClearIgnoreReal(); - TestAndClearIgnoreImag(); - return Visit(E->getSubExpr()); - } - ComplexPairTy VisitUnaryMinus (const UnaryOperator *E); + + ComplexPairTy VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType = QualType()); + ComplexPairTy VisitPlus(const UnaryOperator *E, QualType PromotionType); + ComplexPairTy VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType = QualType()); + ComplexPairTy VisitMinus(const UnaryOperator *E, QualType PromotionType); ComplexPairTy VisitUnaryNot (const UnaryOperator *E); // LNot,Real,Imag never return complex. ComplexPairTy VisitUnaryExtension(const UnaryOperator *E) { @@ -251,9 +252,13 @@ public: ComplexPairTy LHS; ComplexPairTy RHS; QualType Ty; // Computation Type. 
+ FPOptions FPFeatures; }; - BinOpInfo EmitBinOps(const BinaryOperator *E); + BinOpInfo EmitBinOps(const BinaryOperator *E, + QualType PromotionTy = QualType()); + ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy); + ComplexPairTy EmitPromotedComplexOperand(const Expr *E, QualType PromotionTy); LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, ComplexPairTy (ComplexExprEmitter::*Func) (const BinOpInfo &), @@ -270,19 +275,33 @@ public: ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - ComplexPairTy VisitBinAdd(const BinaryOperator *E) { - return EmitBinAdd(EmitBinOps(E)); - } - ComplexPairTy VisitBinSub(const BinaryOperator *E) { - return EmitBinSub(EmitBinOps(E)); - } - ComplexPairTy VisitBinMul(const BinaryOperator *E) { - return EmitBinMul(EmitBinOps(E)); + QualType getPromotionType(QualType Ty) { + if (auto *CT = Ty->getAs<ComplexType>()) { + QualType ElementType = CT->getElementType(); + if (ElementType.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + } + if (Ty.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().FloatTy; + return QualType(); } - ComplexPairTy VisitBinDiv(const BinaryOperator *E) { - return EmitBinDiv(EmitBinOps(E)); + +#define HANDLEBINOP(OP) \ + ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \ + QualType promotionTy = getPromotionType(E->getType()); \ + ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \ + if (!promotionTy.isNull()) \ + result = \ + CGF.EmitUnPromotedValue(result, E->getType()); \ + return result; \ } + HANDLEBINOP(Mul) + HANDLEBINOP(Div) + HANDLEBINOP(Add) + HANDLEBINOP(Sub) +#undef HANDLEBINOP + ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { return Visit(E->getSemanticForm()); } @@ -556,10 +575,45 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, llvm_unreachable("unknown cast resulting in complex value"); } -ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { +ComplexPairTy ComplexExprEmitter::VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + ComplexPairTy result = VisitPlus(E, promotionTy); + if (!promotionTy.isNull()) + return CGF.EmitUnPromotedValue(result, E->getSubExpr()->getType()); + return result; +} + +ComplexPairTy ComplexExprEmitter::VisitPlus(const UnaryOperator *E, + QualType PromotionType) { TestAndClearIgnoreReal(); TestAndClearIgnoreImag(); - ComplexPairTy Op = Visit(E->getSubExpr()); + if (!PromotionType.isNull()) + return CGF.EmitPromotedComplexExpr(E->getSubExpr(), PromotionType); + return Visit(E->getSubExpr()); +} + +ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? 
getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + ComplexPairTy result = VisitMinus(E, promotionTy); + if (!promotionTy.isNull()) + return CGF.EmitUnPromotedValue(result, E->getSubExpr()->getType()); + return result; +} +ComplexPairTy ComplexExprEmitter::VisitMinus(const UnaryOperator *E, + QualType PromotionType) { + TestAndClearIgnoreReal(); + TestAndClearIgnoreImag(); + ComplexPairTy Op; + if (!PromotionType.isNull()) + Op = CGF.EmitPromotedComplexExpr(E->getSubExpr(), PromotionType); + else + Op = Visit(E->getSubExpr()); llvm::Value *ResR, *ResI; if (Op.first->getType()->isFloatingPointTy()) { @@ -590,6 +644,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinAdd(const BinOpInfo &Op) { llvm::Value *ResR, *ResI; if (Op.LHS.first->getType()->isFloatingPointTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures); ResR = Builder.CreateFAdd(Op.LHS.first, Op.RHS.first, "add.r"); if (Op.LHS.second && Op.RHS.second) ResI = Builder.CreateFAdd(Op.LHS.second, Op.RHS.second, "add.i"); @@ -608,6 +663,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinAdd(const BinOpInfo &Op) { ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) { llvm::Value *ResR, *ResI; if (Op.LHS.first->getType()->isFloatingPointTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures); ResR = Builder.CreateFSub(Op.LHS.first, Op.RHS.first, "sub.r"); if (Op.LHS.second && Op.RHS.second) ResI = Builder.CreateFSub(Op.LHS.second, Op.RHS.second, "sub.i"); @@ -700,6 +756,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { // FIXME: C11 also provides for imaginary types which would allow folding // still more of this within the type system. + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures); if (Op.LHS.second && Op.RHS.second) { // If both operands are complex, emit the core math directly, and then // test for NaNs. If we find NaNs in the result, we delegate to a libcall @@ -801,6 +858,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { // // FIXME: We would be able to avoid the libcall in many places if we // supported imaginary types in addition to complex types. + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures); if (RHSi && !CGF.getLangOpts().FastMath) { BinOpInfo LibCallOp = Op; // If LHS was a real, supply a null imaginary part. 
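[Editorial note] The promotion machinery added here implements excess-precision evaluation for complex arithmetic: when UseExcessPrecision holds for the element type (e.g. _Float16 on x86 without native half support), operands are widened to _Complex float, the operation runs at the wider type, and EmitUnPromotedValue truncates the result back. An illustrative source case with the expected IR shape (my example, on a target that supports _Float16):

    _Complex _Float16 cmul(_Complex _Float16 a, _Complex _Float16 b) {
      // Each half-precision component is fpext'ed to float, the complex
      // multiply (including the NaN/libcall fixup path) runs in float,
      // and the result components are fptrunc'ed back to half.
      return a * b;
    }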
@@ -876,21 +934,103 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { return ComplexPairTy(DSTr, DSTi); } +ComplexPairTy CodeGenFunction::EmitUnPromotedValue(ComplexPairTy result, + QualType UnPromotionType) { + llvm::Type *ComplexElementTy = + ConvertType(UnPromotionType->castAs<ComplexType>()->getElementType()); + if (result.first) + result.first = + Builder.CreateFPTrunc(result.first, ComplexElementTy, "unpromotion"); + if (result.second) + result.second = + Builder.CreateFPTrunc(result.second, ComplexElementTy, "unpromotion"); + return result; +} + +ComplexPairTy CodeGenFunction::EmitPromotedValue(ComplexPairTy result, + QualType PromotionType) { + llvm::Type *ComplexElementTy = + ConvertType(PromotionType->castAs<ComplexType>()->getElementType()); + if (result.first) + result.first = Builder.CreateFPExt(result.first, ComplexElementTy, "ext"); + if (result.second) + result.second = Builder.CreateFPExt(result.second, ComplexElementTy, "ext"); + + return result; +} + +ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E, + QualType PromotionType) { + E = E->IgnoreParens(); + if (auto BO = dyn_cast<BinaryOperator>(E)) { + switch (BO->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return EmitBin##OP(EmitBinOps(BO, PromotionType)); + HANDLE_BINOP(Add) + HANDLE_BINOP(Sub) + HANDLE_BINOP(Mul) + HANDLE_BINOP(Div) +#undef HANDLE_BINOP + default: + break; + } + } else if (auto UO = dyn_cast<UnaryOperator>(E)) { + switch (UO->getOpcode()) { + case UO_Minus: + return VisitMinus(UO, PromotionType); + case UO_Plus: + return VisitPlus(UO, PromotionType); + default: + break; + } + } + auto result = Visit(const_cast<Expr *>(E)); + if (!PromotionType.isNull()) + return CGF.EmitPromotedValue(result, PromotionType); + else + return result; +} + +ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E, + QualType DstTy) { + return ComplexExprEmitter(*this).EmitPromoted(E, DstTy); +} + +ComplexPairTy +ComplexExprEmitter::EmitPromotedComplexOperand(const Expr *E, + QualType OverallPromotionType) { + if (E->getType()->isAnyComplexType()) { + if (!OverallPromotionType.isNull()) + return CGF.EmitPromotedComplexExpr(E, OverallPromotionType); + else + return Visit(const_cast<Expr *>(E)); + } else { + if (!OverallPromotionType.isNull()) { + QualType ComplexElementTy = + OverallPromotionType->castAs<ComplexType>()->getElementType(); + return ComplexPairTy(CGF.EmitPromotedScalarExpr(E, ComplexElementTy), + nullptr); + } else { + return ComplexPairTy(CGF.EmitScalarExpr(E), nullptr); + } + } +} + ComplexExprEmitter::BinOpInfo -ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) { +ComplexExprEmitter::EmitBinOps(const BinaryOperator *E, + QualType PromotionType) { TestAndClearIgnoreReal(); TestAndClearIgnoreImag(); BinOpInfo Ops; - if (E->getLHS()->getType()->isRealFloatingType()) - Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr); - else - Ops.LHS = Visit(E->getLHS()); - if (E->getRHS()->getType()->isRealFloatingType()) - Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); - else - Ops.RHS = Visit(E->getRHS()); - Ops.Ty = E->getType(); + Ops.LHS = EmitPromotedComplexOperand(E->getLHS(), PromotionType); + Ops.RHS = EmitPromotedComplexOperand(E->getRHS(), PromotionType); + if (!PromotionType.isNull()) + Ops.Ty = PromotionType; + else + Ops.Ty = E->getType(); + Ops.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); return Ops; } @@ -905,41 +1045,74 @@ EmitCompoundAssignLValue(const CompoundAssignOperator *E, if (const 
AtomicType *AT = LHSTy->getAs<AtomicType>()) LHSTy = AT->getValueType(); - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); BinOpInfo OpInfo; + OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures); // Load the RHS and LHS operands. // __block variables need to have the rhs evaluated first, plus this should // improve codegen a little. - OpInfo.Ty = E->getComputationResultType(); - QualType ComplexElementTy = cast<ComplexType>(OpInfo.Ty)->getElementType(); + QualType PromotionTypeCR; + PromotionTypeCR = getPromotionType(E->getComputationResultType()); + if (PromotionTypeCR.isNull()) + PromotionTypeCR = E->getComputationResultType(); + OpInfo.Ty = PromotionTypeCR; + QualType ComplexElementTy = + OpInfo.Ty->castAs<ComplexType>()->getElementType(); + QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType()); // The RHS should have been converted to the computation type. if (E->getRHS()->getType()->isRealFloatingType()) { - assert( - CGF.getContext() - .hasSameUnqualifiedType(ComplexElementTy, E->getRHS()->getType())); - OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); + if (!PromotionTypeRHS.isNull()) + OpInfo.RHS = ComplexPairTy( + CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS), nullptr); + else { + assert(CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, + E->getRHS()->getType())); + + OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); + } } else { - assert(CGF.getContext() - .hasSameUnqualifiedType(OpInfo.Ty, E->getRHS()->getType())); - OpInfo.RHS = Visit(E->getRHS()); + if (!PromotionTypeRHS.isNull()) { + OpInfo.RHS = ComplexPairTy( + CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionTypeRHS)); + } else { + assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty, + E->getRHS()->getType())); + OpInfo.RHS = Visit(E->getRHS()); + } } LValue LHS = CGF.EmitLValue(E->getLHS()); // Load from the l-value and convert it. SourceLocation Loc = E->getExprLoc(); + QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType()); if (LHSTy->isAnyComplexType()) { ComplexPairTy LHSVal = EmitLoadOfLValue(LHS, Loc); - OpInfo.LHS = EmitComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc); + if (!PromotionTypeLHS.isNull()) + OpInfo.LHS = + EmitComplexToComplexCast(LHSVal, LHSTy, PromotionTypeLHS, Loc); + else + OpInfo.LHS = EmitComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc); } else { llvm::Value *LHSVal = CGF.EmitLoadOfScalar(LHS, Loc); // For floating point real operands we can directly pass the scalar form // to the binary operator emission and potentially get more efficient code. 
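[Editorial note] EmitCompoundAssignLValue gets the same treatment: the computation result type, the RHS, and the LHS may each be promoted independently, so a compound assignment on a narrow complex type is computed at the promoted type and only the final store converts back. Illustrative source (mine):

    void accumulate(_Complex _Float16 *acc, _Complex _Float16 v) {
      // The loaded components of *acc and v are widened, the addition
      // happens in _Complex float, and the store back truncates to half.
      *acc += v;
    }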
if (LHSTy->isRealFloatingType()) { - if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy)) - LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc); + QualType PromotedComplexElementTy; + if (!PromotionTypeLHS.isNull()) { + PromotedComplexElementTy = + cast<ComplexType>(PromotionTypeLHS)->getElementType(); + if (!CGF.getContext().hasSameUnqualifiedType(PromotedComplexElementTy, + PromotionTypeLHS)) + LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy, + PromotedComplexElementTy, Loc); + } else { + if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy)) + LHSVal = + CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc); + } OpInfo.LHS = ComplexPairTy(LHSVal, nullptr); } else { OpInfo.LHS = EmitScalarToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index b83a87443250..c38feaaca35a 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -46,7 +47,7 @@ struct ConstantAggregateBuilderUtils { CharUnits getAlignment(const llvm::Constant *C) const { return CharUnits::fromQuantity( - CGM.getDataLayout().getABITypeAlignment(C->getType())); + CGM.getDataLayout().getABITypeAlign(C->getType())); } CharUnits getSize(llvm::Type *Ty) const { @@ -94,7 +95,7 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils { bool NaturalLayout = true; bool split(size_t Index, CharUnits Hint); - Optional<size_t> splitAt(CharUnits Pos); + std::optional<size_t> splitAt(CharUnits Pos); static llvm::Constant *buildFrom(CodeGenModule &CGM, ArrayRef<llvm::Constant *> Elems, @@ -158,12 +159,12 @@ bool ConstantAggregateBuilder::add(llvm::Constant *C, CharUnits Offset, } // Uncommon case: constant overlaps what we've already created. - llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset); + std::optional<size_t> FirstElemToReplace = splitAt(Offset); if (!FirstElemToReplace) return false; CharUnits CSize = getSize(C); - llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + CSize); + std::optional<size_t> LastElemToReplace = splitAt(Offset + CSize); if (!LastElemToReplace) return false; @@ -222,10 +223,10 @@ bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits, // Partial byte: update the existing integer if there is one. If we // can't split out a 1-CharUnit range to update, then we can't add // these bits and fail the entire constant emission. - llvm::Optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars); + std::optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars); if (!FirstElemToUpdate) return false; - llvm::Optional<size_t> LastElemToUpdate = + std::optional<size_t> LastElemToUpdate = splitAt(OffsetInChars + CharUnits::One()); if (!LastElemToUpdate) return false; @@ -283,8 +284,8 @@ bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits, /// Returns a position within Elems and Offsets such that all elements /// before the returned index end before Pos and all elements at or after /// the returned index begin at or after Pos. Splits elements as necessary -/// to ensure this. Returns None if we find something we can't split. -Optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) { +/// to ensure this. Returns std::nullopt if we find something we can't split. 
+std::optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) { if (Pos >= Size) return Offsets.size(); @@ -305,7 +306,7 @@ Optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) { // Try to decompose it into smaller constants. if (!split(LastAtOrBeforePosIndex, Pos)) - return None; + return std::nullopt; } } @@ -517,12 +518,12 @@ void ConstantAggregateBuilder::condense(CharUnits Offset, llvm::Type *DesiredTy) { CharUnits Size = getSize(DesiredTy); - llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset); + std::optional<size_t> FirstElemToReplace = splitAt(Offset); if (!FirstElemToReplace) return; size_t First = *FirstElemToReplace; - llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + Size); + std::optional<size_t> LastElemToReplace = splitAt(Offset + Size); if (!LastElemToReplace) return; size_t Last = *LastElemToReplace; @@ -543,8 +544,8 @@ void ConstantAggregateBuilder::condense(CharUnits Offset, } llvm::Constant *Replacement = buildFrom( - CGM, makeArrayRef(Elems).slice(First, Length), - makeArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy), + CGM, ArrayRef(Elems).slice(First, Length), + ArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy), /*known to have natural layout=*/false, DesiredTy, false); replace(Elems, First, Last, {Replacement}); replace(Offsets, First, Last, {Offset}); @@ -913,17 +914,16 @@ bool ConstStructBuilder::UpdateStruct(ConstantEmitter &Emitter, // ConstExprEmitter //===----------------------------------------------------------------------===// -static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, - CodeGenFunction *CGF, - const CompoundLiteralExpr *E) { +static ConstantAddress +tryEmitGlobalCompoundLiteral(ConstantEmitter &emitter, + const CompoundLiteralExpr *E) { + CodeGenModule &CGM = emitter.CGM; CharUnits Align = CGM.getContext().getTypeAlignInChars(E->getType()); if (llvm::GlobalVariable *Addr = CGM.getAddrOfConstantCompoundLiteralIfEmitted(E)) return ConstantAddress(Addr, Addr->getValueType(), Align); LangAS addressSpace = E->getType().getAddressSpace(); - - ConstantEmitter emitter(CGM, CGF); llvm::Constant *C = emitter.tryEmitForInitializer(E->getInitializer(), addressSpace, E->getType()); if (!C) { @@ -972,7 +972,7 @@ EmitArrayConstant(CodeGenModule &CGM, llvm::ArrayType *DesiredType, if (CommonElementType && NonzeroLength >= 8) { llvm::Constant *Initial = llvm::ConstantArray::get( llvm::ArrayType::get(CommonElementType, NonzeroLength), - makeArrayRef(Elements).take_front(NonzeroLength)); + ArrayRef(Elements).take_front(NonzeroLength)); Elements.resize(2); Elements[0] = Initial; } else { @@ -1395,15 +1395,12 @@ ConstantEmitter::tryEmitAbstract(const APValue &value, QualType destType) { llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) { if (!CE->hasAPValueResult()) return nullptr; - const Expr *Inner = CE->getSubExpr()->IgnoreImplicit(); - QualType RetType; - if (auto *Call = dyn_cast<CallExpr>(Inner)) - RetType = Call->getCallReturnType(CGM.getContext()); - else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Inner)) - RetType = Ctor->getType(); - llvm::Constant *Res = - emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType); - return Res; + + QualType RetType = CE->getType(); + if (CE->isGLValue()) + RetType = CGM.getContext().getLValueReferenceType(RetType); + + return emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType); } llvm::Constant * @@ -1970,7 +1967,9 @@ ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr 
*E) { ConstantLValue ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { - return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E); + ConstantEmitter CompoundLiteralEmitter(CGM, Emitter.CGF); + CompoundLiteralEmitter.setInConstantContext(Emitter.isInConstantContext()); + return tryEmitGlobalCompoundLiteral(CompoundLiteralEmitter, E); } ConstantLValue @@ -2214,7 +2213,8 @@ void CodeGenModule::setAddrOfConstantCompoundLiteral( ConstantAddress CodeGenModule::GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *E) { assert(E->isFileScope() && "not a file-scope compound literal expr"); - return tryEmitGlobalCompoundLiteral(*this, nullptr, E); + ConstantEmitter emitter(*this); + return tryEmitGlobalCompoundLiteral(emitter, E); } llvm::Constant * diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index b150aaa376eb..a0dcb978b1ac 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -28,7 +28,6 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/APFixedPoint.h" -#include "llvm/ADT/Optional.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -43,6 +42,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/TypeSize.h" #include <cstdarg> +#include <optional> using namespace clang; using namespace CodeGen; @@ -152,16 +152,16 @@ static bool MustVisitNullValue(const Expr *E) { } /// If \p E is a widened promoted integer, get its base (unpromoted) type. -static llvm::Optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx, - const Expr *E) { +static std::optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx, + const Expr *E) { const Expr *Base = E->IgnoreImpCasts(); if (E == Base) - return llvm::None; + return std::nullopt; QualType BaseTy = Base->getType(); - if (!BaseTy->isPromotableIntegerType() || + if (!Ctx.isPromotableIntegerType(BaseTy) || Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType())) - return llvm::None; + return std::nullopt; return BaseTy; } @@ -255,7 +255,7 @@ public: if (VD->getType()->isReferenceType()) { if (const auto *TTy = - dyn_cast<TypedefType>(VD->getType().getNonReferenceType())) + VD->getType().getNonReferenceType()->getAs<TypedefType>()) AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>(); } else { // Assumptions for function parameters are emitted at the start of the @@ -271,8 +271,7 @@ public: } if (!AVAttr) - if (const auto *TTy = - dyn_cast<TypedefType>(E->getType())) + if (const auto *TTy = E->getType()->getAs<TypedefType>()) AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>(); if (!AVAttr) @@ -468,6 +467,9 @@ public: return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); } Value *VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E) { + if (E->getType()->isVoidType()) + return nullptr; + return EmitNullValue(E->getType()); } Value *VisitGNUNullExpr(const GNUNullExpr *E) { @@ -620,16 +622,22 @@ public: return Visit(E->getSubExpr()); // the actual value should be unused return EmitLoadOfLValue(E); } - Value *VisitUnaryPlus(const UnaryOperator *E) { - // This differs from gcc, though, most likely due to a bug in gcc. 
- TestAndClearIgnoreResultAssign(); - return Visit(E->getSubExpr()); - } - Value *VisitUnaryMinus (const UnaryOperator *E); + + Value *VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType = QualType()); + Value *VisitPlus(const UnaryOperator *E, QualType PromotionType); + Value *VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType = QualType()); + Value *VisitMinus(const UnaryOperator *E, QualType PromotionType); + Value *VisitUnaryNot (const UnaryOperator *E); Value *VisitUnaryLNot (const UnaryOperator *E); - Value *VisitUnaryReal (const UnaryOperator *E); - Value *VisitUnaryImag (const UnaryOperator *E); + Value *VisitUnaryReal(const UnaryOperator *E, + QualType PromotionType = QualType()); + Value *VisitReal(const UnaryOperator *E, QualType PromotionType); + Value *VisitUnaryImag(const UnaryOperator *E, + QualType PromotionType = QualType()); + Value *VisitImag(const UnaryOperator *E, QualType PromotionType); Value *VisitUnaryExtension(const UnaryOperator *E) { return Visit(E->getSubExpr()); } @@ -719,7 +727,7 @@ public: case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), Ops)) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); @@ -791,7 +799,13 @@ public: // Helper functions for fixed point binary operations. Value *EmitFixedPointBinOp(const BinOpInfo &Ops); - BinOpInfo EmitBinOps(const BinaryOperator *E); + BinOpInfo EmitBinOps(const BinaryOperator *E, + QualType PromotionTy = QualType()); + + Value *EmitPromotedValue(Value *result, QualType PromotionType); + Value *EmitUnPromotedValue(Value *result, QualType ExprType); + Value *EmitPromoted(const Expr *E, QualType PromotionType); + LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*F)(const BinOpInfo &), Value *&Result); @@ -799,13 +813,28 @@ public: Value *EmitCompoundAssign(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*F)(const BinOpInfo &)); + QualType getPromotionType(QualType Ty) { + if (auto *CT = Ty->getAs<ComplexType>()) { + QualType ElementType = CT->getElementType(); + if (ElementType.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + } + if (Ty.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().FloatTy; + return QualType(); + } + // Binary operators and binary compound assignment operators. 
-#define HANDLEBINOP(OP) \ - Value *VisitBin ## OP(const BinaryOperator *E) { \ - return Emit ## OP(EmitBinOps(E)); \ - } \ - Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \ - return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \ +#define HANDLEBINOP(OP) \ + Value *VisitBin##OP(const BinaryOperator *E) { \ + QualType promotionTy = getPromotionType(E->getType()); \ + auto result = Emit##OP(EmitBinOps(E, promotionTy)); \ + if (result && !promotionTy.isNull()) \ + result = EmitUnPromotedValue(result, E->getType()); \ + return result; \ + } \ + Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) { \ + return EmitCompoundAssign(E, &ScalarExprEmitter::Emit##OP); \ } HANDLEBINOP(Mul) HANDLEBINOP(Div) @@ -1599,21 +1628,14 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) { Value * ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) { ASTContext &Context = CGF.getContext(); - llvm::Optional<LangAS> GlobalAS = - Context.getTargetInfo().getConstantAddressSpace(); + unsigned AddrSpace = + Context.getTargetAddressSpace(CGF.CGM.GetGlobalConstantAddressSpace()); llvm::Constant *GlobalConstStr = Builder.CreateGlobalStringPtr( - E->ComputeName(Context), "__usn_str", - static_cast<unsigned>(GlobalAS.value_or(LangAS::Default))); + E->ComputeName(Context), "__usn_str", AddrSpace); - unsigned ExprAS = Context.getTargetAddressSpace(E->getType()); - - if (GlobalConstStr->getType()->getPointerAddressSpace() == ExprAS) - return GlobalConstStr; - - llvm::PointerType *PtrTy = cast<llvm::PointerType>(GlobalConstStr->getType()); - llvm::PointerType *NewPtrTy = - llvm::PointerType::getWithSamePointeeType(PtrTy, ExprAS); - return Builder.CreateAddrSpaceCast(GlobalConstStr, NewPtrTy, "usn_addr_cast"); + llvm::Type *ExprTy = ConvertType(E->getType()); + return Builder.CreatePointerBitCastOrAddrSpaceCast(GlobalConstStr, ExprTy, + "usn_addr_cast"); } Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { @@ -1643,7 +1665,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { // newv = insert newv, x, i auto *RTy = llvm::FixedVectorType::get(LTy->getElementType(), MTy->getNumElements()); - Value* NewV = llvm::UndefValue::get(RTy); + Value* NewV = llvm::PoisonValue::get(RTy); for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) { Value *IIndx = llvm::ConstantInt::get(CGF.SizeTy, i); Value *Indx = Builder.CreateExtractElement(Mask, IIndx, "shuf_idx"); @@ -1999,6 +2021,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Expr *E = CE->getSubExpr(); QualType DestTy = CE->getType(); CastKind Kind = CE->getCastKind(); + CodeGenFunction::CGFPOptionsRAII FPOptions(CGF, CE); // These cases are generally not written to ignore the result of // evaluating their sub-expressions, so we clear this now. 
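[Editorial note] In VisitShuffleVectorExpr above, the element-by-element expansion now seeds the result with a poison vector rather than undef, matching the broader undef-to-poison migration (also visible in getEHResumeBlock earlier in this commit). A sketch of the pattern (standalone IRBuilder helper; names are mine):

    #include "llvm/IR/IRBuilder.h"

    // Build a vector by inserting into poison. Poison is the preferred
    // placeholder when every lane is overwritten before use, since it
    // carries no accidental semantics for lanes that are never written.
    llvm::Value *buildVec(llvm::IRBuilder<> &B, llvm::FixedVectorType *VTy,
                          llvm::ArrayRef<llvm::Value *> Lanes) {
      llvm::Value *V = llvm::PoisonValue::get(VTy);
      for (size_t I = 0; I != Lanes.size(); ++I)
        V = B.CreateInsertElement(V, Lanes[I], I);
      return V;
    }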
@@ -2479,7 +2502,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWAdd(InVal, Amount, Name); - LLVM_FALLTHROUGH; + [[fallthrough]]; case LangOptions::SOB_Trapping: if (!E->canOverflow()) return Builder.CreateNSWAdd(InVal, Amount, Name); @@ -2584,7 +2607,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (type->isIntegerType()) { QualType promotedType; bool canPerformLossyDemotionCheck = false; - if (type->isPromotableIntegerType()) { + if (CGF.getContext().isPromotableIntegerType(type)) { promotedType = CGF.getContext().getPromotedIntegerType(type); assert(promotedType != type && "Shouldn't promote to the same type."); canPerformLossyDemotionCheck = true; @@ -2818,10 +2841,45 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } +Value *ScalarExprEmitter::VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + Value *result = VisitPlus(E, promotionTy); + if (result && !promotionTy.isNull()) + result = EmitUnPromotedValue(result, E->getType()); + return result; +} -Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { +Value *ScalarExprEmitter::VisitPlus(const UnaryOperator *E, + QualType PromotionType) { + // This differs from gcc, though, most likely due to a bug in gcc. TestAndClearIgnoreResultAssign(); - Value *Op = Visit(E->getSubExpr()); + if (!PromotionType.isNull()) + return CGF.EmitPromotedScalarExpr(E->getSubExpr(), PromotionType); + return Visit(E->getSubExpr()); +} + +Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + Value *result = VisitMinus(E, promotionTy); + if (result && !promotionTy.isNull()) + result = EmitUnPromotedValue(result, E->getType()); + return result; +} + +Value *ScalarExprEmitter::VisitMinus(const UnaryOperator *E, + QualType PromotionType) { + TestAndClearIgnoreResultAssign(); + Value *Op; + if (!PromotionType.isNull()) + Op = CGF.EmitPromotedScalarExpr(E->getSubExpr(), PromotionType); + else + Op = Visit(E->getSubExpr()); // Generate a unary FNeg for FP ops. if (Op->getType()->isFPOrFPVectorTy()) @@ -2841,7 +2899,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { Value *ScalarExprEmitter::VisitUnaryNot(const UnaryOperator *E) { TestAndClearIgnoreResultAssign(); Value *Op = Visit(E->getSubExpr()); - return Builder.CreateNot(Op, "neg"); + return Builder.CreateNot(Op, "not"); } Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { @@ -3006,33 +3064,75 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( return Builder.getInt(E->EvaluateKnownConstInt(CGF.getContext())); } -Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E) { +Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? 
getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + Value *result = VisitReal(E, promotionTy); + if (result && !promotionTy.isNull()) + result = EmitUnPromotedValue(result, E->getType()); + return result; +} + +Value *ScalarExprEmitter::VisitReal(const UnaryOperator *E, + QualType PromotionType) { Expr *Op = E->getSubExpr(); if (Op->getType()->isAnyComplexType()) { // If it's an l-value, load through the appropriate subobject l-value. // Note that we have to ask E because Op might be an l-value that // this won't work for, e.g. an Obj-C property. - if (E->isGLValue()) - return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), - E->getExprLoc()).getScalarVal(); - + if (E->isGLValue()) { + if (!PromotionType.isNull()) { + CodeGenFunction::ComplexPairTy result = CGF.EmitComplexExpr( + Op, /*IgnoreReal*/ IgnoreResultAssign, /*IgnoreImag*/ true); + if (result.first) + result.first = CGF.EmitPromotedValue(result, PromotionType).first; + return result.first; + } else { + return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc()) + .getScalarVal(); + } + } // Otherwise, calculate and project. return CGF.EmitComplexExpr(Op, false, true).first; } + if (!PromotionType.isNull()) + return CGF.EmitPromotedScalarExpr(Op, PromotionType); return Visit(Op); } -Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) { +Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + Value *result = VisitImag(E, promotionTy); + if (result && !promotionTy.isNull()) + result = EmitUnPromotedValue(result, E->getType()); + return result; +} + +Value *ScalarExprEmitter::VisitImag(const UnaryOperator *E, + QualType PromotionType) { Expr *Op = E->getSubExpr(); if (Op->getType()->isAnyComplexType()) { // If it's an l-value, load through the appropriate subobject l-value. // Note that we have to ask E because Op might be an l-value that // this won't work for, e.g. an Obj-C property. - if (Op->isGLValue()) - return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), - E->getExprLoc()).getScalarVal(); - + if (Op->isGLValue()) { + if (!PromotionType.isNull()) { + CodeGenFunction::ComplexPairTy result = CGF.EmitComplexExpr( + Op, /*IgnoreReal*/ true, /*IgnoreImag*/ IgnoreResultAssign); + if (result.second) + result.second = CGF.EmitPromotedValue(result, PromotionType).second; + return result.second; + } else { + return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc()) + .getScalarVal(); + } + } // Otherwise, calculate and project. return CGF.EmitComplexExpr(Op, true, false).second; } @@ -3041,8 +3141,12 @@ Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) { // effects are evaluated, but not the actual value. 
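[Editorial note] VisitUnaryImag keeps the documented behavior for real operands, now threading the promotion type through: the operand is evaluated for its side effects (possibly at the promoted type), and the result is simply a zero of the appropriate type. For instance (illustrative, using Clang's __imag__ extension):

    float g(void);

    float imag_of_call(void) {
      // g() is still called for its side effects, but __imag__ of a real
      // value folds to a zero of the expression's type.
      return __imag__ g();
    }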
if (Op->isGLValue()) CGF.EmitLValue(Op); + else if (!PromotionType.isNull()) + CGF.EmitPromotedScalarExpr(Op, PromotionType); else CGF.EmitScalarExpr(Op, true); + if (!PromotionType.isNull()) + return llvm::Constant::getNullValue(ConvertType(PromotionType)); return llvm::Constant::getNullValue(ConvertType(E->getType())); } @@ -3050,12 +3154,65 @@ Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) { // Binary Operators //===----------------------------------------------------------------------===// -BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { +Value *ScalarExprEmitter::EmitPromotedValue(Value *result, + QualType PromotionType) { + return CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext"); +} + +Value *ScalarExprEmitter::EmitUnPromotedValue(Value *result, + QualType ExprType) { + return CGF.Builder.CreateFPTrunc(result, ConvertType(ExprType), "unpromotion"); +} + +Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) { + E = E->IgnoreParens(); + if (auto BO = dyn_cast<BinaryOperator>(E)) { + switch (BO->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return Emit##OP(EmitBinOps(BO, PromotionType)); + HANDLE_BINOP(Add) + HANDLE_BINOP(Sub) + HANDLE_BINOP(Mul) + HANDLE_BINOP(Div) +#undef HANDLE_BINOP + default: + break; + } + } else if (auto UO = dyn_cast<UnaryOperator>(E)) { + switch (UO->getOpcode()) { + case UO_Imag: + return VisitImag(UO, PromotionType); + case UO_Real: + return VisitReal(UO, PromotionType); + case UO_Minus: + return VisitMinus(UO, PromotionType); + case UO_Plus: + return VisitPlus(UO, PromotionType); + default: + break; + } + } + auto result = Visit(const_cast<Expr *>(E)); + if (result) { + if (!PromotionType.isNull()) + return EmitPromotedValue(result, PromotionType); + else + return EmitUnPromotedValue(result, E->getType()); + } + return result; +} + +BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E, + QualType PromotionType) { TestAndClearIgnoreResultAssign(); BinOpInfo Result; - Result.LHS = Visit(E->getLHS()); - Result.RHS = Visit(E->getRHS()); - Result.Ty = E->getType(); + Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType); + Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType); + if (!PromotionType.isNull()) + Result.Ty = PromotionType; + else + Result.Ty = E->getType(); Result.Opcode = E->getOpcode(); Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); Result.E = E; @@ -3074,8 +3231,18 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Emit the RHS first. __block variables need to have the rhs evaluated // first, plus this should improve codegen a little. 
- OpInfo.RHS = Visit(E->getRHS()); - OpInfo.Ty = E->getComputationResultType(); + + QualType PromotionTypeCR = getPromotionType(E->getComputationResultType()); + if (PromotionTypeCR.isNull()) + PromotionTypeCR = E->getComputationResultType(); + QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType()); + QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType()); + if (!PromotionTypeRHS.isNull()) + OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS); + else + OpInfo.RHS = Visit(E->getRHS()); + OpInfo.Ty = PromotionTypeCR; OpInfo.Opcode = E->getOpcode(); OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); OpInfo.E = E; @@ -3154,16 +3321,20 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures); SourceLocation Loc = E->getExprLoc(); - OpInfo.LHS = - EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc); + if (!PromotionTypeLHS.isNull()) + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionTypeLHS, + Loc); + else + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, + E->getComputationLHSType(), Loc); // Expand the binary operator. Result = (this->*Func)(OpInfo); // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, - Loc, ScalarConversionOpts(CGF.SanOpts)); + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3651,7 +3822,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), op)) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); @@ -3801,7 +3972,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), op)) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); @@ -4759,8 +4930,7 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) { static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, Value *Src, unsigned NumElementsDst) { static constexpr int Mask[] = {0, 1, 2, -1}; - return Builder.CreateShuffleVector(Src, - llvm::makeArrayRef(Mask, NumElementsDst)); + return Builder.CreateShuffleVector(Src, llvm::ArrayRef(Mask, NumElementsDst)); } // Create cast instructions for converting LLVM value \p Src to LLVM type \p @@ -4897,6 +5067,16 @@ Value *CodeGenFunction::EmitComplexToScalarConversion(ComplexPairTy Src, } +Value * +CodeGenFunction::EmitPromotedScalarExpr(const Expr *E, + QualType PromotionType) { + if (!PromotionType.isNull()) + return ScalarExprEmitter(*this).EmitPromoted(E, PromotionType); + else + return ScalarExprEmitter(*this).Visit(const_cast<Expr *>(E)); +} + + llvm::Value *CodeGenFunction:: EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) {
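Context for the hunks above: the new PromotionType plumbing lets arithmetic on small floating-point types such as _Float16 be carried out at float width and narrowed only at the end. A minimal sketch of that promote/compute/unpromote shape using bare IRBuilder calls; the function and variable names below are illustrative only, not part of the patch:

// Sketch only, assuming half promotes to float.
#include "llvm/IR/IRBuilder.h"

llvm::Value *emitPromotedHalfAdd(llvm::IRBuilder<> &B, llvm::Value *LHS,
                                 llvm::Value *RHS) {
  llvm::Value *L = B.CreateFPExt(LHS, B.getFloatTy(), "ext");  // promote
  llvm::Value *R = B.CreateFPExt(RHS, B.getFloatTy(), "ext");  // promote
  llvm::Value *Sum = B.CreateFAdd(L, R, "add");                // compute wide
  return B.CreateFPTrunc(Sum, B.getHalfTy(), "unpromotion");   // narrow back
}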
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index fdd2fa18bb4a..c39e0cc75f2d 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -162,7 +162,7 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF, // amdgpu llvm::Constant *Size = llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()), - static_cast<uint32_t>(r.second.getFixedSize())); + static_cast<uint32_t>(r.second.getFixedValue())); Vec.push_back(Size); } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 7dfcc65969a8..5882f491d597 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -13,16 +13,22 @@ //===----------------------------------------------------------------------===// #include "CGHLSLRuntime.h" +#include "CGDebugInfo.h" #include "CodeGenModule.h" +#include "clang/AST/Decl.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Support/FormatVariadic.h" using namespace clang; using namespace CodeGen; +using namespace clang::hlsl; using namespace llvm; namespace { + void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { // The validation of ValVersionStr is done at HLSLToolChain::TranslateArgs. // Assume ValVersionStr is legal here. @@ -39,14 +45,415 @@ void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { IRBuilder<> B(M.getContext()); MDNode *Val = MDNode::get(Ctx, {ConstantAsMetadata::get(B.getInt32(Major)), ConstantAsMetadata::get(B.getInt32(Minor))}); - StringRef DxilValKey = "dx.valver"; - M.addModuleFlag(llvm::Module::ModFlagBehavior::AppendUnique, DxilValKey, Val); + StringRef DXILValKey = "dx.valver"; + auto *DXILValMD = M.getOrInsertNamedMetadata(DXILValKey); + DXILValMD->addOperand(Val); +} +void addDisableOptimizations(llvm::Module &M) { + StringRef Key = "dx.disable_optimizations"; + M.addModuleFlag(llvm::Module::ModFlagBehavior::Override, Key, 1); +} +// A cbuffer is translated into a global variable in a special address space. +// If translated into C, +// cbuffer A { +// float a; +// float b; +// } +// float foo() { return a + b; } +// +// will be translated into +// +// struct A { +// float a; +// float b; +// } cbuffer_A __attribute__((address_space(4))); +// float foo() { return cbuffer_A.a + cbuffer_A.b; } +// +// layoutBuffer will create the struct A type. +// replaceBuffer will replace the uses of the global variables a and b with +// cbuffer_A.a and cbuffer_A.b. +// +void layoutBuffer(CGHLSLRuntime::Buffer &Buf, const DataLayout &DL) { + if (Buf.Constants.empty()) + return; + + std::vector<llvm::Type *> EltTys; + for (auto &Const : Buf.Constants) { + GlobalVariable *GV = Const.first; + Const.second = EltTys.size(); + llvm::Type *Ty = GV->getValueType(); + EltTys.emplace_back(Ty); + } + Buf.LayoutStruct = llvm::StructType::get(EltTys[0]->getContext(), EltTys); +} + +GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) { + // Create the global variable for the CB. + GlobalVariable *CBGV = new GlobalVariable( + Buf.LayoutStruct, /*isConstant*/ true, + GlobalValue::LinkageTypes::ExternalLinkage, nullptr, + llvm::formatv("{0}{1}", Buf.Name, Buf.IsCBuffer ? ".cb." : ".tb."), + GlobalValue::NotThreadLocal); + + IRBuilder<> B(CBGV->getContext()); + Value *ZeroIdx = B.getInt32(0); + // Replace each constant's uses with uses of the CB. + for (auto &[GV, Offset] : Buf.Constants) { + Value *GEP = + B.CreateGEP(Buf.LayoutStruct, CBGV, {ZeroIdx, B.getInt32(Offset)}); + + assert(Buf.LayoutStruct->getElementType(Offset) == GV->getValueType() && + "constant type mismatch"); + + // Replace. + GV->replaceAllUsesWith(GEP); + // Erase GV. + GV->removeDeadConstantUsers(); + GV->eraseFromParent(); + } + return CBGV; } + } // namespace
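For orientation, a standalone sketch (hypothetical names, not code from this patch) of what layoutBuffer and replaceBuffer together produce for the cbuffer A example in the comment above:

// Sketch only: hand-lowering cbuffer A { float a; float b; }.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

void sketchLowerCBuffer(llvm::Module &M, llvm::GlobalVariable *A,
                        llvm::GlobalVariable *BV) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::Type *F = llvm::Type::getFloatTy(Ctx);
  // layoutBuffer: one struct element per constant, in declaration order.
  llvm::StructType *Layout = llvm::StructType::get(Ctx, {F, F});
  // replaceBuffer: one combined global for the whole buffer...
  auto *CBGV = new llvm::GlobalVariable(M, Layout, /*isConstant=*/true,
                                        llvm::GlobalValue::ExternalLinkage,
                                        /*Initializer=*/nullptr, "A.cb.");
  // ...and every use of the loose globals is redirected through a GEP.
  llvm::IRBuilder<> B(Ctx);
  A->replaceAllUsesWith(B.CreateConstInBoundsGEP2_32(Layout, CBGV, 0, 0));
  BV->replaceAllUsesWith(B.CreateConstInBoundsGEP2_32(Layout, CBGV, 0, 1));
}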
+void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) { + if (D->getStorageClass() == SC_Static) { + // A static inside a cbuffer is emitted as an ordinary global static; + // don't add it to the cbuffer layout. + CGM.EmitGlobal(D); + return; + } + + auto *GV = cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(D)); + // Add debug info for constVal. + if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::DebugInfoKind::LimitedDebugInfo) + DI->EmitGlobalVariable(cast<GlobalVariable>(GV), D); + + // FIXME: support packoffset. + // See https://github.com/llvm/llvm-project/issues/57914. + uint32_t Offset = 0; + bool HasUserOffset = false; + + unsigned LowerBound = HasUserOffset ? Offset : UINT_MAX; + CB.Constants.emplace_back(std::make_pair(GV, LowerBound)); +} + +void CGHLSLRuntime::addBufferDecls(const DeclContext *DC, Buffer &CB) { + for (Decl *it : DC->decls()) { + if (auto *ConstDecl = dyn_cast<VarDecl>(it)) { + addConstant(ConstDecl, CB); + } else if (isa<CXXRecordDecl, EmptyDecl>(it)) { + // Nothing to do for this declaration. + } else if (isa<FunctionDecl>(it)) { + // A function within a cbuffer is effectively a top-level function, + // as it only refers to globally scoped declarations. + CGM.EmitTopLevelDecl(it); + } + } +} + +void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *D) { + Buffers.emplace_back(Buffer(D)); + addBufferDecls(D, Buffers.back()); +} + void CGHLSLRuntime::finishCodeGen() { auto &TargetOpts = CGM.getTarget().getTargetOpts(); + llvm::Module &M = CGM.getModule(); + Triple T(M.getTargetTriple()); + if (T.getArch() == Triple::ArchType::dxil) + addDxilValVersion(TargetOpts.DxilValidatorVersion, M); + + generateGlobalCtorDtorCalls(); + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + addDisableOptimizations(M); + + const DataLayout &DL = M.getDataLayout(); + + for (auto &Buf : Buffers) { + layoutBuffer(Buf, DL); + GlobalVariable *GV = replaceBuffer(Buf); + M.getGlobalList().push_back(GV); + llvm::hlsl::ResourceClass RC = Buf.IsCBuffer + ? llvm::hlsl::ResourceClass::CBuffer + : llvm::hlsl::ResourceClass::SRV; + llvm::hlsl::ResourceKind RK = Buf.IsCBuffer + ? llvm::hlsl::ResourceKind::CBuffer + : llvm::hlsl::ResourceKind::TBuffer; + std::string TyName = + Buf.Name.str() + (Buf.IsCBuffer ? ".cb."
: ".tb.") + "ty"; + addBufferResourceAnnotation(GV, TyName, RC, RK, Buf.Binding); + } +} + +CGHLSLRuntime::Buffer::Buffer(const HLSLBufferDecl *D) + : Name(D->getName()), IsCBuffer(D->isCBuffer()), + Binding(D->getAttr<HLSLResourceBindingAttr>()) {} +void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV, + llvm::StringRef TyName, + llvm::hlsl::ResourceClass RC, + llvm::hlsl::ResourceKind RK, + BufferResBinding &Binding) { llvm::Module &M = CGM.getModule(); - addDxilValVersion(TargetOpts.DxilValidatorVersion, M); + + NamedMDNode *ResourceMD = nullptr; + switch (RC) { + case llvm::hlsl::ResourceClass::UAV: + ResourceMD = M.getOrInsertNamedMetadata("hlsl.uavs"); + break; + case llvm::hlsl::ResourceClass::SRV: + ResourceMD = M.getOrInsertNamedMetadata("hlsl.srvs"); + break; + case llvm::hlsl::ResourceClass::CBuffer: + ResourceMD = M.getOrInsertNamedMetadata("hlsl.cbufs"); + break; + default: + assert(false && "Unsupported buffer type!"); + return; + } + + assert(ResourceMD != nullptr && + "ResourceMD must have been set by the switch above."); + + llvm::hlsl::FrontendResource Res( + GV, TyName, RK, Binding.Reg.value_or(UINT_MAX), Binding.Space); + ResourceMD->addOperand(Res.getMetadata()); +} + +static llvm::hlsl::ResourceKind +castResourceShapeToResourceKind(HLSLResourceAttr::ResourceKind RK) { + switch (RK) { + case HLSLResourceAttr::ResourceKind::Texture1D: + return llvm::hlsl::ResourceKind::Texture1D; + case HLSLResourceAttr::ResourceKind::Texture2D: + return llvm::hlsl::ResourceKind::Texture2D; + case HLSLResourceAttr::ResourceKind::Texture2DMS: + return llvm::hlsl::ResourceKind::Texture2DMS; + case HLSLResourceAttr::ResourceKind::Texture3D: + return llvm::hlsl::ResourceKind::Texture3D; + case HLSLResourceAttr::ResourceKind::TextureCube: + return llvm::hlsl::ResourceKind::TextureCube; + case HLSLResourceAttr::ResourceKind::Texture1DArray: + return llvm::hlsl::ResourceKind::Texture1DArray; + case HLSLResourceAttr::ResourceKind::Texture2DArray: + return llvm::hlsl::ResourceKind::Texture2DArray; + case HLSLResourceAttr::ResourceKind::Texture2DMSArray: + return llvm::hlsl::ResourceKind::Texture2DMSArray; + case HLSLResourceAttr::ResourceKind::TextureCubeArray: + return llvm::hlsl::ResourceKind::TextureCubeArray; + case HLSLResourceAttr::ResourceKind::TypedBuffer: + return llvm::hlsl::ResourceKind::TypedBuffer; + case HLSLResourceAttr::ResourceKind::RawBuffer: + return llvm::hlsl::ResourceKind::RawBuffer; + case HLSLResourceAttr::ResourceKind::StructuredBuffer: + return llvm::hlsl::ResourceKind::StructuredBuffer; + case HLSLResourceAttr::ResourceKind::CBufferKind: + return llvm::hlsl::ResourceKind::CBuffer; + case HLSLResourceAttr::ResourceKind::SamplerKind: + return llvm::hlsl::ResourceKind::Sampler; + case HLSLResourceAttr::ResourceKind::TBuffer: + return llvm::hlsl::ResourceKind::TBuffer; + case HLSLResourceAttr::ResourceKind::RTAccelerationStructure: + return llvm::hlsl::ResourceKind::RTAccelerationStructure; + case HLSLResourceAttr::ResourceKind::FeedbackTexture2D: + return llvm::hlsl::ResourceKind::FeedbackTexture2D; + case HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray: + return llvm::hlsl::ResourceKind::FeedbackTexture2DArray; + } + // Make sure to update HLSLResourceAttr::ResourceKind when add new Kind to + // hlsl::ResourceKind. Assume FeedbackTexture2DArray is the last enum for + // HLSLResourceAttr::ResourceKind. 
+static llvm::hlsl::ResourceKind +castResourceShapeToResourceKind(HLSLResourceAttr::ResourceKind RK) { + switch (RK) { + case HLSLResourceAttr::ResourceKind::Texture1D: + return llvm::hlsl::ResourceKind::Texture1D; + case HLSLResourceAttr::ResourceKind::Texture2D: + return llvm::hlsl::ResourceKind::Texture2D; + case HLSLResourceAttr::ResourceKind::Texture2DMS: + return llvm::hlsl::ResourceKind::Texture2DMS; + case HLSLResourceAttr::ResourceKind::Texture3D: + return llvm::hlsl::ResourceKind::Texture3D; + case HLSLResourceAttr::ResourceKind::TextureCube: + return llvm::hlsl::ResourceKind::TextureCube; + case HLSLResourceAttr::ResourceKind::Texture1DArray: + return llvm::hlsl::ResourceKind::Texture1DArray; + case HLSLResourceAttr::ResourceKind::Texture2DArray: + return llvm::hlsl::ResourceKind::Texture2DArray; + case HLSLResourceAttr::ResourceKind::Texture2DMSArray: + return llvm::hlsl::ResourceKind::Texture2DMSArray; + case HLSLResourceAttr::ResourceKind::TextureCubeArray: + return llvm::hlsl::ResourceKind::TextureCubeArray; + case HLSLResourceAttr::ResourceKind::TypedBuffer: + return llvm::hlsl::ResourceKind::TypedBuffer; + case HLSLResourceAttr::ResourceKind::RawBuffer: + return llvm::hlsl::ResourceKind::RawBuffer; + case HLSLResourceAttr::ResourceKind::StructuredBuffer: + return llvm::hlsl::ResourceKind::StructuredBuffer; + case HLSLResourceAttr::ResourceKind::CBufferKind: + return llvm::hlsl::ResourceKind::CBuffer; + case HLSLResourceAttr::ResourceKind::SamplerKind: + return llvm::hlsl::ResourceKind::Sampler; + case HLSLResourceAttr::ResourceKind::TBuffer: + return llvm::hlsl::ResourceKind::TBuffer; + case HLSLResourceAttr::ResourceKind::RTAccelerationStructure: + return llvm::hlsl::ResourceKind::RTAccelerationStructure; + case HLSLResourceAttr::ResourceKind::FeedbackTexture2D: + return llvm::hlsl::ResourceKind::FeedbackTexture2D; + case HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray: + return llvm::hlsl::ResourceKind::FeedbackTexture2DArray; + } + // Make sure to update HLSLResourceAttr::ResourceKind when adding a new Kind + // to hlsl::ResourceKind. This assumes FeedbackTexture2DArray is the last + // entry in HLSLResourceAttr::ResourceKind. + static_assert( + static_cast<uint32_t>( + HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray) == + (static_cast<uint32_t>(llvm::hlsl::ResourceKind::NumEntries) - 2)); + llvm_unreachable("all switch cases should be covered"); +} + +void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) { + const Type *Ty = D->getType()->getPointeeOrArrayElementType(); + if (!Ty) + return; + const auto *RD = Ty->getAsCXXRecordDecl(); + if (!RD) + return; + const auto *Attr = RD->getAttr<HLSLResourceAttr>(); + if (!Attr) + return; + + HLSLResourceAttr::ResourceClass RC = Attr->getResourceType(); + llvm::hlsl::ResourceKind RK = + castResourceShapeToResourceKind(Attr->getResourceShape()); + + QualType QT(Ty, 0); + BufferResBinding Binding(D->getAttr<HLSLResourceBindingAttr>()); + addBufferResourceAnnotation(GV, QT.getAsString(), + static_cast<llvm::hlsl::ResourceClass>(RC), RK, + Binding); +} + +CGHLSLRuntime::BufferResBinding::BufferResBinding( + HLSLResourceBindingAttr *Binding) { + if (Binding) { + llvm::APInt RegInt(64, 0); + Binding->getSlot().substr(1).getAsInteger(10, RegInt); + Reg = RegInt.getLimitedValue(); + llvm::APInt SpaceInt(64, 0); + Binding->getSpace().substr(5).getAsInteger(10, SpaceInt); + Space = SpaceInt.getLimitedValue(); + } else { + Space = 0; + } +} + +void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes( + const FunctionDecl *FD, llvm::Function *Fn) { + const auto *ShaderAttr = FD->getAttr<HLSLShaderAttr>(); + assert(ShaderAttr && "All entry functions must have a HLSLShaderAttr"); + const StringRef ShaderAttrKindStr = "hlsl.shader"; + Fn->addFnAttr(ShaderAttrKindStr, + ShaderAttr->ConvertShaderTypeToStr(ShaderAttr->getType())); + if (HLSLNumThreadsAttr *NumThreadsAttr = FD->getAttr<HLSLNumThreadsAttr>()) { + const StringRef NumThreadsKindStr = "hlsl.numthreads"; + std::string NumThreadsStr = + formatv("{0},{1},{2}", NumThreadsAttr->getX(), NumThreadsAttr->getY(), + NumThreadsAttr->getZ()); + Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr); + } +} + +static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) { + if (const auto *VT = dyn_cast<FixedVectorType>(Ty)) { + Value *Result = PoisonValue::get(Ty); + for (unsigned I = 0; I < VT->getNumElements(); ++I) { + Value *Elt = B.CreateCall(F, {B.getInt32(I)}); + Result = B.CreateInsertElement(Result, Elt, I); + } + return Result; + } + return B.CreateCall(F, {B.getInt32(0)}); +} + +llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B, + const ParmVarDecl &D, + llvm::Type *Ty) { + assert(D.hasAttrs() && "Entry parameter missing annotation attribute!"); + if (D.hasAttr<HLSLSV_GroupIndexAttr>()) { + llvm::Function *DxGroupIndex = + CGM.getIntrinsic(Intrinsic::dx_flattened_thread_id_in_group); + return B.CreateCall(FunctionCallee(DxGroupIndex)); + } + if (D.hasAttr<HLSLSV_DispatchThreadIDAttr>()) { + llvm::Function *DxThreadID = CGM.getIntrinsic(Intrinsic::dx_thread_id); + return buildVectorInput(B, DxThreadID, Ty); + } + assert(false && "Unhandled parameter attribute"); + return nullptr; +} +
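setHLSLEntryAttributes above encodes shader metadata as plain string function attributes. Roughly, for a [numthreads(8, 8, 1)] compute entry it amounts to the following (the attribute values here are examples, not taken from the patch):

// Sketch only: the string attributes an HLSL compute entry ends up carrying.
#include "llvm/IR/Function.h"
#include "llvm/Support/FormatVariadic.h"

void sketchEntryAttrs(llvm::Function *EntryFn) {
  EntryFn->addFnAttr("hlsl.shader", "compute");
  EntryFn->addFnAttr("hlsl.numthreads",
                     llvm::formatv("{0},{1},{2}", 8, 8, 1).str());
}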
+void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, + llvm::Function *Fn) { + llvm::Module &M = CGM.getModule(); + llvm::LLVMContext &Ctx = M.getContext(); + auto *EntryTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false); + Function *EntryFn = + Function::Create(EntryTy, Function::ExternalLinkage, FD->getName(), &M); + + // Copy function attributes over; we have no argument or return attributes + // that can be valid on the real entry. + AttributeList NewAttrs = AttributeList::get(Ctx, AttributeList::FunctionIndex, + Fn->getAttributes().getFnAttrs()); + EntryFn->setAttributes(NewAttrs); + setHLSLEntryAttributes(FD, EntryFn); + + // Set the called function to internal linkage. + Fn->setLinkage(GlobalValue::InternalLinkage); + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", EntryFn); + IRBuilder<> B(BB); + llvm::SmallVector<Value *> Args; + // FIXME: support struct parameters where semantics are on members. + // See: https://github.com/llvm/llvm-project/issues/57874 + unsigned SRetOffset = 0; + for (const auto &Param : Fn->args()) { + if (Param.hasStructRetAttr()) { + // FIXME: support output. + // See: https://github.com/llvm/llvm-project/issues/57874 + SRetOffset = 1; + Args.emplace_back(PoisonValue::get(Param.getType())); + continue; + } + const ParmVarDecl *PD = FD->getParamDecl(Param.getArgNo() - SRetOffset); + Args.push_back(emitInputSemantic(B, *PD, Param.getType())); + } + + CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args); + (void)CI; + // FIXME: Handle codegen for return type semantics. + // See: https://github.com/llvm/llvm-project/issues/57875 + B.CreateRetVoid(); +} + +static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M, + bool CtorOrDtor) { + const auto *GV = + M.getNamedGlobal(CtorOrDtor ? "llvm.global_ctors" : "llvm.global_dtors"); + if (!GV) + return; + const auto *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) + return; + // The global_ctors array elements are structs of [Priority, Fn *, COMDat]. + // HLSL supports neither priorities nor COMDat values, so we verify those + // in asserts but do not otherwise handle them. + + for (const auto &Ctor : CA->operands()) { + if (isa<ConstantAggregateZero>(Ctor)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(Ctor); + + assert(cast<ConstantInt>(CS->getOperand(0))->getValue() == 65535 && + "HLSL doesn't support setting priority for global ctors."); + assert(isa<ConstantPointerNull>(CS->getOperand(2)) && + "HLSL doesn't support COMDat for global ctors."); + Fns.push_back(cast<Function>(CS->getOperand(1))); + } +} + +void CGHLSLRuntime::generateGlobalCtorDtorCalls() { + llvm::Module &M = CGM.getModule(); + SmallVector<Function *> CtorFns; + SmallVector<Function *> DtorFns; + gatherFunctions(CtorFns, M, true); + gatherFunctions(DtorFns, M, false); + + // Insert calls to the global constructors at the beginning of the entry + // block of each externally exported function. This is a bit of a hack: HLSL + // allows global constructors, but provides no driver support for + // initializing globals. + for (auto &F : M.functions()) { + if (!F.hasFnAttribute("hlsl.shader")) + continue; + IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin()); + for (auto *Fn : CtorFns) + B.CreateCall(FunctionCallee(Fn)); + + // Insert the global dtor calls before the terminator of the last basic + // block. + B.SetInsertPoint(F.back().getTerminator()); + for (auto *Fn : DtorFns) + B.CreateCall(FunctionCallee(Fn)); + } + + // For non-library profiles there is no need to keep the global ctor/dtor + // arrays once the calls have been added to the entry functions. + Triple T(M.getTargetTriple()); + if (T.getEnvironment() != Triple::EnvironmentType::Library) { + if (auto *GV = M.getNamedGlobal("llvm.global_ctors")) + GV->eraseFromParent(); + if (auto *GV = M.getNamedGlobal("llvm.global_dtors")) + GV->eraseFromParent(); + } }
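The llvm.global_ctors entries that gatherFunctions walks are produced elsewhere by a standard module utility; for reference, the producing side looks roughly like this (the init function name is hypothetical):

// Sketch only: how an entry { i32 65535, ptr @init, ptr null } typically
// lands in llvm.global_ctors; priority and COMDat match the asserts above.
#include "llvm/Transforms/Utils/ModuleUtils.h"

void sketchRegisterCtor(llvm::Module &M, llvm::Function *InitFn) {
  llvm::appendToGlobalCtors(M, InitFn, /*Priority=*/65535);
}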
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 268810f2ec9e..67413fbd4a78 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -15,21 +15,88 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H +#include "llvm/IR/IRBuilder.h" + +#include "clang/Basic/HLSLRuntime.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" + +#include <optional> +#include <vector> + +namespace llvm { +class GlobalVariable; +class Function; +class StructType; +} // namespace llvm + namespace clang { +class VarDecl; +class ParmVarDecl; +class HLSLBufferDecl; +class HLSLResourceBindingAttr; +class Type; +class DeclContext; + +class FunctionDecl; namespace CodeGen { class CodeGenModule; class CGHLSLRuntime { +public: + struct BufferResBinding { + // The ID, like the 2 in register(b2, space1). + std::optional<unsigned> Reg; + // The Space, like the 1 in register(b2, space1). + // The default value is 0. + unsigned Space; + BufferResBinding(HLSLResourceBindingAttr *Attr); + }; + struct Buffer { + Buffer(const HLSLBufferDecl *D); + llvm::StringRef Name; + // IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer). + bool IsCBuffer; + BufferResBinding Binding; + // Global variable and offset for each constant. + std::vector<std::pair<llvm::GlobalVariable *, unsigned>> Constants; + llvm::StructType *LayoutStruct = nullptr; + }; + protected: CodeGenModule &CGM; + llvm::Value *emitInputSemantic(llvm::IRBuilder<> &B, const ParmVarDecl &D, + llvm::Type *Ty); + public: CGHLSLRuntime(CodeGenModule &CGM) : CGM(CGM) {} virtual ~CGHLSLRuntime() {} + void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV); + void generateGlobalCtorDtorCalls(); + + void addBuffer(const HLSLBufferDecl *D); void finishCodeGen(); + + void setHLSLEntryAttributes(const FunctionDecl *FD, llvm::Function *Fn); + + void emitEntryFunction(const FunctionDecl *FD, llvm::Function *Fn); + void setHLSLFunctionAttributes(llvm::Function *, const FunctionDecl *); + +private: + void addBufferResourceAnnotation(llvm::GlobalVariable *GV, + llvm::StringRef TyName, + llvm::hlsl::ResourceClass RC, + llvm::hlsl::ResourceKind RK, + BufferResBinding &Binding); + void addConstant(VarDecl *D, Buffer &CB); + void addBufferDecls(const DeclContext *DC, Buffer &CB); + llvm::SmallVector<Buffer> Buffers; }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index 12a6cd8da603..e5d9db273c2d 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" +#include <optional> using namespace clang::CodeGen; using namespace llvm; @@ -37,7 +38,7 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); - Optional<bool> Enabled; + std::optional<bool> Enabled; if (Attrs.PipelineDisabled) Enabled = false; else if (Attrs.PipelineInitiationInterval != 0) @@ -82,11 +83,11 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext(); - Optional<bool> Enabled; + std::optional<bool> Enabled; if (Attrs.UnrollEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.UnrollEnable == LoopAttributes::Full) - Enabled = None; + Enabled = std::nullopt; else if (Attrs.UnrollEnable != LoopAttributes::Unspecified || Attrs.UnrollCount != 0) Enabled = true; @@ -144,7 +145,7 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); - Optional<bool> Enabled; + std::optional<bool> Enabled; if (Attrs.UnrollAndJamEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable || @@ -212,7 +213,7 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); - Optional<bool> Enabled; + std::optional<bool> Enabled; if (Attrs.VectorizeEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || @@ -330,7 +331,7 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); - Optional<bool> Enabled; + std::optional<bool> Enabled; if (Attrs.DistributeEnable == LoopAttributes::Disable) Enabled = false; if (Attrs.DistributeEnable == LoopAttributes::Enable) @@ -380,7 +381,7 @@ MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); - Optional<bool> Enabled; + std::optional<bool> Enabled; if (Attrs.UnrollEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.UnrollEnable == LoopAttributes::Full) @@ -496,7 +497,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, !EndLoc && !Attrs.MustProgress) return; - TempLoopID = MDNode::getTemporary(Header->getContext(), None); + TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt); } void LoopInfo::finish() { diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 1b6acb2b7212..7df2088a81d7 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -22,11 +22,14 @@ #include "clang/AST/StmtObjC.h" #include "clang/Basic/Diagnostic.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -138,7 +141,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc()); cast<llvm::LoadInst>(Ptr)->setMetadata( CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(getLLVMContext(), None)); + llvm::MDNode::get(getLLVMContext(), std::nullopt)); return Builder.CreateBitCast(Ptr, ConvertType(E->getType())); } @@ -370,16 +373,14 @@ static const Expr *findWeakLValue(const Expr *E) { /// /// If the runtime does support a required entrypoint, then this method will /// generate a call and return the resulting value. Otherwise it will return -/// None and the caller can generate a msgSend instead. 
-static Optional<llvm::Value *> -tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, - llvm::Value *Receiver, - const CallArgList& Args, Selector Sel, - const ObjCMethodDecl *method, - bool isClassMessage) { +/// std::nullopt and the caller can generate a msgSend instead. +static std::optional<llvm::Value *> tryGenerateSpecializedMessageSend( + CodeGenFunction &CGF, QualType ResultType, llvm::Value *Receiver, + const CallArgList &Args, Selector Sel, const ObjCMethodDecl *method, + bool isClassMessage) { auto &CGM = CGF.CGM; if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls) - return None; + return std::nullopt; auto &Runtime = CGM.getLangOpts().ObjCRuntime; switch (Sel.getMethodFamily()) { @@ -400,7 +401,7 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, if (isa<llvm::ConstantPointerNull>(arg)) return CGF.EmitObjCAllocWithZone(Receiver, CGF.ConvertType(ResultType)); - return None; + return std::nullopt; } } break; @@ -431,7 +432,7 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, default: break; } - return None; + return std::nullopt; } CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend( @@ -439,7 +440,7 @@ CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend( Selector Sel, llvm::Value *Receiver, const CallArgList &Args, const ObjCInterfaceDecl *OID, const ObjCMethodDecl *Method, bool isClassMessage) { - if (Optional<llvm::Value *> SpecializedResult = + if (std::optional<llvm::Value *> SpecializedResult = tryGenerateSpecializedMessageSend(CGF, ResultType, Receiver, Args, Sel, Method, isClassMessage)) { return RValue::get(*SpecializedResult); @@ -520,36 +521,36 @@ CGObjCRuntime::GetRuntimeProtocolList(ObjCProtocolDecl::protocol_iterator begin, /// Instead of '[[MyClass alloc] init]', try to generate /// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the /// caller side, as well as the optimized objc_alloc. -static Optional<llvm::Value *> +static std::optional<llvm::Value *> tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { auto &Runtime = CGF.getLangOpts().ObjCRuntime; if (!Runtime.shouldUseRuntimeFunctionForCombinedAllocInit()) - return None; + return std::nullopt; // Match the exact pattern '[[MyClass alloc] init]'. Selector Sel = OME->getSelector(); if (OME->getReceiverKind() != ObjCMessageExpr::Instance || !OME->getType()->isObjCObjectPointerType() || !Sel.isUnarySelector() || Sel.getNameForSlot(0) != "init") - return None; + return std::nullopt; // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' // with 'cls' a Class. 
auto *SubOME = dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParenCasts()); if (!SubOME) - return None; + return std::nullopt; Selector SubSel = SubOME->getSelector(); if (!SubOME->getType()->isObjCObjectPointerType() || !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc") - return None; + return std::nullopt; llvm::Value *Receiver = nullptr; switch (SubOME->getReceiverKind()) { case ObjCMessageExpr::Instance: if (!SubOME->getInstanceReceiver()->getType()->isObjCClassType()) - return None; + return std::nullopt; Receiver = CGF.EmitScalarExpr(SubOME->getInstanceReceiver()); break; @@ -563,7 +564,7 @@ tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { } case ObjCMessageExpr::SuperInstance: case ObjCMessageExpr::SuperClass: - return None; + return std::nullopt; } return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType())); @@ -590,7 +591,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, } } - if (Optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E)) + if (std::optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E)) return AdjustObjCObjectType(*this, E->getType(), RValue::get(*Val)); // We don't retain the receiver in delegate init calls, and this is @@ -768,7 +769,8 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, } args.push_back(OMD->getSelfDecl()); - args.push_back(OMD->getCmdDecl()); + if (!OMD->isDirectMethod()) + args.push_back(OMD->getCmdDecl()); args.append(OMD->param_begin(), OMD->param_end()); @@ -1110,11 +1112,47 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, callee, ReturnValueSlot(), args); } +// emitCmdValueForGetterSetterBody - Handle emitting the load necessary for +// the `_cmd` selector argument for getter/setter bodies. For direct methods, +// this returns a poison value; this matches the behavior before `_cmd` was +// removed from the direct-method ABI, since the getter/setter caller would +// never load one. For non-direct methods, this emits a load of the implicit +// `_cmd` storage. +static llvm::Value *emitCmdValueForGetterSetterBody(CodeGenFunction &CGF, + ObjCMethodDecl *MD) { + if (MD->isDirectMethod()) { + // Direct methods do not have a `_cmd` argument. Emit an undefined/poison + // value. This will be passed to objc_getProperty/objc_setProperty, which + // has never appeared to be bothered by receiving an undefined `_cmd`. + llvm::Type *selType = CGF.ConvertType(CGF.getContext().getObjCSelType()); + return llvm::PoisonValue::get(selType); + } + + return CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(MD->getCmdDecl()), "cmd"); +} +
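For reference, the runtime property accessors these synthesized bodies call receive `_cmd` but are not known to read it, which is why a poison value is acceptable for direct methods. Their shapes, reproduced here as a sketch (see the Objective-C runtime for the authoritative declarations):

// Sketch only: prototypes of the runtime property accessor helpers.
#include <objc/objc.h> // id, SEL, BOOL
#include <stddef.h>    // ptrdiff_t

extern "C" id objc_getProperty(id self, SEL _cmd, ptrdiff_t offset,
                               BOOL atomic);
extern "C" void objc_setProperty(id self, SEL _cmd, ptrdiff_t offset,
                                 id newValue, BOOL atomic,
                                 signed char shouldCopy);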
void CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyImplDecl *propImpl, const ObjCMethodDecl *GetterMethodDecl, llvm::Constant *AtomicHelperFn) { + + ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); + + if (ivar->getType().isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) { + if (!AtomicHelperFn) { + LValue Src = + EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar, 0); + LValue Dst = MakeAddrLValue(ReturnValue, ivar->getType()); + callCStructCopyConstructor(Dst, Src); + } else { + emitCPPObjectAtomicGetterCall(*this, ReturnValue.getPointer(), ivar, + AtomicHelperFn); + } + return; + } + // If there's a non-trivial 'get' expression, we just have to emit that. if (!hasTrivialGetExpr(propImpl)) { if (!AtomicHelperFn) { @@ -1135,8 +1173,6 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, QualType propType = prop->getType(); ObjCMethodDecl *getterMethod = propImpl->getGetterMethodDecl(); - ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); - // Pick an implementation strategy. PropertyImplStrategy strategy(CGM, propImpl); switch (strategy.getKind()) { @@ -1188,11 +1224,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // Return (ivar-type) objc_getProperty((id) self, _cmd, offset, true). // FIXME: Can't this be simpler? This might even be worse than the // corresponding gcc code. - llvm::Value *cmd = - Builder.CreateLoad(GetAddrOfLocalVar(getterMethod->getCmdDecl()), "cmd"); + llvm::Value *cmd = emitCmdValueForGetterSetterBody(*this, getterMethod); llvm::Value *self = Builder.CreateBitCast(LoadObjCSelf(), VoidPtrTy); llvm::Value *ivarOffset = - EmitIvarOffset(classImpl->getClassInterface(), ivar); + EmitIvarOffsetAsPointerDiff(classImpl->getClassInterface(), ivar); CallArgList args; args.add(RValue::get(self), getContext().getObjCIdType()); @@ -1404,6 +1439,24 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); ObjCMethodDecl *setterMethod = propImpl->getSetterMethodDecl(); + if (ivar->getType().isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) { + ParmVarDecl *PVD = *setterMethod->param_begin(); + if (!AtomicHelperFn) { + // Call the move assignment operator instead of calling the copy + // assignment operator and destructor. + LValue Dst = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar, + /*quals*/ 0); + LValue Src = MakeAddrLValue(GetAddrOfLocalVar(PVD), ivar->getType()); + callCStructMoveAssignmentOperator(Dst, Src); + } else { + // If atomic, the assignment is called via a locking API. + emitCPPObjectAtomicSetterCall(*this, setterMethod, ivar, AtomicHelperFn); + } + // Deactivate the destructor for the setter parameter. + DeactivateCleanupBlock(CalleeDestructedParamCleanups[PVD], AllocaInsertPt); + return; + } + // Just use the setter expression if Sema gave us one and it's // non-trivial. if (!hasTrivialSetExpr(propImpl)) { @@ -1474,12 +1527,11 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, // Emit objc_setProperty((id) self, _cmd, offset, arg, // <is-atomic>, <is-copy>).
- llvm::Value *cmd = - Builder.CreateLoad(GetAddrOfLocalVar(setterMethod->getCmdDecl())); + llvm::Value *cmd = emitCmdValueForGetterSetterBody(*this, setterMethod); llvm::Value *self = Builder.CreateBitCast(LoadObjCSelf(), VoidPtrTy); llvm::Value *ivarOffset = - EmitIvarOffset(classImpl->getClassInterface(), ivar); + EmitIvarOffsetAsPointerDiff(classImpl->getClassInterface(), ivar); Address argAddr = GetAddrOfLocalVar(*setterMethod->param_begin()); llvm::Value *arg = Builder.CreateLoad(argAddr, "arg"); arg = Builder.CreateBitCast(arg, VoidPtrTy); @@ -1749,7 +1801,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ &CGM.getContext().Idents.get("count") }; Selector FastEnumSel = - CGM.getContext().Selectors.getSelector(llvm::array_lengthof(II), &II[0]); + CGM.getContext().Selectors.getSelector(std::size(II), &II[0]); QualType ItemsTy = getContext().getConstantArrayType(getContext().getObjCIdType(), @@ -2290,7 +2342,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, CGM.getObjCEntrypoints().objc_retainBlock); call->setMetadata("clang.arc.copy_on_escape", - llvm::MDNode::get(Builder.getContext(), None)); + llvm::MDNode::get(Builder.getContext(), std::nullopt)); } return result; @@ -2332,7 +2384,8 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // Call the marker asm if we made one, which we do only at -O0. if (marker) - CGF.Builder.CreateCall(marker, None, CGF.getBundlesForFunclet(marker)); + CGF.Builder.CreateCall(marker, std::nullopt, + CGF.getBundlesForFunclet(marker)); } static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value, @@ -2418,7 +2471,7 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, if (precise == ARCImpreciseLifetime) { call->setMetadata("clang.imprecise_release", - llvm::MDNode::get(Builder.getContext(), None)); + llvm::MDNode::get(Builder.getContext(), std::nullopt)); } } @@ -2816,7 +2869,7 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value, if (precise == ARCImpreciseLifetime) { call->setMetadata("clang.imprecise_release", - llvm::MDNode::get(Builder.getContext(), None)); + llvm::MDNode::get(Builder.getContext(), std::nullopt)); } } @@ -3661,15 +3714,27 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { llvm::Constant * CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { + const ObjCPropertyDecl *PD = PID->getPropertyDecl(); + if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic))) + return nullptr; + + QualType Ty = PID->getPropertyIvarDecl()->getType(); + ASTContext &C = getContext(); + + if (Ty.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) { + // Call the move assignment operator instead of calling the copy assignment + // operator and destructor. 
+ CharUnits Alignment = C.getTypeAlignInChars(Ty); + llvm::Constant *Fn = getNonTrivialCStructMoveAssignmentOperator( + CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty); + return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + } + if (!getLangOpts().CPlusPlus || !getLangOpts().ObjCRuntime.hasAtomicCopyHelper()) return nullptr; - QualType Ty = PID->getPropertyIvarDecl()->getType(); if (!Ty->isRecordType()) return nullptr; - const ObjCPropertyDecl *PD = PID->getPropertyDecl(); - if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic))) - return nullptr; llvm::Constant *HelperFn = nullptr; if (hasTrivialSetExpr(PID)) return nullptr; @@ -3677,7 +3742,6 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( if ((HelperFn = CGM.getAtomicSetterHelperFnMap(Ty))) return HelperFn; - ASTContext &C = getContext(); IdentifierInfo *II = &CGM.getContext().Idents.get("__assign_helper_atomic_property_"); @@ -3748,18 +3812,27 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( return HelperFn; } -llvm::Constant * -CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( - const ObjCPropertyImplDecl *PID) { +llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( + const ObjCPropertyImplDecl *PID) { + const ObjCPropertyDecl *PD = PID->getPropertyDecl(); + if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic))) + return nullptr; + + QualType Ty = PD->getType(); + ASTContext &C = getContext(); + + if (Ty.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) { + CharUnits Alignment = C.getTypeAlignInChars(Ty); + llvm::Constant *Fn = getNonTrivialCStructCopyConstructor( + CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty); + return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + } + if (!getLangOpts().CPlusPlus || !getLangOpts().ObjCRuntime.hasAtomicCopyHelper()) return nullptr; - const ObjCPropertyDecl *PD = PID->getPropertyDecl(); - QualType Ty = PD->getType(); if (!Ty->isRecordType()) return nullptr; - if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic))) - return nullptr; llvm::Constant *HelperFn = nullptr; if (hasTrivialGetExpr(PID)) return nullptr; @@ -3767,7 +3840,6 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty))) return HelperFn; - ASTContext &C = getContext(); IdentifierInfo *II = &CGM.getContext().Idents.get("__copy_helper_atomic_property_"); @@ -3911,7 +3983,8 @@ static llvm::Value *emitIsPlatformVersionAtLeast(CodeGenFunction &CGF, llvm::SmallVector<llvm::Value *, 8> Args; auto EmitArgs = [&](const VersionTuple &Version, const llvm::Triple &TT) { - Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor(); + std::optional<unsigned> Min = Version.getMinor(), + SMin = Version.getSubminor(); Args.push_back( llvm::ConstantInt::get(CGM.Int32Ty, getBaseMachOPlatformID(TT))); Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor())); @@ -3949,7 +4022,8 @@ CodeGenFunction::EmitBuiltinAvailable(const VersionTuple &Version) { CGM.CreateRuntimeFunction(FTy, "__isOSVersionAtLeast"); } - Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor(); + std::optional<unsigned> Min = Version.getMinor(), + SMin = Version.getSubminor(); llvm::Value *Args[] = { llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor()), llvm::ConstantInt::get(CGM.Int32Ty, Min.value_or(0)), diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 7bbe9af7ed59..c7b193e34ea0 100644 --- 
a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -71,7 +71,7 @@ public: FTy = llvm::FunctionType::get(RetTy, ArgTys, false); } else { - FTy = llvm::FunctionType::get(RetTy, None, false); + FTy = llvm::FunctionType::get(RetTy, std::nullopt, false); } } @@ -985,7 +985,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto LiteralLength = SL->getLength(); - if ((CGM.getTarget().getPointerWidth(0) == 64) && + if ((CGM.getTarget().getPointerWidth(LangAS::Default) == 64) && (LiteralLength < 9) && !isNonASCII) { // Tiny strings are only used on 64-bit platforms. They store 8 7-bit // ASCII characters in the high 56 bits, followed by a 4-bit length and a @@ -1064,7 +1064,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Hash. Not currently initialised by the compiler. Fields.addInt(Int32Ty, 0); // pointer to the data string. - auto Arr = llvm::makeArrayRef(&ToBuf[0], ToPtr+1); + auto Arr = llvm::ArrayRef(&ToBuf[0], ToPtr + 1); auto *C = llvm::ConstantDataArray::get(VMContext, Arr); auto *Buffer = new llvm::GlobalVariable(TheModule, C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); @@ -3316,7 +3316,7 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) { auto fields = builder.beginStruct(); fields.addInt(Int32Ty, values.size()); auto array = fields.beginArray(); - for (auto v : values) array.add(v); + for (auto *v : values) array.add(v); array.finishAndAddTo(fields); llvm::Constant *GS = @@ -3851,9 +3851,9 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::Type *moduleEltTys[] = { LongTy, LongTy, PtrToInt8Ty, symtab->getType(), IntTy }; - llvm::StructType *moduleTy = - llvm::StructType::get(CGM.getLLVMContext(), - makeArrayRef(moduleEltTys).drop_back(unsigned(RuntimeVersion < 10))); + llvm::StructType *moduleTy = llvm::StructType::get( + CGM.getLLVMContext(), + ArrayRef(moduleEltTys).drop_back(unsigned(RuntimeVersion < 10))); ConstantInitBuilder builder(CGM); auto module = builder.beginStruct(moduleTy); @@ -3864,8 +3864,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { // The path to the source file where this module was declared SourceManager &SM = CGM.getContext().getSourceManager(); - Optional<FileEntryRef> mainFile = - SM.getFileEntryRefForID(SM.getMainFileID()); + OptionalFileEntryRef mainFile = SM.getFileEntryRefForID(SM.getMainFileID()); std::string path = (mainFile->getDir().getName() + "/" + mainFile->getName()).str(); module.add(MakeConstantString(path, ".objc_source_file_name")); diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 46e65eb1ed43..c739d3742f80 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -174,6 +174,7 @@ protected: public: llvm::IntegerType *ShortTy, *IntTy, *LongTy; llvm::PointerType *Int8PtrTy, *Int8PtrPtrTy; + llvm::PointerType *Int8PtrProgramASTy; llvm::Type *IvarOffsetVarTy; /// ObjectPtrTy - LLVM type for object handles (typeof(id)) @@ -736,14 +737,17 @@ public: // Also it is safe to make it readnone, since we never load or store the // classref except by calling this function. 
llvm::Type *params[] = { Int8PtrPtrTy }; + llvm::LLVMContext &C = CGM.getLLVMContext(); + llvm::AttributeSet AS = llvm::AttributeSet::get(C, { + llvm::Attribute::get(C, llvm::Attribute::NonLazyBind), + llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none()), + llvm::Attribute::get(C, llvm::Attribute::NoUnwind), + }); llvm::FunctionCallee F = CGM.CreateRuntimeFunction( llvm::FunctionType::get(ClassnfABIPtrTy, params, false), "objc_loadClassref", llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, - {llvm::Attribute::NonLazyBind, - llvm::Attribute::ReadNone, - llvm::Attribute::NoUnwind})); + llvm::AttributeList::FunctionIndex, AS)); if (!CGM.getTriple().isOSBinFormatCOFF()) cast<llvm::Function>(F.getCallee())->setLinkage( llvm::Function::ExternalWeakLinkage); @@ -1170,7 +1174,7 @@ public: static ProtocolMethodLists get(const ObjCProtocolDecl *PD) { ProtocolMethodLists result; - for (auto MD : PD->methods()) { + for (auto *MD : PD->methods()) { size_t index = (2 * size_t(MD->isOptional())) + (size_t(MD->isClassMethod())); result.Methods[index].push_back(MD); @@ -2144,7 +2148,8 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, if (!IsSuper) Arg0 = CGF.Builder.CreateBitCast(Arg0, ObjCTypes.ObjectPtrTy); ActualArgs.add(RValue::get(Arg0), Arg0Ty); - ActualArgs.add(RValue::get(SelValue), selTy); + if (!Method || !Method->isDirectMethod()) + ActualArgs.add(RValue::get(SelValue), selTy); ActualArgs.addFrom(CallArgs); // If we're calling a method, use the formal signature. @@ -2402,8 +2407,8 @@ void IvarLayoutBuilder::visitBlock(const CGBlockInfo &blockInfo) { Qualifiers::GC GCAttr = GetGCAttrTypeForType(CGM.getContext(), type); if (GCAttr == Qualifiers::Strong) { - assert(CGM.getContext().getTypeSize(type) - == CGM.getTarget().getPointerWidth(0)); + assert(CGM.getContext().getTypeSize(type) == + CGM.getTarget().getPointerWidth(LangAS::Default)); IvarsInfo.push_back(IvarInfo(fieldOffset, /*size in words*/ 1)); } } @@ -2696,7 +2701,7 @@ llvm::Constant *CGObjCCommonMac::getBitmapBlockLayout(bool ComputeByrefLayout) { llvm::Constant *nullPtr = llvm::Constant::getNullValue(CGM.Int8PtrTy); if (RunSkipBlockVars.empty()) return nullPtr; - unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(0); + unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(LangAS::Default); unsigned ByteSizeInBits = CGM.getTarget().getCharWidth(); unsigned WordSizeInBytes = WordSizeInBits/ByteSizeInBits; @@ -2882,7 +2887,7 @@ void CGObjCCommonMac::fillRunSkipBlockVars(CodeGenModule &CGM, RunSkipBlockVars.clear(); bool hasUnion = false; - unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(0); + unsigned WordSizeInBits = CGM.getTarget().getPointerWidth(LangAS::Default); unsigned ByteSizeInBits = CGM.getTarget().getCharWidth(); unsigned WordSizeInBytes = WordSizeInBits/ByteSizeInBits; @@ -3453,7 +3458,7 @@ static bool hasWeakMember(QualType type) { } if (auto recType = type->getAs<RecordType>()) { - for (auto field : recType->getDecl()->fields()) { + for (auto *field : recType->getDecl()->fields()) { if (hasWeakMember(field->getType())) return true; } @@ -4102,6 +4107,9 @@ void CGObjCCommonMac::GenerateDirectMethodPrologue( // only synthesize _cmd if it's referenced if (OMD->getCmdDecl()->isUsed()) { + // `_cmd` is not a parameter to direct methods, so storage must be + // explicitly declared for it. 
+ CGF.EmitVarDecl(*OMD->getCmdDecl()); Builder.CreateStore(GetSelector(CGF, OMD), CGF.GetAddrOfLocalVar(OMD->getCmdDecl())); } @@ -5739,11 +5747,13 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); + unsigned ProgramAS = CGM.getDataLayout().getProgramAddressSpace(); ShortTy = cast<llvm::IntegerType>(Types.ConvertType(Ctx.ShortTy)); IntTy = CGM.IntTy; LongTy = cast<llvm::IntegerType>(Types.ConvertType(Ctx.LongTy)); Int8PtrTy = CGM.Int8PtrTy; + Int8PtrProgramASTy = llvm::PointerType::get(CGM.Int8Ty, ProgramAS); Int8PtrPtrTy = CGM.Int8PtrPtrTy; // arm64 targets use "int" ivar offset variables. All others, @@ -5812,7 +5822,7 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // char *_imp; // } MethodTy = llvm::StructType::create("struct._objc_method", SelectorPtrTy, - Int8PtrTy, Int8PtrTy); + Int8PtrTy, Int8PtrProgramASTy); // struct _objc_cache * CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache"); @@ -6198,8 +6208,7 @@ void CGObjCNonFragileABIMac::AddModuleClassList( llvm::GlobalVariable *GV = new llvm::GlobalVariable( CGM.getModule(), Init->getType(), false, llvm::GlobalValue::PrivateLinkage, Init, SymbolName); - GV->setAlignment( - llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType()))); + GV->setAlignment(CGM.getDataLayout().getABITypeAlign(Init->getType())); GV->setSection(SectionName); CGM.addCompilerUsedGlobal(GV); } @@ -6431,8 +6440,7 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI, if (CGM.getTriple().isOSBinFormatMachO()) GV->setSection("__DATA, __objc_data"); - GV->setAlignment(llvm::Align( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy))); + GV->setAlignment(CGM.getDataLayout().getABITypeAlign(ObjCTypes.ClassnfABITy)); if (!CGM.getTriple().isOSBinFormatCOFF()) if (HiddenVisibility) GV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -6771,11 +6779,11 @@ void CGObjCNonFragileABIMac::emitMethodConstant(ConstantArrayBuilder &builder, if (forProtocol) { // Protocol methods have no implementation. So, this entry is always NULL. 
- method.addNullPointer(ObjCTypes.Int8PtrTy); + method.addNullPointer(ObjCTypes.Int8PtrProgramASTy); } else { llvm::Function *fn = GetMethodDefinition(MD); assert(fn && "no definition for method?"); - method.addBitCast(fn, ObjCTypes.Int8PtrTy); + method.addBitCast(fn, ObjCTypes.Int8PtrProgramASTy); } method.finishAndAddTo(builder); @@ -6893,8 +6901,8 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, llvm::GlobalVariable *IvarOffsetGV = ObjCIvarOffsetVariable(ID, Ivar); IvarOffsetGV->setInitializer( llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset)); - IvarOffsetGV->setAlignment(llvm::Align( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy))); + IvarOffsetGV->setAlignment( + CGM.getDataLayout().getABITypeAlign(ObjCTypes.IvarOffsetVarTy)); if (!CGM.getTriple().isOSBinFormatCOFF()) { // FIXME: This matches gcc, but shouldn't the visibility be set on the use @@ -7122,8 +7130,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( ProtocolRef); if (!CGM.getTriple().isOSBinFormatMachO()) PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef)); - PTGV->setAlignment(llvm::Align( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy))); + PTGV->setAlignment( + CGM.getDataLayout().getABITypeAlign(ObjCTypes.ProtocolnfABIPtrTy)); PTGV->setSection(GetSectionName("__objc_protolist", "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -7222,7 +7230,7 @@ CGObjCNonFragileABIMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) cast<llvm::LoadInst>(IvarOffsetValue) ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, None)); + llvm::MDNode::get(VMContext, std::nullopt)); } // This could be 32bit int or 64bit integer depending on the architecture. @@ -7622,7 +7630,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF, llvm::LoadInst* LI = CGF.Builder.CreateLoad(Addr); LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, None)); + llvm::MDNode::get(VMContext, std::nullopt)); return LI; } diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp index 550fd3d70bdc..9097a8cf7009 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -22,6 +22,7 @@ #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/CodeGenABITypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/Support/SaveAndRestore.h" using namespace clang; @@ -227,13 +228,18 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, CatchHandler &Handler = Handlers[I]; CGF.EmitBlock(Handler.Block); - llvm::CatchPadInst *CPI = nullptr; - SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(CGF.CurrentFuncletPad); - if (useFunclets) - if ((CPI = dyn_cast_or_null<llvm::CatchPadInst>(Handler.Block->getFirstNonPHI()))) { + + CodeGenFunction::LexicalScope Cleanups(CGF, Handler.Body->getSourceRange()); + SaveAndRestore RevertAfterScope(CGF.CurrentFuncletPad); + if (useFunclets) { + llvm::Instruction *CPICandidate = Handler.Block->getFirstNonPHI(); + if (auto *CPI = dyn_cast_or_null<llvm::CatchPadInst>(CPICandidate)) { CGF.CurrentFuncletPad = CPI; CPI->setOperand(2, CGF.getExceptionSlot().getPointer()); + CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI); } + } + llvm::Value *RawExn = CGF.getExceptionFromSlot(); // Enter the catch. 
@@ -241,8 +247,6 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, if (beginCatchFn) Exn = CGF.EmitNounwindRuntimeCall(beginCatchFn, RawExn, "exn.adjusted"); - CodeGenFunction::LexicalScope cleanups(CGF, Handler.Body->getSourceRange()); - if (endCatchFn) { // Add a cleanup to leave the catch. bool EndCatchMightThrow = (Handler.Variable == nullptr); @@ -260,15 +264,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, CGF.EmitAutoVarDecl(*CatchParam); EmitInitOfCatchParam(CGF, CastExn, CatchParam); } - if (CPI) - CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI); CGF.ObjCEHValueStack.push_back(Exn); CGF.EmitStmt(Handler.Body); CGF.ObjCEHValueStack.pop_back(); // Leave any cleanups associated with the catch. - cleanups.ForceCleanup(); + Cleanups.ForceCleanup(); CGF.EmitBranchThroughCleanup(Cont); } @@ -293,7 +295,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF, switch (paramDecl->getType().getQualifiers().getObjCLifetime()) { case Qualifiers::OCL_Strong: exn = CGF.EmitARCRetainNonBlock(exn); - LLVM_FALLTHROUGH; + [[fallthrough]]; case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: @@ -360,13 +362,15 @@ CGObjCRuntime::MessageSendInfo CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, QualType resultType, CallArgList &callArgs) { + unsigned ProgramAS = CGM.getDataLayout().getProgramAddressSpace(); + // If there's a method, use information from that. if (method) { const CGFunctionInfo &signature = CGM.getTypes().arrangeObjCMessageSendSignature(method, callArgs[0].Ty); llvm::PointerType *signatureType = - CGM.getTypes().GetFunctionType(signature)->getPointerTo(); + CGM.getTypes().GetFunctionType(signature)->getPointerTo(ProgramAS); const CGFunctionInfo &signatureForCall = CGM.getTypes().arrangeCall(signature, callArgs); @@ -380,7 +384,7 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, // Derive the signature to call from that. llvm::PointerType *signatureType = - CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo(); + CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo(ProgramAS); return MessageSendInfo(argsInfo, signatureType); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 091eb9da5af4..2284aa1d1eb6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <numeric> +#include <optional> using namespace clang; using namespace CodeGen; @@ -409,7 +410,7 @@ private: /// RAII for emitting code of OpenMP constructs. 
class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; - llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; const CodeGen::CGBlockInfo *BlockInfo = nullptr; bool NoInheritance = false; @@ -1057,14 +1058,16 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, return Field; } -CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, - StringRef Separator) - : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), - OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { +CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) + : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); - + llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, + hasRequiresUnifiedSharedMemory(), + CGM.getLangOpts().OpenMPOffloadMandatory); // Initialize Types used in OpenMPIRBuilder from OMPKinds.def OMPBuilder.initialize(); + OMPBuilder.setConfig(Config); + OffloadEntriesInfoManager.setConfig(Config); loadOffloadInfoMetadata(); } @@ -1084,14 +1087,7 @@ void CGOpenMPRuntime::clear() { } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { - SmallString<128> Buffer; - llvm::raw_svector_ostream OS(Buffer); - StringRef Sep = FirstSeparator; - for (StringRef Part : Parts) { - OS << Sep << Part; - Sep = Separator; - } - return std::string(OS.str()); + return OMPBuilder.createPlatformSpecificName(Parts); } static llvm::Function * @@ -1369,10 +1365,11 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - unsigned Flags) { + unsigned Flags, bool EmitLoc) { uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; - if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || + if ((!EmitLoc && + CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) || Loc.isInvalid()) { SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); } else { @@ -1595,9 +1592,9 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { /// Obtain information that uniquely identifies a target entry. This /// consists of the file and device IDs as well as line number associated with /// the relevant entry source location. 
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { +static llvm::TargetRegionEntryInfo +getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + StringRef ParentName = "") { SourceManager &SM = C.getSourceManager(); // The loc should be always valid and have a file ID (the user cannot use @@ -1617,29 +1614,27 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, << PLoc.getFilename() << EC.message(); } - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); + return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(), + PLoc.getLine()); } Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory))) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); OS << CGM.getMangledName(GlobalDecl(VD)); if (!VD->isExternallyVisible()) { - unsigned DeviceID, FileID, Line; - getTargetEntryUniqueInfo(CGM.getContext(), - VD->getCanonicalDecl()->getBeginLoc(), - DeviceID, FileID, Line); - OS << llvm::format("_%x", FileID); + auto EntryInfo = getTargetEntryUniqueInfo( + CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc()); + OS << llvm::format("_%x", EntryInfo.FileID); } OS << "_decl_tgt_ref_ptr"; } @@ -1647,7 +1642,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); if (!Ptr) { - Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName); + Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName); auto *GV = cast<llvm::GlobalVariable>(Ptr); GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); @@ -1667,8 +1662,8 @@ CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { !CGM.getContext().getTargetInfo().isTLSSupported()); // Lookup the entry, lazily creating it if necessary. 
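The getAddrOfDeclareTargetVar hunk above now also covers the OpenMP 5.2 'enter' map type (the new spelling of 'to') and derives the file ID for internal symbols from the returned TargetRegionEntryInfo instead of three out-parameters. A hedged sketch of the indirection-pointer naming it builds, with makeRefPtrName as a purely illustrative helper, not a Clang API:

#include <cstdio>
#include <string>

// "<mangled>[_<file-id-in-hex>]_decl_tgt_ref_ptr"
std::string makeRefPtrName(const std::string &Mangled, bool ExternallyVisible,
                           unsigned FileID) {
  std::string Name = Mangled;
  if (!ExternallyVisible) { // internal symbols need the file id to stay unique
    char Buf[16];
    std::snprintf(Buf, sizeof(Buf), "_%x", FileID);
    Name += Buf;
  }
  return Name + "_decl_tgt_ref_ptr";
}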
std::string Suffix = getName({"cache", ""}); - return getOrCreateInternalVariable( - CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); + return OMPBuilder.getOrCreateInternalVariable( + CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str()); } Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, @@ -1840,10 +1835,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, if (CGM.getLangOpts().OMPTargetTriples.empty() && !CGM.getLangOpts().OpenMPIsDevice) return false; - Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); @@ -1858,16 +1854,10 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, // Produce the unique prefix to identify the new target regions. We use // the source location of the variable declaration which we know to not // conflict with any target region. - unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); + auto EntryInfo = + getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); SmallString<128> Buffer, Out; - { - llvm::raw_svector_ostream OS(Buffer); - OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) - << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; - } + OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { @@ -1883,6 +1873,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( FTy, Twine(Buffer, "_ctor"), FI, Loc, false, llvm::GlobalValue::WeakODRLinkage); + Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); if (CGM.getTriple().isAMDGCN()) Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); @@ -1912,9 +1903,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, // Register the information for the entry associated with the constructor. 
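In the registration hunks that follow, the constructor and destructor entries go through the new llvm::OffloadEntriesInfoManager API: instead of passing the DeviceID/FileID/ParentName/Line tuple piecewise, the code copies the shared EntryInfo and rewrites only ParentName per entry kind. A hedged sketch of that pattern (the struct is a stand-in for llvm::TargetRegionEntryInfo):

#include <string>

struct TargetRegionEntryInfoSketch {
  std::string ParentName;
  unsigned DeviceID = 0, FileID = 0, Line = 0;
};

TargetRegionEntryInfoSketch withParent(TargetRegionEntryInfoSketch EI,
                                       const std::string &Parent) {
  EI.ParentName = Parent; // e.g. "<entry-name>_ctor" or "<entry-name>_dtor"
  return EI;              // DeviceID/FileID/Line stay those of the variable
}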
Out.clear(); + auto CtorEntryInfo = EntryInfo; + CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, - ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); + CtorEntryInfo, Ctor, ID, + llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); } if (VD->getType().isDestructedType() != QualType::DK_none) { llvm::Constant *Dtor; @@ -1929,6 +1922,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( FTy, Twine(Buffer, "_dtor"), FI, Loc, false, llvm::GlobalValue::WeakODRLinkage); + Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); if (CGM.getTriple().isAMDGCN()) Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); @@ -1958,9 +1952,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, } // Register the information for the entry associated with the destructor. Out.clear(); + auto DtorEntryInfo = EntryInfo; + DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, - ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); + DtorEntryInfo, Dtor, ID, + llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); } return CGM.getLangOpts().OpenMPIsDevice; } @@ -1970,8 +1966,8 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, StringRef Name) { std::string Suffix = getName({"artificial", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); - llvm::GlobalVariable *GAddr = - getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); + llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( + VarLVType, Twine(Name).concat(Suffix).str()); if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && CGM.getTarget().isTLSSupported()) { GAddr->setThreadLocal(/*Val=*/true); @@ -1985,8 +1981,9 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, /*isSigned=*/false), - getOrCreateInternalVariable( - CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; + OMPBuilder.getOrCreateInternalVariable( + CGM.VoidPtrPtrTy, + Twine(Name).concat(Suffix).concat(CacheSuffix).str())}; return Address( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitRuntimeCall( @@ -2131,30 +2128,10 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, return ThreadIDTemp; } -llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( - llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - Out << Name; - StringRef RuntimeName = Out.str(); - auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; - if (Elem.second) { - assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && - "OMP internal variable has different type than requested"); - return &*Elem.second; - } - - return Elem.second = new llvm::GlobalVariable( - CGM.getModule(), Ty, /*IsConstant*/ false, - llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), - Elem.first(), /*InsertBefore=*/nullptr, - llvm::GlobalValue::NotThreadLocal, 
AddressSpace); -} - llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); std::string Name = getName({Prefix, "var"}); - return getOrCreateInternalVariable(KmpCriticalNameTy, Name); + return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); } namespace { @@ -2583,6 +2560,22 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Args); } +void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, + Expr *ME, bool IsFatal) { + llvm::Value *MVL = + ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + // Build call void __kmpc_error(ident_t *loc, int severity, const char + // *message) + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true), + llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1), + CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)}; + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_error), + Args); +} + /// Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { @@ -2951,328 +2944,55 @@ enum KmpTaskTFields { }; } // anonymous namespace -bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { - return OffloadEntriesTargetRegion.empty() && - OffloadEntriesDeviceGlobalVar.empty(); -} - -/// Initialize target region entry. -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - unsigned Order) { - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the device " - "code generation."); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = - OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, - OMPTargetRegionEntryTargetRegion); - ++OffloadingEntriesNum; -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - llvm::Constant *Addr, llvm::Constant *ID, - OMPTargetRegionEntryKind Flags) { - // If we are emitting code for a target, the entry is already initialized, - // only has to be registered. - if (CGM.getLangOpts().OpenMPIsDevice) { - // This could happen if the device compilation is invoked standalone. 
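emitErrorCall above is new: it lowers the OpenMP 5.1 'error' directive to a runtime call, and it asks emitUpdateLocation for a real source location even when debug info is disabled (without that override the builder falls back to the default ";unknown;unknown;0;0;;" location string). A hedged reconstruction of the entry point from the comment in the hunk; ident_t is an opaque stand-in for libomp's location descriptor:

extern "C" {
typedef struct ident ident_t; // opaque source-location descriptor (stand-in)

// severity matches 'IsFatal ? 2 : 1' above: 1 = warning, 2 = fatal
void __kmpc_error(ident_t *loc, int severity, const char *message);
}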
- if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) - return; - auto &Entry = - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; - Entry.setAddress(Addr); - Entry.setID(ID); - Entry.setFlags(Flags); - } else { - if (Flags == - OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && - hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, - /*IgnoreAddressId*/ true)) - return; - assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && - "Target region entry already registered!"); - OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; - ++OffloadingEntriesNum; - } -} - -bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - bool IgnoreAddressId) const { - auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); - if (PerDevice == OffloadEntriesTargetRegion.end()) - return false; - auto PerFile = PerDevice->second.find(FileID); - if (PerFile == PerDevice->second.end()) - return false; - auto PerParentName = PerFile->second.find(ParentName); - if (PerParentName == PerFile->second.end()) - return false; - auto PerLine = PerParentName->second.find(LineNum); - if (PerLine == PerParentName->second.end()) - return false; - // Fail if this entry is already registered. - if (!IgnoreAddressId && - (PerLine->second.getAddress() || PerLine->second.getID())) - return false; - return true; -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( - const OffloadTargetRegionEntryInfoActTy &Action) { - // Scan all target region entries and perform the provided action. - for (const auto &D : OffloadEntriesTargetRegion) - for (const auto &F : D.second) - for (const auto &P : F.second) - for (const auto &L : P.second) - Action(D.first, F.first, P.first(), L.first, L.second); -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - initializeDeviceGlobalVarEntryInfo(StringRef Name, - OMPTargetGlobalVarEntryKind Flags, - unsigned Order) { - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the device " - "code generation."); - OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); - ++OffloadingEntriesNum; -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, - CharUnits VarSize, - OMPTargetGlobalVarEntryKind Flags, - llvm::GlobalValue::LinkageTypes Linkage) { - if (CGM.getLangOpts().OpenMPIsDevice) { - // This could happen if the device compilation is invoked standalone. 
- if (!hasDeviceGlobalVarEntryInfo(VarName)) - return; - auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; - if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { - if (Entry.getVarSize().isZero()) { - Entry.setVarSize(VarSize); - Entry.setLinkage(Linkage); - } - return; - } - Entry.setVarSize(VarSize); - Entry.setLinkage(Linkage); - Entry.setAddress(Addr); - } else { - if (hasDeviceGlobalVarEntryInfo(VarName)) { - auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; - assert(Entry.isValid() && Entry.getFlags() == Flags && - "Entry not initialized!"); - if (Entry.getVarSize().isZero()) { - Entry.setVarSize(VarSize); - Entry.setLinkage(Linkage); - } - return; - } - OffloadEntriesDeviceGlobalVar.try_emplace( - VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); - ++OffloadingEntriesNum; - } -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - actOnDeviceGlobalVarEntriesInfo( - const OffloadDeviceGlobalVarEntryInfoActTy &Action) { - // Scan all target region entries and perform the provided action. - for (const auto &E : OffloadEntriesDeviceGlobalVar) - Action(E.getKey(), E.getValue()); -} - -void CGOpenMPRuntime::createOffloadEntry( - llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) { - OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags); -} - void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { - // Emit the offloading entries and metadata so that the device codegen side - // can easily figure out what to emit. The produced metadata looks like - // this: - // - // !omp_offload.info = !{!1, ...} - // - // Right now we only generate metadata for function that contain target - // regions. - // If we are in simd mode or there are no entries, we don't need to do // anything. if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &C = M.getContext(); - SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, - SourceLocation, StringRef>, - 16> - OrderedEntries(OffloadEntriesInfoManager.size()); - llvm::SmallVector<StringRef, 16> ParentFunctions( - OffloadEntriesInfoManager.size()); - - // Auxiliary methods to create metadata values and strings. - auto &&GetMDInt = [this](unsigned V) { - return llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(CGM.Int32Ty, V)); - }; - - auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; - - // Create the offloading info metadata node. - llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - - // Create function that emits metadata for each target region entry; - auto &&TargetRegionMetadataEmitter = - [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, - &GetMDString]( - unsigned DeviceID, unsigned FileID, StringRef ParentName, - unsigned Line, - const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { - // Generate metadata for target regions. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (0). - // - Entry 1 -> Device ID of the file where the entry was identified. - // - Entry 2 -> File ID of the file where the entry was identified. - // - Entry 3 -> Mangled name of the function where the entry was - // identified. - // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. - // The first element of the metadata node is the kind. 
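For reference while reading the removed emitters below, these are the operand layouts they produced for !omp_offload.info, per the comments above, expressed as hedged C++ struct sketches (operand 0 is always the kind tag):

struct TargetRegionEntryMD {   // kind == 0
  unsigned Kind;               // 0
  unsigned DeviceID, FileID;   // unique IDs of the defining file
  const char *ParentName;      // mangled name of the enclosing function
  unsigned Line, Order;        // source line and entry creation order
};

struct DeviceGlobalVarEntryMD { // kind == 1
  unsigned Kind;                // 1
  const char *MangledName;      // name of the declare-target variable
  unsigned Flags, Order;        // declare-target kind and creation order
};

After this change the same metadata is produced and parsed by OpenMPIRBuilder, so host and device compilations keep interoperating on the !omp_offload.info format.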
- llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), - GetMDInt(FileID), GetMDString(ParentName), - GetMDInt(Line), GetMDInt(E.getOrder())}; - - SourceLocation Loc; - for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), - E = CGM.getContext().getSourceManager().fileinfo_end(); - I != E; ++I) { - if (I->getFirst()->getUniqueID().getDevice() == DeviceID && - I->getFirst()->getUniqueID().getFile() == FileID) { - Loc = CGM.getContext().getSourceManager().translateFileLineCol( - I->getFirst(), Line, 1); - break; - } - } - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); - ParentFunctions[E.getOrder()] = ParentName; - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( - TargetRegionMetadataEmitter); - - // Create function that emits metadata for each device global variable entry; - auto &&DeviceGlobalVarMetadataEmitter = - [&C, &OrderedEntries, &GetMDInt, &GetMDString, - MD](StringRef MangledName, - const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar - &E) { - // Generate metadata for global variables. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (1). - // - Entry 1 -> Mangled name of the variable. - // - Entry 2 -> Declare target kind. - // - Entry 3 -> Order the entry was created. - // The first element of the metadata node is the kind. - llvm::Metadata *Ops[] = { - GetMDInt(E.getKind()), GetMDString(MangledName), - GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; - - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = - std::make_tuple(&E, SourceLocation(), MangledName); - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( - DeviceGlobalVarMetadataEmitter); - - for (const auto &E : OrderedEntries) { - assert(std::get<0>(E) && "All ordered entries must exist!"); - if (const auto *CE = - dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( - std::get<0>(E))) { - if (!CE->getID() || !CE->getAddress()) { - // Do not blame the entry if the parent funtion is not emitted. 
- StringRef FnName = ParentFunctions[CE->getOrder()]; - if (!CGM.GetGlobalValue(FnName)) - continue; - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for target region in %0 is incorrect: either the " - "address or the ID is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; - continue; - } - createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, - CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: - OffloadEntryInfoDeviceGlobalVar>( - std::get<0>(E))) { - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = - static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( - CE->getFlags()); - switch (Flags) { - case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { - if (CGM.getLangOpts().OpenMPIsDevice && - CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, "Offloading entry for declare target " - "variable %0 is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); - continue; - } - // The vaiable has no definition - no need to add the entry. - if (CE->getVarSize().isZero()) - continue; - break; - } - case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: - assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || - (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && - "Declaret target link address is set."); - if (CGM.getLangOpts().OpenMPIsDevice) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); - continue; + llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = + [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, + const llvm::TargetRegionEntryInfo &EntryInfo) -> void { + SourceLocation Loc; + if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && + I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), EntryInfo.Line, 1); + break; } - break; } - - // Hidden or internal symbols on the device are not externally visible. We - // should not attempt to register them by creating an offloading entry. 
- if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress())) - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - continue; - - createOffloadEntry(CE->getAddress(), CE->getAddress(), - CE->getVarSize().getQuantity(), Flags, - CE->getLinkage()); - } else { - llvm_unreachable("Unsupported entry kind."); } - } + switch (Kind) { + case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for target region in " + "%0 is incorrect: either the " + "address or the ID is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + } break; + } + }; + + OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager, + ErrorReportFn); } /// Loads all the offload entries information from the host IR @@ -3306,42 +3026,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; } - llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); - if (!MD) - return; - - for (llvm::MDNode *MN : MD->operands()) { - auto &&GetMDInt = [MN](unsigned Idx) { - auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); - return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); - }; - - auto &&GetMDString = [MN](unsigned Idx) { - auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); - return V->getString(); - }; - - switch (GetMDInt(0)) { - default: - llvm_unreachable("Unexpected metadata!"); - break; - case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: - OffloadingEntryInfoTargetRegion: - OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), - /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), - /*Order=*/GetMDInt(5)); - break; - case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: - OffloadingEntryInfoDeviceGlobalVar: - OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( - /*MangledName=*/GetMDString(1), - static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( - /*Flags=*/GetMDInt(2)), - /*Order=*/GetMDInt(3)); - break; - } - } + OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { @@ -4501,39 +4186,26 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, return Result; } -namespace { -/// Dependence kind for RTL. -enum RTLDependenceKindTy { - DepIn = 0x01, - DepInOut = 0x3, - DepMutexInOutSet = 0x4, - DepInOutSet = 0x8, - DepOmpAllMem = 0x80, -}; -/// Fields ids in kmp_depend_info record. -enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; -} // namespace - /// Translates internal dependency kind into the runtime kind. static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { RTLDependenceKindTy DepKind; switch (K) { case OMPC_DEPEND_in: - DepKind = DepIn; + DepKind = RTLDependenceKindTy::DepIn; break; // Out and InOut dependencies must use the same code. 
case OMPC_DEPEND_out: case OMPC_DEPEND_inout: - DepKind = DepInOut; + DepKind = RTLDependenceKindTy::DepInOut; break; case OMPC_DEPEND_mutexinoutset: - DepKind = DepMutexInOutSet; + DepKind = RTLDependenceKindTy::DepMutexInOutSet; break; case OMPC_DEPEND_inoutset: - DepKind = DepInOutSet; + DepKind = RTLDependenceKindTy::DepInOutSet; break; case OMPC_DEPEND_outallmemory: - DepKind = DepOmpAllMem; + DepKind = RTLDependenceKindTy::DepOmpAllMem; break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: @@ -4581,7 +4253,9 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( - NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + NumDepsBase, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); return std::make_pair(NumDeps, Base); } @@ -4627,18 +4301,24 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, } // deps[i].base_addr = &<Dependencies[i].second>; LValue BaseAddrLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + Base, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); // deps[i].len = sizeof(<Dependencies[i].second>); LValue LenLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Len)); + Base, *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::Len))); CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = <Dependencies[i].first>; RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); LValue FlagsLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), - FlagsLVal); + Base, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::Flags))); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), + FlagsLVal); if (unsigned *P = Pos.dyn_cast<unsigned *>()) { ++(*P); } else { @@ -4655,7 +4335,7 @@ SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data) { assert(Data.DepKind == OMPC_DEPEND_depobj && - "Expected depobj dependecy kind."); + "Expected depobj dependency kind."); SmallVector<llvm::Value *, 4> Sizes; SmallVector<LValue, 4> SizeLVals; ASTContext &C = CGF.getContext(); @@ -4695,7 +4375,7 @@ void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Data, Address DependenciesArray) { assert(Data.DepKind == OMPC_DEPEND_depobj && - "Expected depobj dependecy kind."); + "Expected depobj dependency kind."); llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); { OMPIteratorGeneratorScope IteratorScope( @@ -4751,7 +4431,8 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); llvm::Value *NumOfRegularWithIterators = llvm::ConstantInt::get(CGF.IntPtrTy, 0); - // Calculate number of depobj dependecies and regular deps with the iterators. + // Calculate number of depobj dependencies and regular deps with the + // iterators. 
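emitDependData above fills one runtime record per dependence, and the emitTaskCall and emitTaskwaitCall hunks below grow the argument array to seven entries for the new __kmpc_omp_taskwait_deps_51 entry point. A hedged sketch of both, reconstructed from the RTLDependInfoFields order, the former RTLDependenceKindTy values, and the call comment below; all types are stand-ins for the OpenMP runtime's kmp.h:

#include <cstddef>
#include <cstdint>

struct kmp_depend_info_sketch {
  intptr_t base_addr; // deps[i].base_addr = &item
  size_t len;         // deps[i].len = sizeof(item)
  uint8_t flags;      // DepIn=0x1, DepInOut=0x3, DepMutexInOutSet=0x4,
                      //   DepInOutSet=0x8, DepOmpAllMem=0x80
};

extern "C" {
typedef int kmp_int32;        // stand-in
typedef struct ident ident_t; // stand-in

void __kmpc_omp_taskwait_deps_51(ident_t *loc, kmp_int32 gtid, kmp_int32 ndeps,
                                 kmp_depend_info_sketch *dep_list,
                                 kmp_int32 ndeps_noalias,
                                 kmp_depend_info_sketch *noalias_dep_list,
                                 kmp_int32 has_no_wait); // the new 7th argument
}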
for (const OMPTaskDataTy::DependData &D : Dependencies) { if (D.DepKind == OMPC_DEPEND_depobj) { SmallVector<llvm::Value *, 4> Sizes = @@ -4825,7 +4506,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], DependenciesArray); } - // Copy regular dependecies with iterators. + // Copy regular dependencies with iterators. LValue PosLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); @@ -4913,7 +4594,9 @@ Address CGOpenMPRuntime::emitDepobjDependClause( LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); // deps[i].base_addr = NumDependencies; LValue BaseAddrLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + Base, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); llvm::PointerUnion<unsigned *, LValue *> Pos; unsigned Idx = 1; @@ -4993,9 +4676,11 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, // deps[i].flags = NewDepKind; RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); LValue FlagsLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), - FlagsLVal); + Base, *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::Flags))); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), + FlagsLVal); // Shift the address forward by one element. Address ElementNext = @@ -5073,7 +4758,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, Region->emitUntiedSwitch(CGF); }; - llvm::Value *DepWaitTaskArgs[6]; + llvm::Value *DepWaitTaskArgs[7]; if (!Data.Dependences.empty()) { DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; @@ -5081,6 +4766,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepWaitTaskArgs[3] = DependenciesArray.getPointer(); DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); } auto &M = CGM.getModule(); auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, @@ -5092,9 +4779,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. if (!Data.Dependences.empty()) - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), - DepWaitTaskArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -5595,7 +5282,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, }; RegionCodeGenTy RCG(CodeGen); CommonActionTy Action( - nullptr, llvm::None, + nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), WithNowait ? 
OMPRTL___kmpc_end_reduce_nowait : OMPRTL___kmpc_end_reduce), @@ -5717,7 +5404,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ThreadId, // i32 <gtid> Lock // kmp_critical_name *&<lock> }; - CommonActionTy Action(nullptr, llvm::None, + CommonActionTy Action(nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_end_reduce), EndArgs); @@ -6142,24 +5829,26 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *NumOfElements; std::tie(NumOfElements, DependenciesArray) = emitDependClause(CGF, Data.Dependences, Loc); - llvm::Value *DepWaitTaskArgs[6]; if (!Data.Dependences.empty()) { + llvm::Value *DepWaitTaskArgs[7]; DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; DepWaitTaskArgs[2] = NumOfElements; DepWaitTaskArgs[3] = DependenciesArray.getPointer(); DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); CodeGenFunction::RunCleanupsScope LocalScope(CGF); - // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, + // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info - // is specified. - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), - DepWaitTaskArgs); + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, + // kmp_int32 has_no_wait); if dependence info is specified. + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); } else { @@ -6333,7 +6022,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - assert(!ParentName.empty() && "Invalid target region parent name!"); + assert(!ParentName.empty() && "Invalid target entry parent name!"); HasEmittedTargetRegion = true; SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { @@ -6405,99 +6094,32 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - // Create a unique name for the entry function using the source location - // information of the current target region. The name will be something like: - // - // __omp_offloading_DD_FFFF_PP_lBB - // - // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the - // mangled name of the function that encloses the target region and BB is the - // line number of the target region. 
- const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || - !CGM.getLangOpts().OpenMPOffloadMandatory; - unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, - Line); - SmallString<64> EntryFnName; - { - llvm::raw_svector_ostream OS(EntryFnName); - OS << "__omp_offloading" << llvm::format("_%x", DeviceID) - << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; - } - - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + auto EntryInfo = + getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName); CodeGenFunction CGF(CGM, true); - CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - - if (BuildOutlinedFn) - OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); - - // If this target outline function is not an offload entry, we don't need to - // register it. - if (!IsOffloadEntry) - return; - - // The target region ID is used by the runtime library to identify the current - // target region, so it only has to be unique and not necessarily point to - // anything. It could be the pointer to the outlined function that implements - // the target region, but we aren't using that so that the compiler doesn't - // need to keep that, and could therefore inline the host function if proven - // worthwhile during optimization. In the other hand, if emitting code for the - // device, the ID has to be the function address so that it can retrieved from - // the offloading entry and launched by the runtime library. We also mark the - // outlined function to have external linkage in case we are emitting code for - // the device, because these functions will be entry points to the device. - - if (CGM.getLangOpts().OpenMPIsDevice) { - OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); - OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage); - OutlinedFn->setDSOLocal(false); - if (CGM.getTriple().isAMDGCN()) - OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - } else { - std::string Name = getName({EntryFnName, "region_id"}); - OutlinedFnID = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Name); - } - - // If we do not allow host fallback we still need a named address to use. - llvm::Constant *TargetRegionEntryAddr = OutlinedFn; - if (!BuildOutlinedFn) { - assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && - "Named kernel already exists?"); - TargetRegionEntryAddr = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); - } + llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = + [&CGF, &D, &CodeGen](StringRef EntryFnName) { + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - // Register the information for the entry associated with this target region. 
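The naming scheme spelled out by the removed comment above ("__omp_offloading_DD_FFFF_PP_lBB") now lives behind OffloadEntriesInfoManager::getTargetRegionEntryFnName. A hedged sketch of the format, with makeEntryFnName as an illustrative helper only:

#include <cstdio>
#include <string>

// "__omp_offloading_<device-id-hex>_<file-id-hex>_<parent>_l<line>"
std::string makeEntryFnName(unsigned DeviceID, unsigned FileID,
                            const std::string &ParentName, unsigned Line) {
  char Prefix[64];
  std::snprintf(Prefix, sizeof(Prefix), "__omp_offloading_%x_%x_", DeviceID,
                FileID);
  return Prefix + ParentName + "_l" + std::to_string(Line);
}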
- OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, - OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + }; - // Add NumTeams and ThreadLimit attributes to the outlined GPU function + // Get NumTeams and ThreadLimit attributes int32_t DefaultValTeams = -1; - getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); - if (DefaultValTeams > 0 && OutlinedFn) { - OutlinedFn->addFnAttr("omp_target_num_teams", - std::to_string(DefaultValTeams)); - } int32_t DefaultValThreads = -1; + getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); - if (DefaultValThreads > 0 && OutlinedFn) { - OutlinedFn->addFnAttr("omp_target_thread_limit", - std::to_string(DefaultValThreads)); - } - if (BuildOutlinedFn) + OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo, + GenerateOutlinedFunction, DefaultValTeams, + DefaultValThreads, IsOffloadEntry, + OutlinedFn, OutlinedFnID); + + if (OutlinedFn != nullptr) CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); } @@ -6808,10 +6430,8 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, } if (isOpenMPSimdDirective(Dir->getDirectiveKind())) return CGF.Builder.getInt32(1); - return DefaultThreadLimitVal; } - return DefaultThreadLimitVal ? DefaultThreadLimitVal - : CGF.Builder.getInt32(0); + return DefaultThreadLimitVal; } const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( @@ -6954,12 +6574,14 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( return NumThreads; const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); + // TODO: The standard is not clear how to resolve two thread limit clauses, + // let's pick the teams one if it's present, otherwise the target one. 
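A hedged illustration of the TODO above: since OpenMP 5.1 the target construct can carry its own thread_limit, so a combined region can see two competing clauses, and this code resolves the tie in favor of the teams clause:

void threadLimitExample() {
#pragma omp target thread_limit(64)
#pragma omp teams thread_limit(32) // per the TODO above, this one is picked
  {
  }
}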
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { - if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { + if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) { + ThreadLimitClause = TLC; CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const auto *ThreadLimitClause = - Dir->getSingleClause<OMPThreadLimitClause>(); CodeGenFunction::LexicalScope Scope( CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); if (const auto *PreInit = @@ -6974,11 +6596,15 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( } } } - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } + } + if (ThreadLimitClause) { + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + } + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { CS = Dir->getInnermostCapturedStmt(); @@ -7029,7 +6655,10 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } - return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); + if (llvm::Value *NumThreads = + getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) + return NumThreads; + return Bld.getInt32(0); case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -7164,67 +6793,13 @@ LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); // code for that information. class MappableExprsHandler { public: - /// Values for bit flags used to specify the mapping type for - /// offloading. - enum OpenMPOffloadMappingFlags : uint64_t { - /// No flags - OMP_MAP_NONE = 0x0, - /// Allocate memory on the device and move data from host to device. - OMP_MAP_TO = 0x01, - /// Allocate memory on the device and move data from device to host. - OMP_MAP_FROM = 0x02, - /// Always perform the requested mapping action on the element, even - /// if it was already mapped before. - OMP_MAP_ALWAYS = 0x04, - /// Delete the element from the device environment, ignoring the - /// current reference count associated with the element. - OMP_MAP_DELETE = 0x08, - /// The element being mapped is a pointer-pointee pair; both the - /// pointer and the pointee should be mapped. - OMP_MAP_PTR_AND_OBJ = 0x10, - /// This flags signals that the base address of an entry should be - /// passed to the target kernel as an argument. - OMP_MAP_TARGET_PARAM = 0x20, - /// Signal that the runtime library has to return the device pointer - /// in the current position for the data being mapped. Used when we have the - /// use_device_ptr or use_device_addr clause. - OMP_MAP_RETURN_PARAM = 0x40, - /// This flag signals that the reference being passed is a pointer to - /// private data. - OMP_MAP_PRIVATE = 0x80, - /// Pass the element to the device by value. - OMP_MAP_LITERAL = 0x100, - /// Implicit map - OMP_MAP_IMPLICIT = 0x200, - /// Close is a hint to the runtime to allocate memory close to - /// the target device. 
- OMP_MAP_CLOSE = 0x400, - /// 0x800 is reserved for compatibility with XLC. - /// Produce a runtime error if the data is not already allocated. - OMP_MAP_PRESENT = 0x1000, - // Increment and decrement a separate reference counter so that the data - // cannot be unmapped within the associated region. Thus, this flag is - // intended to be used on 'target' and 'target data' directives because they - // are inherently structured. It is not intended to be used on 'target - // enter data' and 'target exit data' directives because they are inherently - // dynamic. - // This is an OpenMP extension for the sake of OpenACC support. - OMP_MAP_OMPX_HOLD = 0x2000, - /// Signal that the runtime library should use args as an array of - /// descriptor_dim pointers and use args_size as dims. Used when we have - /// non-contiguous list items in target update directive - OMP_MAP_NON_CONTIG = 0x100000000000, - /// The 16 MSBs of the flags indicate whether the entry is member of some - /// struct/class. - OMP_MAP_MEMBER_OF = 0xffff000000000000, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), - }; - /// Get the offset of the OMP_MAP_MEMBER_OF field. static unsigned getFlagMemberOffset() { unsigned Offset = 0; - for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); - Remain = Remain >> 1) + for (uint64_t Remain = + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); + !(Remain & 1); Remain = Remain >> 1) Offset++; return Offset; } @@ -7388,6 +6963,13 @@ private: SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> DevPointersMap; + /// Map between device addr declarations and their expression components. + /// The key value for declarations in 'this' is null. + llvm::DenseMap< + const ValueDecl *, + SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> + HasDevAddrsMap; + /// Map between lambda declarations and their map type. llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; @@ -7475,7 +7057,8 @@ private: ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { OpenMPOffloadMappingFlags Bits = - IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; + IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT + : OpenMPOffloadMappingFlags::OMP_MAP_NONE; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: @@ -7485,35 +7068,36 @@ private: // type modifiers. 
break; case OMPC_MAP_to: - Bits |= OMP_MAP_TO; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO; break; case OMPC_MAP_from: - Bits |= OMP_MAP_FROM; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM; break; case OMPC_MAP_tofrom: - Bits |= OMP_MAP_TO | OMP_MAP_FROM; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM; break; case OMPC_MAP_delete: - Bits |= OMP_MAP_DELETE; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE; break; case OMPC_MAP_unknown: llvm_unreachable("Unexpected map type!"); } if (AddPtrFlag) - Bits |= OMP_MAP_PTR_AND_OBJ; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; if (AddIsTargetParamFlag) - Bits |= OMP_MAP_TARGET_PARAM; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) - Bits |= OMP_MAP_ALWAYS; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) - Bits |= OMP_MAP_CLOSE; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) - Bits |= OMP_MAP_PRESENT; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) - Bits |= OMP_MAP_OMPX_HOLD; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; if (IsNonContiguous) - Bits |= OMP_MAP_NON_CONTIG; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG; return Bits; } @@ -7570,7 +7154,7 @@ private: const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> - OverlappedElements = llvm::None) const { + OverlappedElements = std::nullopt) const { // The following summarizes what has to be generated for each map and the // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -7780,10 +7364,11 @@ private: BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { - if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { RequiresReference = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -7998,7 +7583,7 @@ private: std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); // Emit data for non-overlapped data. OpenMPOffloadMappingFlags Flags = - OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, IsNonContiguous); @@ -8084,13 +7669,16 @@ private: // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 
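Two notes on the flag handling in the surrounding hunks. First, OpenMPOffloadMappingFlags is now a scoped enum shared with the offloading runtime headers, which is why the code spells out static_cast<std::underlying_type_t<...>> before testing or storing bits: a scoped enum has no implicit conversion to its underlying integer. Second, the loop in getFlagMemberOffset earlier is a trailing-zero count over the MEMBER_OF mask; a hedged sketch:

#include <cstdint>

unsigned flagMemberOffsetSketch() {
  // OMP_MAP_MEMBER_OF per the removed enum: the top 16 bits of the flag word.
  uint64_t Remain = 0xffff000000000000ULL;
  unsigned Offset = 0;
  for (; !(Remain & 1); Remain >>= 1)
    ++Offset;
  return Offset; // == 48, so MEMBER_OF positions occupy bits 48..63
}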
if (IsPointer || (IsMemberReference && Next != CE)) - Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | - OMP_MAP_DELETE | OMP_MAP_CLOSE); + Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM | + OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | + OpenMPOffloadMappingFlags::OMP_MAP_DELETE | + OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); if (ShouldBeMemberOf) { // Set placeholder value MEMBER_OF=FFFF to indicate that the flag // should be later updated with the correct value of MEMBER_OF. - Flags |= OMP_MAP_MEMBER_OF; + Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; // From now on, all subsequent PTR_AND_OBJ entries should not be // marked as MEMBER_OF. ShouldBeMemberOf = false; @@ -8337,7 +7925,7 @@ private: /// Return the adjusted map modifiers if the declaration a capture refers to /// appears in a first-private clause. This is expected to be used only with /// directives that start with 'target'. - MappableExprsHandler::OpenMPOffloadMappingFlags + OpenMPOffloadMappingFlags getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { assert(Cap.capturesVariable() && "Expected capture by reference only!"); @@ -8346,22 +7934,22 @@ private: // declaration is known as first-private in this handler. if (FirstPrivateDecls.count(Cap.getCapturedVar())) { if (Cap.getCapturedVar()->getType()->isAnyPointerType()) - return MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; - return MappableExprsHandler::OMP_MAP_PRIVATE | - MappableExprsHandler::OMP_MAP_TO; + return OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; + return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE | + OpenMPOffloadMappingFlags::OMP_MAP_TO; } auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); if (I != LambdasMap.end()) // for map(to: lambda): using user specified map type. return getMapTypeBits( I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), - /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), + /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, /*isNonContiguous=*/false); - return MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM; + return OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM; } static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { @@ -8375,13 +7963,16 @@ private: // If the entry is PTR_AND_OBJ but has not been marked with the special // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be // marked as MEMBER_OF. - if ((Flags & OMP_MAP_PTR_AND_OBJ) && - ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) + if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) && + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF)) return; // Reset the placeholder value to prepare the flag for the assignment of the // proper MEMBER_OF value. - Flags &= ~OMP_MAP_MEMBER_OF; + Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; Flags |= MemberOfFlag; } @@ -8500,7 +8091,7 @@ private: for (const auto L : C->component_lists()) { const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), - C->getMapTypeModifiers(), llvm::None, + C->getMapTypeModifiers(), std::nullopt, /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), E); ++EI; @@ -8516,7 +8107,7 @@ private: Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, C->getMotionModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), *EI); ++EI; @@ -8532,56 +8123,101 @@ private: Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, - C->getMotionModifiers(), /*ReturnDevicePointer=*/false, - C->isImplicit(), std::get<2>(L), *EI); + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, + std::nullopt, C->getMotionModifiers(), + /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), + *EI); ++EI; } } - // Look at the use_device_ptr clause information and mark the existing map - // entries as such. If there is no map information for an entry in the - // use_device_ptr list, we create one with map type 'alloc' and zero size - // section. It is the user fault if that was not mapped before. If there is - // no map information and the pointer is a struct member, then we defer the - // emission of that entry until the whole struct has been processed. + // Look at the use_device_ptr and use_device_addr clauses information and + // mark the existing map entries as such. If there is no map information for + // an entry in the use_device_ptr and use_device_addr list, we create one + // with map type 'alloc' and zero size section. It is the user fault if that + // was not mapped before. If there is no map information and the pointer is + // a struct member, then we defer the emission of that entry until the whole + // struct has been processed. llvm::MapVector<CanonicalDeclPtr<const Decl>, SmallVector<DeferredDevicePtrEntryTy, 4>> DeferredInfo; - MapCombinedInfoTy UseDevicePtrCombinedInfo; + MapCombinedInfoTy UseDeviceDataCombinedInfo; - for (const auto *Cl : Clauses) { - const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); - if (!C) - continue; - for (const auto L : C->component_lists()) { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components = - std::get<1>(L); - assert(!Components.empty() && - "Not expecting empty list of components!"); - const ValueDecl *VD = Components.back().getAssociatedDeclaration(); - VD = cast<ValueDecl>(VD->getCanonicalDecl()); - const Expr *IE = Components.back().getAssociatedExpression(); - // If the first component is a member expression, we have to look into - // 'this', which maps to null in the map of map information. Otherwise - // look directly for the information. - auto It = Info.find(isa<MemberExpr>(IE) ? 
nullptr : VD); + auto &&UseDeviceDataCombinedInfoGen = + [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, + CodeGenFunction &CGF) { + UseDeviceDataCombinedInfo.Exprs.push_back(VD); + UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD); + UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); + UseDeviceDataCombinedInfo.Sizes.push_back( + llvm::Constant::getNullValue(CGF.Int64Ty)); + UseDeviceDataCombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); + UseDeviceDataCombinedInfo.Mappers.push_back(nullptr); + }; + + auto &&MapInfoGen = + [&DeferredInfo, &UseDeviceDataCombinedInfoGen, + &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD, + OMPClauseMappableExprCommon::MappableExprComponentListRef + Components, + bool IsImplicit, bool IsDevAddr) { + // We didn't find any match in our map information - generate a zero + // size array section - if the pointer is a struct member we defer + // this action until the whole struct has been processed. + if (isa<MemberExpr>(IE)) { + // Insert the pointer into Info to be processed by + // generateInfoForComponentList. Because it is a member pointer + // without a pointee, no entry will be generated for it, therefore + // we need to generate one after the whole struct has been + // processed. Nonetheless, generateInfoForComponentList must be + // called to take the pointer into account for the calculation of + // the range of the partial struct. + InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, + std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, + nullptr, nullptr, IsDevAddr); + DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); + } else { + llvm::Value *Ptr; + if (IsDevAddr) { + if (IE->isGLValue()) + Ptr = CGF.EmitLValue(IE).getPointer(CGF); + else + Ptr = CGF.EmitScalarExpr(IE); + } else { + Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); + } + UseDeviceDataCombinedInfoGen(VD, Ptr, CGF); + } + }; - // We potentially have map information for this declaration already. - // Look for the first set of components that refer to it. - if (It != Info.end()) { - bool Found = false; - for (auto &Data : It->second) { - auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { - return MI.Components.back().getAssociatedDeclaration() == VD; - }); - // If we found a map entry, signal that the pointer has to be - // returned and move on to the next declaration. Exclude cases where - // the base pointer is mapped as array subscript, array section or - // array shaping. The base address is passed as a pointer to base in - // this case and cannot be used as a base for use_device_ptr list - // item. - if (CI != Data.end()) { + auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD, + const Expr *IE, bool IsDevAddr) -> bool { + // We potentially have map information for this declaration already. + // Look for the first set of components that refer to it. If found, + // return true. + // If the first component is a member expression, we have to look into + // 'this', which maps to null in the map of map information. Otherwise + // look directly for the information. + auto It = Info.find(isa<MemberExpr>(IE) ? 
nullptr : VD); + if (It != Info.end()) { + bool Found = false; + for (auto &Data : It->second) { + auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { + return MI.Components.back().getAssociatedDeclaration() == VD; + }); + // If we found a map entry, signal that the pointer has to be + // returned and move on to the next declaration. Exclude cases where + // the base pointer is mapped as array subscript, array section or + // array shaping. The base address is passed as a pointer to base in + // this case and cannot be used as a base for use_device_ptr list + // item. + if (CI != Data.end()) { + if (IsDevAddr) { + CI->ReturnDevicePointer = true; + Found = true; + break; + } else { auto PrevCI = std::next(CI->Components.rbegin()); const auto *VarD = dyn_cast<VarDecl>(VD); if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || @@ -8596,51 +8232,45 @@ private: } } } - if (Found) - continue; - } - - // We didn't find any match in our map information - generate a zero - // size array section - if the pointer is a struct member we defer this - // action until the whole struct has been processed. - if (isa<MemberExpr>(IE)) { - // Insert the pointer into Info to be processed by - // generateInfoForComponentList. Because it is a member pointer - // without a pointee, no entry will be generated for it, therefore - // we need to generate one after the whole struct has been processed. - // Nonetheless, generateInfoForComponentList must be called to take - // the pointer into account for the calculation of the range of the - // partial struct. - InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, - llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), - nullptr); - DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); - } else { - llvm::Value *Ptr = - CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); - UseDevicePtrCombinedInfo.Exprs.push_back(VD); - UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); - UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); - UseDevicePtrCombinedInfo.Sizes.push_back( - llvm::Constant::getNullValue(CGF.Int64Ty)); - UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); - UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); } + return Found; } - } + return false; + }; - // Look at the use_device_addr clause information and mark the existing map + // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the - // use_device_addr list, we create one with map type 'alloc' and zero size + // use_device_ptr list, we create one with map type 'alloc' and zero size // section. It is the user fault if that was not mapped before. If there is // no map information and the pointer is a struct member, then we defer the // emission of that entry until the whole struct has been processed. 
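For reference, the user-level semantics being lowered here: use_device_ptr and
use_device_addr ask the runtime for the device address of data that should
already be mapped, and the zero-size 'alloc' entry is the fallback when it is
not. A minimal illustrative snippet (not part of the patch; it assumes an
offloading-enabled compiler, e.g. clang -fopenmp with an offload target):

    #include <cstdio>

    int main() {
      int N = 100;
      int *P = new int[N];
      // P[0:N] is mapped first, so use_device_ptr finds an existing entry;
      // without the map clause the runtime would only see the zero-size
      // 'alloc' entry described in the comment above.
    #pragma omp target data map(to : P[0 : N]) use_device_ptr(P)
      {
        std::printf("device address of P: %p\n", (void *)P);
      }
      delete[] P;
      return 0;
    }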
+ for (const auto *Cl : Clauses) { + const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); + if (!C) + continue; + for (const auto L : C->component_lists()) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components = + std::get<1>(L); + assert(!Components.empty() && + "Not expecting empty list of components!"); + const ValueDecl *VD = Components.back().getAssociatedDeclaration(); + VD = cast<ValueDecl>(VD->getCanonicalDecl()); + const Expr *IE = Components.back().getAssociatedExpression(); + if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) + continue; + MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), + /*IsDevAddr=*/false); + } + } + llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; for (const auto *Cl : Clauses) { const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); if (!C) continue; for (const auto L : C->component_lists()) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components = + std::get<1>(L); assert(!std::get<1>(L).empty() && "Not expecting empty list of components!"); const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); @@ -8648,60 +8278,10 @@ private: continue; VD = cast<ValueDecl>(VD->getCanonicalDecl()); const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); - // If the first component is a member expression, we have to look into - // 'this', which maps to null in the map of map information. Otherwise - // look directly for the information. - auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); - - // We potentially have map information for this declaration already. - // Look for the first set of components that refer to it. - if (It != Info.end()) { - bool Found = false; - for (auto &Data : It->second) { - auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { - return MI.Components.back().getAssociatedDeclaration() == VD; - }); - // If we found a map entry, signal that the pointer has to be - // returned and move on to the next declaration. - if (CI != Data.end()) { - CI->ReturnDevicePointer = true; - Found = true; - break; - } - } - if (Found) - continue; - } - - // We didn't find any match in our map information - generate a zero - // size array section - if the pointer is a struct member we defer this - // action until the whole struct has been processed. - if (isa<MemberExpr>(IE)) { - // Insert the pointer into Info to be processed by - // generateInfoForComponentList. Because it is a member pointer - // without a pointee, no entry will be generated for it, therefore - // we need to generate one after the whole struct has been processed. - // Nonetheless, generateInfoForComponentList must be called to take - // the pointer into account for the calculation of the range of the - // partial struct. 
- InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, - llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), - nullptr, nullptr, /*ForDeviceAddr=*/true); - DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); - } else { - llvm::Value *Ptr; - if (IE->isGLValue()) - Ptr = CGF.EmitLValue(IE).getPointer(CGF); - else - Ptr = CGF.EmitScalarExpr(IE); - CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.emplace_back(Ptr, VD); - CombinedInfo.Pointers.push_back(Ptr); - CombinedInfo.Sizes.push_back( - llvm::Constant::getNullValue(CGF.Int64Ty)); - CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); - CombinedInfo.Mappers.push_back(nullptr); - } + if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) + continue; + MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), + /*IsDevAddr=*/true); } } @@ -8738,7 +8318,8 @@ private: CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( RelevantVD); - CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; + CurInfo.Types[CurrentBasePointersIdx] |= + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; } } } @@ -8759,7 +8340,9 @@ private: // Entry is RETURN_PARAM. Also, set the placeholder value // MEMBER_OF=FFFF so that the entry is later updated with the // correct value of MEMBER_OF. - CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); + CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); } else { BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), @@ -8767,8 +8350,10 @@ private: // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the // placeholder value MEMBER_OF=FFFF so that the entry is later // updated with the correct value of MEMBER_OF. - CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | - OMP_MAP_MEMBER_OF); + CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); } CurInfo.Exprs.push_back(L.VD); CurInfo.BasePointers.emplace_back(BasePtr, L.VD); @@ -8790,7 +8375,7 @@ private: CombinedInfo.append(CurInfo); } // Append data for use_device_ptr clauses. - CombinedInfo.append(UseDevicePtrCombinedInfo); + CombinedInfo.append(UseDeviceDataCombinedInfo); } public: @@ -8818,6 +8403,10 @@ public: for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); + // Extract device addr clause information. + for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) + for (auto L : C->component_lists()) + HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); // Extract map information. 
for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { if (C->getMapType() != OMPC_MAP_to) continue; @@ -8848,7 +8437,8 @@ public: const ValueDecl *VD = nullptr, bool NotTargetParams = true) const { if (CurTypes.size() == 1 && - ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && + ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && !PartialStruct.IsArraySection) return; Address LBAddr = PartialStruct.LowestElem.second; @@ -8862,31 +8452,53 @@ public: CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); // Pointer is the address of the lowest element llvm::Value *LB = LBAddr.getPointer(); - CombinedInfo.Pointers.push_back(LB); + const CXXMethodDecl *MD = + CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr; + const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; + bool HasBaseClass = RD ? RD->getNumBases() > 0 : false; // There should not be a mapper for a combined entry. + if (HasBaseClass) { + // OpenMP 5.2 148:21: + // If the target construct is within a class non-static member function, + // and a variable is an accessible data member of the object for which the + // non-static member function is invoked, the variable is treated as + // if the this[:1] expression had appeared in a map clause with a map-type + // of tofrom. + // Emit this[:1] + CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer()); + QualType Ty = MD->getThisType()->getPointeeType(); + llvm::Value *Size = + CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, + /*isSigned=*/true); + CombinedInfo.Sizes.push_back(Size); + } else { + CombinedInfo.Pointers.push_back(LB); + // Size is (addr of {highest+1} element) - (addr of lowest element) + llvm::Value *HB = HBAddr.getPointer(); + llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32( + HBAddr.getElementType(), HB, /*Idx0=*/1); + llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); + llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); + llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); + llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, + /*isSigned=*/false); + CombinedInfo.Sizes.push_back(Size); + } CombinedInfo.Mappers.push_back(nullptr); - // Size is (addr of {highest+1} element) - (addr of lowest element) - llvm::Value *HB = HBAddr.getPointer(); - llvm::Value *HAddr = - CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); - llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); - llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); - llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); - llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, - /*isSigned=*/false); - CombinedInfo.Sizes.push_back(Size); // Map type is always TARGET_PARAM, if generate info for captures. - CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE - : OMP_MAP_TARGET_PARAM); + CombinedInfo.Types.push_back( + NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE + : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); // If any element has the present modifier, then make sure the runtime // doesn't attempt to allocate the struct. 
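The combined-entry size computed above is literally (address one past the
highest element) minus (address of the lowest element), done with an i8 GEP
and CreatePtrDiff; the new HasBaseClass branch maps sizeof(*this) instead,
per the OpenMP 5.2 this[:1] rule it quotes. A plain-C++ sketch of the same
pointer arithmetic (the struct layout is illustrative):

    #include <cstddef>
    #include <cstdio>

    struct S { int A; double B[8]; char C; };

    int main() {
      S Obj;
      // Lowest mapped element and one past the highest mapped element,
      // measured through char* exactly like the i8-based CreatePtrDiff.
      char *LB = reinterpret_cast<char *>(&Obj.A);
      char *HAddr = &Obj.C + 1;
      std::ptrdiff_t Size = HAddr - LB;
      std::printf("combined entry covers %td bytes\n", Size);
      return 0;
    }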
if (CurTypes.end() != llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { - return Type & OMP_MAP_PRESENT; + return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); })) - CombinedInfo.Types.back() |= OMP_MAP_PRESENT; + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; // Remove TARGET_PARAM flag from the first element - (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; + (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; // If any element has the ompx_hold modifier, then make sure the runtime // uses the hold reference count for the struct as a whole so that it won't // be unmapped by an extra dynamic reference count decrement. Add it to all @@ -8895,11 +8507,12 @@ public: // individual elements. if (CurTypes.end() != llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { - return Type & OMP_MAP_OMPX_HOLD; + return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD); })) { - CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; for (auto &M : CurTypes) - M |= OMP_MAP_OMPX_HOLD; + M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; } // All other current entries will be MEMBER_OF the combined entry @@ -8947,7 +8560,7 @@ public: Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), CGF.getContext().getDeclAlign(VD)); LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); - llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures; FieldDecl *ThisCapture = nullptr; RD->getCaptureFields(Captures, ThisCapture); if (ThisCapture) { @@ -8962,14 +8575,17 @@ public: CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); CombinedInfo.Mappers.push_back(nullptr); } for (const LambdaCapture &LC : RD->captures()) { if (!LC.capturesVariable()) continue; - const VarDecl *VD = LC.getCapturedVar(); + const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar()); if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) continue; auto It = Captures.find(VD); @@ -8995,8 +8611,11 @@ public: CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } - CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); CombinedInfo.Mappers.push_back(nullptr); } } @@ -9008,8 +8627,10 @@ public: MapFlagsArrayTy &Types) const { for (unsigned I = 0, E = Types.size(); I < E; ++I) { // Set correct member_of idx for all implicit lambda captures. 
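Most of the mechanical churn in this file follows from
OpenMPOffloadMappingFlags becoming a scoped enum: there is no implicit
conversion to bool, hence the static_cast<std::underlying_type_t<...>> in the
predicates above. A self-contained sketch of the pattern (hand-written
operators standing in for LLVM's LLVM_MARK_AS_BITMASK_ENUM; the flag values
are illustrative):

    #include <cstdint>
    #include <type_traits>

    enum class Flags : uint64_t {
      None = 0x0,
      To = 0x1,
      From = 0x2,
      MemberOf = 0xffff000000000000ULL,
    };
    constexpr Flags operator|(Flags A, Flags B) {
      return static_cast<Flags>(static_cast<uint64_t>(A) |
                                static_cast<uint64_t>(B));
    }
    constexpr Flags operator&(Flags A, Flags B) {
      return static_cast<Flags>(static_cast<uint64_t>(A) &
                                static_cast<uint64_t>(B));
    }
    constexpr Flags operator~(Flags A) {
      return static_cast<Flags>(~static_cast<uint64_t>(A));
    }
    inline Flags &operator|=(Flags &A, Flags B) { return A = A | B; }
    inline Flags &operator&=(Flags &A, Flags B) { return A = A & B; }

    int main() {
      Flags F = Flags::To | Flags::From;
      F &= ~Flags::From; // clear a bit, as the mapper codegen does
      // A scoped enum does not convert to bool, so tests must cast to the
      // underlying type first -- the pattern used throughout this patch.
      bool HasTo = static_cast<std::underlying_type_t<Flags>>(F & Flags::To);
      return HasTo ? 0 : 1;
    }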
- if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) + if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) continue; llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); assert(BasePtr && "Unable to find base lambda address."); @@ -9051,7 +8672,7 @@ public: // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just // pass its value. - if (DevPointersMap.count(VD)) { + if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.emplace_back(Arg, VD); CombinedInfo.Pointers.push_back(Arg); @@ -9059,8 +8680,10 @@ public: CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); CombinedInfo.Types.push_back( - (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | - OMP_MAP_TARGET_PARAM); + (Cap->capturesVariable() + ? OpenMPOffloadMappingFlags::OMP_MAP_TO + : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); CombinedInfo.Mappers.push_back(nullptr); return; } @@ -9070,6 +8693,21 @@ public: OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, const ValueDecl *, const Expr *>; SmallVector<MapData, 4> DeclComponentLists; + // For member fields listed in is_device_ptr, store them in + // DeclComponentLists for generating component info. + static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown; + auto It = DevPointersMap.find(VD); + if (It != DevPointersMap.end()) + for (const auto &MCL : It->second) + DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown, + /*IsImplicit=*/true, nullptr, + nullptr); + auto I = HasDevAddrsMap.find(VD); + if (I != HasDevAddrsMap.end()) + for (const auto &MCL : I->second) + DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown, + /*IsImplicit=*/true, nullptr, + nullptr); assert(CurDir.is<const OMPExecutableDirective *>() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); @@ -9123,7 +8761,7 @@ public: std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L; ++Count; - for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { + for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) { OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L1; @@ -9243,7 +8881,7 @@ public: ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedComponents = Pair.getSecond(); generateInfoForComponentList( - MapType, MapModifiers, llvm::None, Components, CombinedInfo, + MapType, MapModifiers, std::nullopt, Components, CombinedInfo, PartialStruct, IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); IsFirstComponentList = false; @@ -9260,7 +8898,7 @@ public: L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, llvm::None, + generateInfoForComponentList(MapType, MapModifiers, std::nullopt, Components, CombinedInfo, PartialStruct, IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false, VD, VarRef); @@ -9284,7 +8922,8 @@ public: 
CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), CGF.Int64Ty, /*isSigned=*/true)); // Default map type. - CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); + CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM); } else if (CI.capturesVariableByCopy()) { const VarDecl *VD = CI.getCapturedVar(); CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); @@ -9293,13 +8932,14 @@ public: if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. - CombinedInfo.Types.push_back(OMP_MAP_LITERAL); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). - CombinedInfo.Types.push_back(OMP_MAP_NONE); + CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE); CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); } auto I = FirstPrivateDecls.find(VD); @@ -9331,11 +8971,12 @@ public: IsImplicit = I->getSecond(); } // Every default map produces a single argument which is a target parameter. - CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; + CombinedInfo.Types.back() |= + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; // Add flag stating this is an implicit map. if (IsImplicit) - CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; // No user-defined mapper for default mapping. CombinedInfo.Mappers.push_back(nullptr); @@ -9404,7 +9045,7 @@ static void emitNonContiguousDescriptor( DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, 0, I); + Info.RTArgs.PointersArray, 0, I); Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign()); CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); ++L; @@ -9482,13 +9123,13 @@ static void emitOffloadingArrays( Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); - Info.BasePointersArray = + Info.RTArgs.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - Info.PointersArray = + Info.RTArgs.PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); Address MappersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); - Info.MappersArray = MappersArray.getPointer(); + Info.RTArgs.MappersArray = MappersArray.getPointer(); // If we don't have any VLA types or other types that require runtime // evaluation, we can use a constant array for the map sizes, otherwise we @@ -9501,8 +9142,10 @@ static void emitOffloadingArrays( for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { - if (IsNonContiguous && (CombinedInfo.Types[I] & - MappableExprsHandler::OMP_MAP_NON_CONTIG)) + if (IsNonContiguous && + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + CombinedInfo.Types[I] & + OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG)) ConstSizes[I] = llvm::ConstantInt::get( CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); else @@ -9517,7 +9160,7 @@ static void emitOffloadingArrays( QualType 
SizeArrayType = Ctx.getConstantArrayType( Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); - Info.SizesArray = + Info.RTArgs.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { auto *SizesArrayInit = llvm::ConstantArray::get( @@ -9541,26 +9184,29 @@ static void emitOffloadingArrays( CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( /*DestWidth=*/64, /*Signed=*/false))), CGF.getTypeSize(SizeArrayType)); - Info.SizesArray = Buffer.getPointer(); + Info.RTArgs.SizesArray = Buffer.getPointer(); } else { - Info.SizesArray = SizesArrayGbl; + Info.RTArgs.SizesArray = SizesArrayGbl; } } // The map types are always constant so we don't need to generate code to // fill arrays. Instead, we create an array constant. - SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); - llvm::copy(CombinedInfo.Types, Mapping.begin()); + SmallVector<uint64_t, 4> Mapping; + for (auto mapFlag : CombinedInfo.Types) + Mapping.push_back( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + mapFlag)); std::string MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); auto *MapTypesArrayGbl = OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); - Info.MapTypesArray = MapTypesArrayGbl; + Info.RTArgs.MapTypesArray = MapTypesArrayGbl; // The information types are only built if there is debug information // requested. if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { - Info.MapNamesArray = llvm::Constant::getNullValue( + Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue( llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); } else { auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { @@ -9572,7 +9218,7 @@ static void emitOffloadingArrays( CGM.getOpenMPRuntime().getName({"offload_mapnames"}); auto *MapNamesArrayGbl = OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); - Info.MapNamesArray = MapNamesArrayGbl; + Info.RTArgs.MapNamesArray = MapNamesArrayGbl; } // If there's a present map type modifier, it must not be applied to the end @@ -9580,15 +9226,19 @@ static void emitOffloadingArrays( if (Info.separateBeginEndCalls()) { bool EndMapTypesDiffer = false; for (uint64_t &Type : Mapping) { - if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { - Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; + if (Type & + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) { + Type &= + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); EndMapTypesDiffer = true; } } if (EndMapTypesDiffer) { MapTypesArrayGbl = OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); - Info.MapTypesArrayEnd = MapTypesArrayGbl; + Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl; } } @@ -9596,7 +9246,7 @@ static void emitOffloadingArrays( llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.BasePointersArray, 0, I); + Info.RTArgs.BasePointersArray, 0, I); BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address BPAddr(BP, BPVal->getType(), @@ -9611,7 +9261,7 @@ static void emitOffloadingArrays( llvm::Value *PVal = CombinedInfo.Pointers[I]; llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, 0, I); + 
Info.RTArgs.PointersArray, 0, I); P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); @@ -9620,7 +9270,7 @@ static void emitOffloadingArrays( if (RuntimeSizes.test(I)) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - Info.SizesArray, + Info.RTArgs.SizesArray, /*Idx0=*/0, /*Idx1=*/I); Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty)); @@ -9650,76 +9300,6 @@ static void emitOffloadingArrays( emitNonContiguousDescriptor(CGF, CombinedInfo, Info); } -namespace { -/// Additional arguments for emitOffloadingArraysArgument function. -struct ArgumentsOptions { - bool ForEndCall = false; - ArgumentsOptions() = default; - ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} -}; -} // namespace - -/// Emit the arguments to be passed to the runtime library based on the -/// arrays of base pointers, pointers, sizes, map types, and mappers. If -/// ForEndCall, emit map types to be passed for the end of the region instead of -/// the beginning. -static void emitOffloadingArraysArgument( - CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, - llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, - llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, - llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, - const ArgumentsOptions &Options = ArgumentsOptions()) { - assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && - "expected region end call to runtime only when end call is separate"); - CodeGenModule &CGM = CGF.CGM; - if (Info.NumberOfPtrs) { - BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.BasePointersArray, - /*Idx0=*/0, /*Idx1=*/0); - PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, - /*Idx0=*/0, - /*Idx1=*/0); - SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, - /*Idx0=*/0, /*Idx1=*/0); - MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd - : Info.MapTypesArray, - /*Idx0=*/0, - /*Idx1=*/0); - - // Only emit the mapper information arrays if debug information is - // requested. 
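For orientation, the arrays being folded into Info.RTArgs here are parallel:
slot I of each one describes map entry I. A hypothetical host-side mock of
the layout (names and element types are a sketch, not the runtime's actual
structures):

    #include <cstdint>
    #include <vector>

    struct OffloadRTArgsMock {
      std::vector<void *> BasePointers;  // .offload_baseptrs: object bases
      std::vector<void *> Pointers;      // .offload_ptrs: section begins
      std::vector<int64_t> Sizes;        // .offload_sizes: bytes per entry
      std::vector<uint64_t> MapTypes;    // .offload_maptypes: flag bits
      std::vector<const char *> Names;   // .offload_mapnames: debug only
      std::vector<void *> Mappers;       // user-defined mapper or nullptr
    };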
- if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) - MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - else - MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.MapNamesArray, - /*Idx0=*/0, - /*Idx1=*/0); - // If there is no user-defined mapper, set the mapper array to nullptr to - // avoid an unnecessary data privatization - if (!Info.HasMapper) - MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - else - MappersArrayArg = - CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); - } else { - BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); - MapTypesArrayArg = - llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); - MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - } -} - /// Check for inner distribute directive. static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { @@ -9999,7 +9579,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); // Extract the MEMBER_OF field from the map type. - llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); + llvm::Value *OriMapType = MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Info.Types[I])); llvm::Value *MemberMapType = MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); @@ -10017,8 +9599,10 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // tofrom | alloc | to | from | tofrom | release | delete llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); llvm::BasicBlock *AllocElseBB = MapperCGF.createBasicBlock("omp.type.alloc.else"); @@ -10032,30 +9616,40 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, MapperCGF.EmitBlock(AllocBB); llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( MemberMapType, - MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapperCGF.Builder.CreateBr(EndBB); MapperCGF.EmitBlock(AllocElseBB); llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( LeftToFrom, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO))); MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); // In case of to, clear OMP_MAP_FROM. 
MapperCGF.EmitBlock(ToBB); llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( MemberMapType, - MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapperCGF.Builder.CreateBr(EndBB); MapperCGF.EmitBlock(ToElseBB); llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( LeftToFrom, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); // In case of from, clear OMP_MAP_TO. MapperCGF.EmitBlock(FromBB); llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( MemberMapType, - MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO))); // In case of tofrom, do nothing. MapperCGF.EmitBlock(EndBB); LastBB = EndBB; @@ -10130,7 +9724,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_DELETE))); llvm::Value *DeleteCond; llvm::Value *Cond; if (IsInit) { @@ -10139,7 +9735,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( // IsPtrAndObj? llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ))); PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); @@ -10162,11 +9760,15 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( // memory allocation/deletion purpose only. llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapTypeArg = MapperCGF.Builder.CreateOr( MapTypeArg, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))); // Call the runtime API __tgt_push_mapper_component to fill up the runtime // data structure. 
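The branchy IR emitted for user-defined mappers reduces to a small truth
table over the TO and FROM bits of the enclosing map type. A scalar C++
sketch of the same dispatch (the bit values are illustrative placeholders
for the real OpenMPOffloadMappingFlags constants):

    #include <cstdint>

    constexpr uint64_t MAP_TO = 0x1, MAP_FROM = 0x2;

    // What a member entry inherits from the map type of its parent.
    uint64_t adjustMemberMapType(uint64_t MemberMapType, uint64_t MapType) {
      switch (MapType & (MAP_TO | MAP_FROM)) {
      case 0:        // alloc / release / delete: members copy nothing
        return MemberMapType & ~(MAP_TO | MAP_FROM);
      case MAP_TO:   // to: members must not copy back
        return MemberMapType & ~MAP_FROM;
      case MAP_FROM: // from: members must not copy in
        return MemberMapType & ~MAP_TO;
      default:       // tofrom: leave the member bits alone
        return MemberMapType;
      }
    }

    int main() {
      // A 'to'-mapped parent strips FROM from its members.
      return adjustMemberMapType(MAP_TO | MAP_FROM, MAP_TO) == MAP_TO ? 0 : 1;
    }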
@@ -10301,9 +9903,29 @@ void CGOpenMPRuntime::emitTargetCall( llvm::Value *NumIterations = emitTargetNumIterationsCall(CGF, D, SizeEmitter); + llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0); + if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) { + CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF); + llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr( + DynMemClause->getSize(), /*IgnoreResultAssign=*/true); + DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty, + /*isSigned=*/false); + } + + llvm::Value *ZeroArray = + llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3)); + + bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); + llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait); + + llvm::Value *NumTeams3D = + CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0}); + llvm::Value *NumThreads3D = + CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0}); + // Arguments for the target kernel. SmallVector<llvm::Value *> KernelArgs{ - CGF.Builder.getInt32(/* Version */ 1), + CGF.Builder.getInt32(/* Version */ 2), PointerNum, InputInfo.BasePointersArray.getPointer(), InputInfo.PointersArray.getPointer(), @@ -10311,18 +9933,13 @@ void CGOpenMPRuntime::emitTargetCall( MapTypesArray, MapNamesArray, InputInfo.MappersArray.getPointer(), - NumIterations}; - - // Arguments passed to the 'nowait' variant. - SmallVector<llvm::Value *> NoWaitKernelArgs{ - CGF.Builder.getInt32(0), - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), - CGF.Builder.getInt32(0), - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + NumIterations, + Flags, + NumTeams3D, + NumThreads3D, + DynCGroupMem, }; - bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); - // The target region is an outlined function launched by the runtime // via calls to __tgt_target_kernel(). // @@ -10336,13 +9953,9 @@ void CGOpenMPRuntime::emitTargetCall( // __tgt_target_teams() launches a GPU kernel with the requested number // of teams and threads so no additional calls to the runtime are required. // Check the error code and execute the host version if required. - CGF.Builder.restoreIP( - HasNoWait ? OMPBuilder.emitTargetKernel( - CGF.Builder, Return, RTLoc, DeviceID, NumTeams, - NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs) - : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc, - DeviceID, NumTeams, NumThreads, - OutlinedFnID, KernelArgs)); + CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel( + CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads, + OutlinedFnID, KernelArgs)); llvm::BasicBlock *OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); @@ -10392,9 +10005,10 @@ void CGOpenMPRuntime::emitTargetCall( CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); // Copy to the device as an argument. No need to retrieve it. - CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM | - MappableExprsHandler::OMP_MAP_IMPLICIT); + CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); CurInfo.Mappers.push_back(nullptr); } else { // If we have any information in the map clause, we use it, otherwise we @@ -10441,25 +10055,26 @@ void CGOpenMPRuntime::emitTargetCall( // weren't referenced within the construct. 
MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); - TargetDataInfo Info; + CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - emitOffloadingArraysArgument( - CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, - {/*ForEndCall=*/false}); + bool EmitDebug = + CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = - Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.PointersArray = - Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, + CGM.getPointerAlign()); InputInfo.SizesArray = - Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); InputInfo.MappersArray = - Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - MapTypesArray = Info.MapTypesArray; - MapNamesArray = Info.MapNamesArray; + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; if (RequiresOuterTask) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else @@ -10506,16 +10121,12 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (RequiresDeviceCodegen) { const auto &E = *cast<OMPExecutableDirective>(S); - unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, - FileID, Line); + auto EntryInfo = + getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName); // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. - if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, - ParentName, Line)) + if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo)) return; switch (E.getDirectiveKind()) { @@ -10645,7 +10256,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, } static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = OMPDeclareTargetDeclAttr::getDeviceType(VD); if (!DevTy) return false; @@ -10710,11 +10321,12 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { } // Do not to emit variable if it is not marked as declare target. 
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( cast<VarDecl>(GD.getDecl())); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); return true; @@ -10729,12 +10341,12 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, return; // If we have host/nohost variables, they do not need to be registered. - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = OMPDeclareTargetDeclAttr::getDeviceType(VD); if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any) return; - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) { if (CGM.getLangOpts().OpenMPIsDevice) { @@ -10746,20 +10358,22 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, return; } // Register declare target variables. - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; + llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags; StringRef VarName; - CharUnits VarSize; + int64_t VarSize; llvm::GlobalValue::LinkageTypes Linkage; - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !HasRequiresUnifiedSharedMemory) { - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; VarName = CGM.getMangledName(VD); if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { - VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); - assert(!VarSize.isZero() && "Expected non-zero size of the variable"); + VarSize = + CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity(); + assert(VarSize != 0 && "Expected non-zero size of the variable"); } else { - VarSize = CharUnits::Zero(); + VarSize = 0; } Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); // Temp solution to prevent optimizations of the internal variables. 
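The MT_Enter cases threaded through this function track OpenMP 5.2, which
respells the declare target 'to' clause as 'enter'; both reach the same
registration path, while 'link' defers the mapping until first use. A
source-level view (illustrative):

    // 5.2 spelling of the old 'to' clause: registered eagerly (MT_Enter,
    // handled together with MT_To above).
    int DeviceCounter = 0;
    #pragma omp declare target enter(DeviceCounter)

    // 'link' entries are registered but mapped lazily on first use.
    int BigTable[1 << 20];
    #pragma omp declare target link(BigTable)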
@@ -10771,7 +10385,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, std::string RefName = getName({VarName, "ref"}); if (!CGM.GetGlobalValue(RefName)) { llvm::Constant *AddrRef = - getOrCreateInternalVariable(Addr->getType(), RefName); + OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName); auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); GVAddrRef->setConstant(/*Val=*/true); GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); @@ -10781,13 +10395,14 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, } } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."); if (*Res == OMPDeclareTargetDeclAttr::MT_Link) - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; else - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; if (CGM.getLangOpts().OpenMPIsDevice) { VarName = Addr->getName(); @@ -10796,7 +10411,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, VarName = getAddrOfDeclareTargetVar(VD).getName(); Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); } - VarSize = CGM.getPointerSize(); + VarSize = CGM.getPointerSize().getQuantity(); Linkage = llvm::GlobalValue::WeakAnyLinkage; } @@ -10814,16 +10429,18 @@ bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { void CGOpenMPRuntime::emitDeferredTargetDecls() const { for (const VarDecl *VD : DeferredGlobalVariables) { - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !HasRequiresUnifiedSharedMemory) { CGM.EmitGlobal(VD); } else { assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."); (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -10841,6 +10458,7 @@ void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { HasRequiresUnifiedSharedMemory = true; + OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); } else if (const auto *AC = dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { switch (AC->getAtomicDefaultMemOrderKind()) { @@ -11025,7 +10643,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, void CGOpenMPRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, - const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { + const Expr *Device, const RegionCodeGenTy &CodeGen, + CGOpenMPRuntime::TargetDataInfo &Info) { if (!CGF.HaveInsertPoint()) return; @@ -11049,15 +10668,11 @@ void CGOpenMPRuntime::emitTargetDataCalls( emitOffloadingArrays(CGF, CombinedInfo, Info, 
OMPBuilder, /*IsNonContiguous=*/true); - llvm::Value *BasePointersArrayArg = nullptr; - llvm::Value *PointersArrayArg = nullptr; - llvm::Value *SizesArrayArg = nullptr; - llvm::Value *MapTypesArrayArg = nullptr; - llvm::Value *MapNamesArrayArg = nullptr; - llvm::Value *MappersArrayArg = nullptr; - emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, - MapNamesArrayArg, MappersArrayArg, Info); + llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs; + bool EmitDebug = + CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info, + EmitDebug); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -11077,12 +10692,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = {RTLoc, DeviceID, PointerNum, - BasePointersArrayArg, - PointersArrayArg, - SizesArrayArg, - MapTypesArrayArg, - MapNamesArrayArg, - MappersArrayArg}; + RTArgs.BasePointersArray, + RTArgs.PointersArray, + RTArgs.SizesArray, + RTArgs.MapTypesArray, + RTArgs.MapNamesArray, + RTArgs.MappersArray}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), @@ -11099,16 +10714,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); - llvm::Value *BasePointersArrayArg = nullptr; - llvm::Value *PointersArrayArg = nullptr; - llvm::Value *SizesArrayArg = nullptr; - llvm::Value *MapTypesArrayArg = nullptr; - llvm::Value *MapNamesArrayArg = nullptr; - llvm::Value *MappersArrayArg = nullptr; - emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, - MapNamesArrayArg, MappersArrayArg, Info, - {/*ForEndCall=*/true}); + llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs; + bool EmitDebug = + CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info, + EmitDebug, + /*ForEndCall=*/true); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -11128,12 +10739,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = {RTLoc, DeviceID, PointerNum, - BasePointersArrayArg, - PointersArrayArg, - SizesArrayArg, - MapTypesArrayArg, - MapNamesArrayArg, - MappersArrayArg}; + RTArgs.BasePointersArray, + RTArgs.PointersArray, + RTArgs.SizesArray, + RTArgs.MapTypesArray, + RTArgs.MapNamesArray, + RTArgs.MappersArray}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), @@ -11322,27 +10933,28 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( MappableExprsHandler MEHandler(D, CGF); MEHandler.generateAllInfo(CombinedInfo); - TargetDataInfo Info; + CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. 
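The begin and end halves above feed the same RTArgs arrays into a pair of
runtime entry points. Their rough shape, abbreviated from omptarget's
interface (a sketch for orientation, not authoritative prototypes; the first
parameter is an ident_t * source-location record in the real signatures):

    #include <cstdint>

    extern "C" {
    void __tgt_target_data_begin_mapper(void *Loc, int64_t DeviceId,
                                        int32_t NumArgs, void **BasePtrs,
                                        void **Ptrs, int64_t *Sizes,
                                        int64_t *MapTypes, void **MapNames,
                                        void **Mappers);
    void __tgt_target_data_end_mapper(void *Loc, int64_t DeviceId,
                                      int32_t NumArgs, void **BasePtrs,
                                      void **Ptrs, int64_t *Sizes,
                                      int64_t *MapTypes, void **MapNames,
                                      void **Mappers);
    }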
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, /*IsNonContiguous=*/true); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - emitOffloadingArraysArgument( - CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, - {/*ForEndCall=*/false}); + bool EmitDebug = + CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = - Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.PointersArray = - Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, + CGM.getPointerAlign()); InputInfo.SizesArray = - Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); InputInfo.MappersArray = - Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - MapTypesArray = Info.MapTypesArray; - MapNamesArray = Info.MapNamesArray; + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; if (RequiresOuterTask) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else @@ -11937,7 +11549,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, if (CGM.getTarget().hasFeature("sve")) emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, MangledName, 's', 128, Fn, ExprLoc); - if (CGM.getTarget().hasFeature("neon")) + else if (CGM.getTarget().hasFeature("neon")) emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, MangledName, 'n', 128, Fn, ExprLoc); } @@ -12039,7 +11651,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); + llvm::ArrayRef(FiniArgs)); } void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, @@ -12130,7 +11742,7 @@ static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, /// Return the alignment from an allocate directive if present. static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { - llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); + std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); if (!AllocateAlignment) return nullptr; @@ -12570,15 +12182,15 @@ void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, // Last updated loop counter for the lastprivate conditional var. 
// int<xx> last_iv = 0; llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); - llvm::Constant *LastIV = - getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); + llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable( + LLIVTy, getName({UniqueDeclName, "iv"})); cast<llvm::GlobalVariable>(LastIV)->setAlignment( IVLVal.getAlignment().getAsAlign()); LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); // Last value of the lastprivate conditional. // decltype(priv_a) last_a; - llvm::GlobalVariable *Last = getOrCreateInternalVariable( + llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable( CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); Last->setAlignment(LVal.getAlignment().getAsAlign()); LValue LastLVal = CGF.MakeAddrLValue( @@ -13037,7 +12649,8 @@ void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, void CGOpenMPSIMDRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, - const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { + const Expr *Device, const RegionCodeGenTy &CodeGen, + CGOpenMPRuntime::TargetDataInfo &Info) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index b95aef68335e..e7c1a098c768 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -121,6 +121,7 @@ struct OMPTaskDataTy final { bool Nogroup = false; bool IsReductionWithTaskMod = false; bool IsWorksharingReduction = false; + bool HasNowaitClause = false; }; /// Class intended to support codegen of all kind of the reduction clauses. @@ -306,21 +307,10 @@ public: protected: CodeGenModule &CGM; - StringRef FirstSeparator, Separator; /// An OpenMP-IR-Builder instance. llvm::OpenMPIRBuilder OMPBuilder; - /// Constructor allowing to redefine the name separator for the variables. - explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, - StringRef Separator); - - /// Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr, size \a Size, and flags \a Flags. - virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage); - /// Helper to emit outlined function for 'target' directive. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. @@ -339,9 +329,10 @@ protected: /// Emits object of ident_t type with info for source location. /// \param Flags Flags for OpenMP location. + /// \param EmitLoc Emit source location even when debug info is off. /// llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - unsigned Flags = 0); + unsigned Flags = 0, bool EmitLoc = false); /// Emit the number of teams for a target directive. Inspect the num_teams /// clause associated with a teams construct combined or closely nested @@ -387,7 +378,7 @@ protected: /// Emits \p Callee function call with arguments \p Args with location \p Loc. void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, - ArrayRef<llvm::Value *> Args = llvm::None) const; + ArrayRef<llvm::Value *> Args = std::nullopt) const; /// Emits address of the word in a memory where current thread id is /// stored. 
@@ -419,8 +410,7 @@ protected: /// llvm::Value *getCriticalRegionLock(StringRef CriticalName); -private: - +protected: /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap<SourceLocation, llvm::Value *> OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; @@ -520,214 +510,7 @@ private: QualType KmpDimTy; /// Entity that registers the offloading constants that were emitted so /// far. - class OffloadEntriesInfoManagerTy { - CodeGenModule &CGM; - - /// Number of entries registered so far. - unsigned OffloadingEntriesNum = 0; - - public: - /// Base class of the entries info. - class OffloadEntryInfo { - public: - /// Kind of a given entry. - enum OffloadingEntryInfoKinds : unsigned { - /// Entry is a target region. - OffloadingEntryInfoTargetRegion = 0, - /// Entry is a declare target variable. - OffloadingEntryInfoDeviceGlobalVar = 1, - /// Invalid entry info. - OffloadingEntryInfoInvalid = ~0u - }; - - protected: - OffloadEntryInfo() = delete; - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, - uint32_t Flags) - : Flags(Flags), Order(Order), Kind(Kind) {} - ~OffloadEntryInfo() = default; - - public: - bool isValid() const { return Order != ~0u; } - unsigned getOrder() const { return Order; } - OffloadingEntryInfoKinds getKind() const { return Kind; } - uint32_t getFlags() const { return Flags; } - void setFlags(uint32_t NewFlags) { Flags = NewFlags; } - llvm::Constant *getAddress() const { - return cast_or_null<llvm::Constant>(Addr); - } - void setAddress(llvm::Constant *V) { - assert(!Addr.pointsToAliveValue() && "Address has been set before!"); - Addr = V; - } - static bool classof(const OffloadEntryInfo *Info) { return true; } - - private: - /// Address of the entity that has to be mapped for offloading. - llvm::WeakTrackingVH Addr; - - /// Flags associated with the device global. - uint32_t Flags = 0u; - - /// Order this entry was emitted. - unsigned Order = ~0u; - - OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; - }; - - /// Return true if a there are no entries defined. - bool empty() const; - /// Return number of entries defined so far. - unsigned size() const { return OffloadingEntriesNum; } - OffloadEntriesInfoManagerTy(CodeGenModule &CGM) : CGM(CGM) {} - - // - // Target region entries related. - // - - /// Kind of the target registry entry. - enum OMPTargetRegionEntryKind : uint32_t { - /// Mark the entry as target region. - OMPTargetRegionEntryTargetRegion = 0x0, - /// Mark the entry as a global constructor. - OMPTargetRegionEntryCtor = 0x02, - /// Mark the entry as a global destructor. - OMPTargetRegionEntryDtor = 0x04, - }; - - /// Target region entries info. - class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { - /// Address that can be used as the ID of the entry. 
- llvm::Constant *ID = nullptr; - - public: - OffloadEntryInfoTargetRegion() - : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} - explicit OffloadEntryInfoTargetRegion(unsigned Order, - llvm::Constant *Addr, - llvm::Constant *ID, - OMPTargetRegionEntryKind Flags) - : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), - ID(ID) { - setAddress(Addr); - } - - llvm::Constant *getID() const { return ID; } - void setID(llvm::Constant *V) { - assert(!ID && "ID has been set before!"); - ID = V; - } - static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OffloadingEntryInfoTargetRegion; - } - }; - - /// Initialize target region entry. - void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - unsigned Order); - /// Register target region entry. - void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - llvm::Constant *Addr, llvm::Constant *ID, - OMPTargetRegionEntryKind Flags); - /// Return true if a target region entry with the provided information - /// exists. - bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - bool IgnoreAddressId = false) const; - /// brief Applies action \a Action on all registered entries. - typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned, - const OffloadEntryInfoTargetRegion &)> - OffloadTargetRegionEntryInfoActTy; - void actOnTargetRegionEntriesInfo( - const OffloadTargetRegionEntryInfoActTy &Action); - - // - // Device global variable entries related. - // - - /// Kind of the global variable entry.. - enum OMPTargetGlobalVarEntryKind : uint32_t { - /// Mark the entry as a to declare target. - OMPTargetGlobalVarEntryTo = 0x0, - /// Mark the entry as a to declare target link. - OMPTargetGlobalVarEntryLink = 0x1, - }; - - /// Device global variable entries info. - class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { - /// Type of the global variable. - CharUnits VarSize; - llvm::GlobalValue::LinkageTypes Linkage; - - public: - OffloadEntryInfoDeviceGlobalVar() - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} - explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, - OMPTargetGlobalVarEntryKind Flags) - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} - explicit OffloadEntryInfoDeviceGlobalVar( - unsigned Order, llvm::Constant *Addr, CharUnits VarSize, - OMPTargetGlobalVarEntryKind Flags, - llvm::GlobalValue::LinkageTypes Linkage) - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), - VarSize(VarSize), Linkage(Linkage) { - setAddress(Addr); - } - - CharUnits getVarSize() const { return VarSize; } - void setVarSize(CharUnits Size) { VarSize = Size; } - llvm::GlobalValue::LinkageTypes getLinkage() const { return Linkage; } - void setLinkage(llvm::GlobalValue::LinkageTypes LT) { Linkage = LT; } - static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; - } - }; - - /// Initialize device global variable entry. - void initializeDeviceGlobalVarEntryInfo(StringRef Name, - OMPTargetGlobalVarEntryKind Flags, - unsigned Order); - - /// Register device global variable entry. 
- void - registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, - CharUnits VarSize, - OMPTargetGlobalVarEntryKind Flags, - llvm::GlobalValue::LinkageTypes Linkage); - /// Checks if the variable with the given name has been registered already. - bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { - return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; - } - /// Applies action \a Action on all registered entries. - typedef llvm::function_ref<void(StringRef, - const OffloadEntryInfoDeviceGlobalVar &)> - OffloadDeviceGlobalVarEntryInfoActTy; - void actOnDeviceGlobalVarEntriesInfo( - const OffloadDeviceGlobalVarEntryInfoActTy &Action); - - private: - // Storage for target region entries kind. The storage is to be indexed by - // file ID, device ID, parent function name and line number. - typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion> - OffloadEntriesTargetRegionPerLine; - typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine> - OffloadEntriesTargetRegionPerParentName; - typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerParentName> - OffloadEntriesTargetRegionPerFile; - typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerFile> - OffloadEntriesTargetRegionPerDevice; - typedef OffloadEntriesTargetRegionPerDevice OffloadEntriesTargetRegionTy; - OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; - /// Storage for device global variable entries kind. The storage is to be - /// indexed by mangled name. - typedef llvm::StringMap<OffloadEntryInfoDeviceGlobalVar> - OffloadEntriesDeviceGlobalVarTy; - OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; - }; - OffloadEntriesInfoManagerTy OffloadEntriesInfoManager; + llvm::OffloadEntriesInfoManager OffloadEntriesInfoManager; bool ShouldMarkAsGlobal = true; /// List of the emitted declarations. @@ -773,7 +556,7 @@ private: /// metadata. void loadOffloadInfoMetadata(); - /// Start scanning from statement \a S and and emit all target regions + /// Start scanning from statement \a S and emit all target regions /// found along the way. /// \param S Starting statement. /// \param ParentName Name of the function declaration that is being scanned. @@ -814,16 +597,6 @@ private: /// \return Cache variable for the specified threadprivate. llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD); - /// Gets (if variable with the given name already exist) or creates - /// internal global variable with the specified Name. The created variable has - /// linkage CommonLinkage by default and is initialized by null value. - /// \param Ty Type of the global variable. If it is exist already the type - /// must be the same. - /// \param Name Name of the variable. - llvm::GlobalVariable *getOrCreateInternalVariable(llvm::Type *Ty, - const llvm::Twine &Name, - unsigned AddressSpace = 0); - /// Set of threadprivate variables with the generated initializer. llvm::StringSet<> ThreadPrivateWithDefinition; @@ -915,11 +688,13 @@ private: Address DependenciesArray); public: - explicit CGOpenMPRuntime(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM, ".", ".") {} + explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Returns true if the current target is a GPU. + virtual bool isTargetCodegen() const { return false; } + /// Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { @@ -1048,6 +823,11 @@ public: /// Emits code for a taskyield directive. 
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc); + /// Emit __kmpc_error call for error directive + /// extern void __kmpc_error(ident_t *loc, int severity, const char *message); + virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, + bool IsFatal); + /// Emit a taskgroup region. /// \param TaskgroupOpGen Generator for the statement associated with the /// given taskgroup region. @@ -1654,65 +1434,16 @@ public: /// Struct that keeps all the relevant information that should be kept /// throughout a 'target data' region. - class TargetDataInfo { - /// Set to true if device pointer information have to be obtained. - bool RequiresDevicePointerInfo = false; - /// Set to true if Clang emits separate runtime calls for the beginning and - /// end of the region. These calls might have separate map type arrays. - bool SeparateBeginEndCalls = false; - + class TargetDataInfo : public llvm::OpenMPIRBuilder::TargetDataInfo { public: - /// The array of base pointer passed to the runtime library. - llvm::Value *BasePointersArray = nullptr; - /// The array of section pointers passed to the runtime library. - llvm::Value *PointersArray = nullptr; - /// The array of sizes passed to the runtime library. - llvm::Value *SizesArray = nullptr; - /// The array of map types passed to the runtime library for the beginning - /// of the region or for the entire region if there are no separate map - /// types for the region end. - llvm::Value *MapTypesArray = nullptr; - /// The array of map types passed to the runtime library for the end of the - /// region, or nullptr if there are no separate map types for the region - /// end. - llvm::Value *MapTypesArrayEnd = nullptr; - /// The array of user-defined mappers passed to the runtime library. - llvm::Value *MappersArray = nullptr; - /// The array of original declaration names of mapped pointers sent to the - /// runtime library for debugging - llvm::Value *MapNamesArray = nullptr; - /// Indicate whether any user-defined mapper exists. - bool HasMapper = false; - /// The total number of pointers passed to the runtime library. - unsigned NumberOfPtrs = 0u; + explicit TargetDataInfo() : llvm::OpenMPIRBuilder::TargetDataInfo() {} + explicit TargetDataInfo(bool RequiresDevicePointerInfo, + bool SeparateBeginEndCalls) + : llvm::OpenMPIRBuilder::TargetDataInfo(RequiresDevicePointerInfo, + SeparateBeginEndCalls) {} /// Map between the a declaration of a capture and the corresponding base /// pointer address where the runtime returns the device pointers. llvm::DenseMap<const ValueDecl *, Address> CaptureDeviceAddrMap; - - explicit TargetDataInfo() {} - explicit TargetDataInfo(bool RequiresDevicePointerInfo, - bool SeparateBeginEndCalls) - : RequiresDevicePointerInfo(RequiresDevicePointerInfo), - SeparateBeginEndCalls(SeparateBeginEndCalls) {} - /// Clear information about the data arrays. - void clearArrayInfo() { - BasePointersArray = nullptr; - PointersArray = nullptr; - SizesArray = nullptr; - MapTypesArray = nullptr; - MapTypesArrayEnd = nullptr; - MapNamesArray = nullptr; - MappersArray = nullptr; - HasMapper = false; - NumberOfPtrs = 0u; - } - /// Return true if the current target data information has valid arrays. 
- bool isValid() { - return BasePointersArray && PointersArray && SizesArray && - MapTypesArray && (!HasMapper || MappersArray) && NumberOfPtrs; - } - bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } - bool separateBeginEndCalls() { return SeparateBeginEndCalls; } }; /// Emit the target data mapping code associated with \a D. @@ -1727,7 +1458,7 @@ public: const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, - TargetDataInfo &Info); + CGOpenMPRuntime::TargetDataInfo &Info); /// Emit the data mapping/movement code associated with the directive /// \a D that should be of the form 'target [{enter|exit} data | update]'. @@ -1792,7 +1523,7 @@ public: virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, - ArrayRef<llvm::Value *> Args = llvm::None) const; + ArrayRef<llvm::Value *> Args = std::nullopt) const; /// Emits OpenMP-specific function prolog. /// Required for device constructs. @@ -2487,7 +2218,7 @@ public: void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, - TargetDataInfo &Info) override; + CGOpenMPRuntime::TargetDataInfo &Info) override; /// Emit the data mapping/movement code associated with the directive /// \a D that should be of the form 'target [{enter|exit} data | update]'. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 6dea846f486f..e8c5f04db49f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -15,6 +15,7 @@ #include "CodeGenFunction.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Cuda.h" @@ -73,30 +74,15 @@ private: CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode = CGOpenMPRuntimeGPU::EM_Unknown; CGOpenMPRuntimeGPU::ExecutionMode &ExecMode; - bool SavedRuntimeMode = false; - bool *RuntimeMode = nullptr; public: - /// Constructor for Non-SPMD mode. - ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode) - : ExecMode(ExecMode) { - SavedExecMode = ExecMode; - ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD; - } - /// Constructor for SPMD mode. ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode, - bool &RuntimeMode, bool FullRuntimeMode) - : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) { + CGOpenMPRuntimeGPU::ExecutionMode EntryMode) + : ExecMode(ExecMode) { SavedExecMode = ExecMode; - SavedRuntimeMode = RuntimeMode; - ExecMode = CGOpenMPRuntimeGPU::EM_SPMD; - RuntimeMode = FullRuntimeMode; - } - ~ExecutionRuntimeModesRAII() { - ExecMode = SavedExecMode; - if (RuntimeMode) - *RuntimeMode = SavedRuntimeMode; + ExecMode = EntryMode; } + ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; } }; /// GPU Configuration: This information can be derived from cuda registers, @@ -109,9 +95,6 @@ enum MachineConfiguration : unsigned { /// Global memory alignment for performance. GlobalMemoryAlignment = 128, - - /// Maximal size of the shared memory buffer. 
- SharedMemorySize = 128, }; static const ValueDecl *getPrivateItem(const Expr *RefExpr) { @@ -444,9 +427,8 @@ public: markAsEscaped(VD); if (isa<OMPCapturedExprDecl>(VD)) VisitValueDecl(VD); - else if (const auto *VarD = dyn_cast<VarDecl>(VD)) - if (VarD->isInitCapture()) - VisitValueDecl(VD); + else if (VD->isInitCapture()) + VisitValueDecl(VD); } void VisitUnaryOperator(const UnaryOperator *E) { if (!E) @@ -746,274 +728,13 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, "Unknown programming model for OpenMP directive on NVPTX target."); } -/// Check if the directive is loops based and has schedule clause at all or has -/// static scheduling. -static bool hasStaticScheduling(const OMPExecutableDirective &D) { - assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) && - isOpenMPLoopDirective(D.getDirectiveKind()) && - "Expected loop-based directive."); - return !D.hasClausesOfKind<OMPOrderedClause>() && - (!D.hasClausesOfKind<OMPScheduleClause>() || - llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(), - [](const OMPScheduleClause *C) { - return C->getScheduleKind() == OMPC_SCHEDULE_static; - })); -} - -/// Check for inner (nested) lightweight runtime construct, if any -static bool hasNestedLightweightDirective(ASTContext &Ctx, - const OMPExecutableDirective &D) { - assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive."); - const auto *CS = D.getInnermostCapturedStmt(); - const auto *Body = - CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - - if (const auto *NestedDir = - dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { - OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); - switch (D.getDirectiveKind()) { - case OMPD_target: - if (isOpenMPParallelDirective(DKind) && - isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && - hasStaticScheduling(*NestedDir)) - return true; - if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd) - return true; - if (DKind == OMPD_parallel) { - Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( - /*IgnoreCaptured=*/true); - if (!Body) - return false; - ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NND = - dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { - DKind = NND->getDirectiveKind(); - if (isOpenMPWorksharingDirective(DKind) && - isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) - return true; - } - } else if (DKind == OMPD_teams) { - Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( - /*IgnoreCaptured=*/true); - if (!Body) - return false; - ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NND = - dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { - DKind = NND->getDirectiveKind(); - if (isOpenMPParallelDirective(DKind) && - isOpenMPWorksharingDirective(DKind) && - isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) - return true; - if (DKind == OMPD_parallel) { - Body = NND->getInnermostCapturedStmt()->IgnoreContainers( - /*IgnoreCaptured=*/true); - if (!Body) - return false; - ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NND = - dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { - DKind = NND->getDirectiveKind(); - if (isOpenMPWorksharingDirective(DKind) && - isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) - return true; - } - } - } - } - return false; - case OMPD_target_teams: - if 
(isOpenMPParallelDirective(DKind) && - isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && - hasStaticScheduling(*NestedDir)) - return true; - if (DKind == OMPD_distribute_simd || DKind == OMPD_simd) - return true; - if (DKind == OMPD_parallel) { - Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( - /*IgnoreCaptured=*/true); - if (!Body) - return false; - ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NND = - dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { - DKind = NND->getDirectiveKind(); - if (isOpenMPWorksharingDirective(DKind) && - isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) - return true; - } - } - return false; - case OMPD_target_parallel: - if (DKind == OMPD_simd) - return true; - return isOpenMPWorksharingDirective(DKind) && - isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir); - case OMPD_target_teams_distribute: - case OMPD_target_simd: - case OMPD_target_parallel_for: - case OMPD_target_parallel_for_simd: - case OMPD_target_teams_distribute_simd: - case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: - case OMPD_parallel: - case OMPD_for: - case OMPD_parallel_for: - case OMPD_parallel_master: - case OMPD_parallel_sections: - case OMPD_for_simd: - case OMPD_parallel_for_simd: - case OMPD_cancel: - case OMPD_cancellation_point: - case OMPD_ordered: - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_task: - case OMPD_simd: - case OMPD_sections: - case OMPD_section: - case OMPD_single: - case OMPD_master: - case OMPD_critical: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_taskwait: - case OMPD_taskgroup: - case OMPD_atomic: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_teams: - case OMPD_target_data: - case OMPD_target_exit_data: - case OMPD_target_enter_data: - case OMPD_distribute: - case OMPD_distribute_simd: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: - case OMPD_teams_distribute_parallel_for: - case OMPD_teams_distribute_parallel_for_simd: - case OMPD_target_update: - case OMPD_declare_simd: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_parallel_master_taskloop: - case OMPD_parallel_master_taskloop_simd: - case OMPD_requires: - case OMPD_unknown: - default: - llvm_unreachable("Unexpected directive."); - } - } - - return false; -} - -/// Checks if the construct supports lightweight runtime. It must be SPMD -/// construct + inner loop-based construct with static scheduling. -static bool supportsLightweightRuntime(ASTContext &Ctx, - const OMPExecutableDirective &D) { - if (!supportsSPMDExecutionMode(Ctx, D)) - return false; - OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); - switch (DirectiveKind) { - case OMPD_target: - case OMPD_target_teams: - case OMPD_target_parallel: - return hasNestedLightweightDirective(Ctx, D); - case OMPD_target_parallel_for: - case OMPD_target_parallel_for_simd: - case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: - // (Last|First)-privates must be shared in parallel region. 
- return hasStaticScheduling(D); - case OMPD_target_simd: - case OMPD_target_teams_distribute_simd: - return true; - case OMPD_target_teams_distribute: - return false; - case OMPD_parallel: - case OMPD_for: - case OMPD_parallel_for: - case OMPD_parallel_master: - case OMPD_parallel_sections: - case OMPD_for_simd: - case OMPD_parallel_for_simd: - case OMPD_cancel: - case OMPD_cancellation_point: - case OMPD_ordered: - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_task: - case OMPD_simd: - case OMPD_sections: - case OMPD_section: - case OMPD_single: - case OMPD_master: - case OMPD_critical: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_taskwait: - case OMPD_taskgroup: - case OMPD_atomic: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_teams: - case OMPD_target_data: - case OMPD_target_exit_data: - case OMPD_target_enter_data: - case OMPD_distribute: - case OMPD_distribute_simd: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: - case OMPD_teams_distribute_parallel_for: - case OMPD_teams_distribute_parallel_for_simd: - case OMPD_target_update: - case OMPD_declare_simd: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_parallel_master_taskloop: - case OMPD_parallel_master_taskloop_simd: - case OMPD_requires: - case OMPD_unknown: - default: - break; - } - llvm_unreachable( - "Unknown programming model for OpenMP directive on NVPTX target."); -} - void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); + ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD); EntryFunctionState EST; WrapperFunctionsMap.clear(); @@ -1048,8 +769,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D, void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { CGBuilderTy &Bld = CGF.Builder; - Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime())); - IsInTargetMasterThreadRegion = IsSPMD; + Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD)); if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } @@ -1061,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF, emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime()); + OMPBuilder.createTargetDeinit(Bld, IsSPMD); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -1070,10 +790,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionRuntimeModesRAII ModeRAII( - CurrentExecutionMode, RequiresFullRuntime, - CGM.getLangOpts().OpenMPCUDAForceFullRuntime || - !supportsLightweightRuntime(CGM.getContext(), D)); + ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD); EntryFunctionState EST; // Emit target region as a standalone region. 
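(Since the hunks above drop the lightweight-runtime heuristics and pick the execution mode directly through ExecutionRuntimeModesRAII, a brief sketch of the two kernel shapes involved; the functions below are illustrative assumptions, not code from this patch.)

// SPMD shape: every device thread executes the region from its first
// instruction; this is what emitSPMDKernel now selects via EM_SPMD.
void scale(int n, double *a) {
#pragma omp target teams distribute parallel for map(tofrom: a[0:n])
  for (int i = 0; i < n; ++i)
    a[i] *= 2.0;
}

// Generic (non-SPMD) shape: only the initial thread runs until the nested
// 'parallel'; this is what emitNonSPMDKernel selects via EM_NonSPMD.
void shift(int n, double *a) {
#pragma omp target map(tofrom: a[0:n])
  {
    double base = a[0]; // sequential part, initial thread only
#pragma omp parallel for
    for (int i = 0; i < n; ++i)
      a[i] += base;
  }
}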
@@ -1116,36 +833,10 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC), Twine(Name, "_exec_mode")); + GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility); CGM.addCompilerUsedGlobal(GVMode); } -void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, - llvm::Constant *Addr, - uint64_t Size, int32_t, - llvm::GlobalValue::LinkageTypes) { - // TODO: Add support for global variables on the device after declare target - // support. - llvm::Function *Fn = dyn_cast<llvm::Function>(Addr); - if (!Fn) - return; - - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); - - // Get "nvvm.annotations" metadata node. - llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); - - llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; - // Append metadata to nvvm.annotations. - MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); - - // Add a function attribute for the kernel. - Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel")); -} - void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -1166,39 +857,14 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); -/// Enum for accesseing the reserved_2 field of the ident_t struct. -enum ModeFlagsTy : unsigned { - /// Bit set to 1 when in SPMD mode. - KMP_IDENT_SPMD_MODE = 0x01, - /// Bit set to 1 when a simplified runtime is used. - KMP_IDENT_SIMPLE_RT_MODE = 0x02, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE) -}; - -/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime. 
-static const ModeFlagsTy UndefinedMode = - (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; -} // anonymous namespace - -unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const { - switch (getExecutionMode()) { - case EM_SPMD: - if (requiresFullRuntime()) - return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); - return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; - case EM_NonSPMD: - assert(requiresFullRuntime() && "Expected full runtime."); - return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); - case EM_Unknown: - return UndefinedMode; - } - llvm_unreachable("Unknown flags are requested."); -} - CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM, "_", "$") { + : CGOpenMPRuntime(CGM) { + llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true, + hasRequiresUnifiedSharedMemory(), + CGM.getLangOpts().OpenMPOffloadMandatory); + OMPBuilder.setConfig(Config); + OffloadEntriesInfoManager.setConfig(Config); + if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP can only handle device code."); @@ -1214,6 +880,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) "__omp_rtl_assume_threads_oversubscription"); OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState, "__omp_rtl_assume_no_thread_state"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism, + "__omp_rtl_assume_no_nested_parallelism"); } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, @@ -1241,33 +909,13 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { // Emit target region as a standalone region. - class NVPTXPrePostActionTy : public PrePostActionTy { - bool &IsInParallelRegion; - bool PrevIsInParallelRegion; - - public: - NVPTXPrePostActionTy(bool &IsInParallelRegion) - : IsInParallelRegion(IsInParallelRegion) {} - void Enter(CodeGenFunction &CGF) override { - PrevIsInParallelRegion = IsInParallelRegion; - IsInParallelRegion = true; - } - void Exit(CodeGenFunction &CGF) override { - IsInParallelRegion = PrevIsInParallelRegion; - } - } Action(IsInParallelRegion); - CodeGen.setAction(Action); bool PrevIsInTTDRegion = IsInTTDRegion; IsInTTDRegion = false; - bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; - IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); - IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; IsInTTDRegion = PrevIsInTTDRegion; - if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD && - !IsInParallelRegion) { + if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) { llvm::Function *WrapperFun = createParallelDataSharingWrapper(OutlinedFun, D); WrapperFunctionsMap[OutlinedFun] = WrapperFun; @@ -1330,7 +978,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); if (!LastPrivatesReductions.empty()) { GlobalizedRD = ::buildRecordForGlobalizedVars( - CGM.getContext(), llvm::None, LastPrivatesReductions, + CGM.getContext(), std::nullopt, LastPrivatesReductions, MappedDeclsFields, WarpSize); } } else if (!LastPrivatesReductions.empty()) { @@ -3307,7 +2955,7 @@ void CGOpenMPRuntimeGPU::emitReduction( ++Cnt; } const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars( - 
CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap, + CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap, C.getLangOpts().OpenMPCUDAReductionBufNum); TeamsReductions.push_back(TeamReductionRec); if (!KernelTeamsReductionPtr) { @@ -3379,7 +3027,7 @@ void CGOpenMPRuntimeGPU::emitReduction( llvm::Value *EndArgs[] = {ThreadId}; RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( - nullptr, llvm::None, + nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), EndArgs); @@ -3435,7 +3083,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF, const Type *NonQualTy = QC.strip(NativeParamType); QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); unsigned NativePointeeAddrSpace = - CGF.getContext().getTargetAddressSpace(NativePointeeTy); + CGF.getTypes().getTargetAddressSpace(NativePointeeTy); QualType TargetTy = TargetParam->getType(); llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation()); @@ -3659,16 +3307,6 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, assert(VD->isCanonicalDecl() && "Expected canonical declaration"); Data.insert(std::make_pair(VD, MappedVarData())); } - if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { - CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); - VarChecker.Visit(Body); - I->getSecond().SecondaryLocalVarData.emplace(); - DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData; - for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { - assert(VD->isCanonicalDecl() && "Expected canonical declaration"); - Data.insert(std::make_pair(VD, MappedVarData())); - } - } if (!NeedToDelayGlobalization) { emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); struct GlobalizationScope final : EHScopeStack::Cleanup { @@ -3810,7 +3448,7 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas( else VDLVal = CGF.MakeAddrLValue( VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); - llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures; FieldDecl *ThisCapture = nullptr; RD->getCaptureFields(Captures, ThisCapture); if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) { @@ -3822,13 +3460,15 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas( for (const LambdaCapture &LC : RD->captures()) { if (LC.getCaptureKind() != LCK_ByRef) continue; - const VarDecl *VD = LC.getCapturedVar(); - if (!CS->capturesVariable(VD)) + const ValueDecl *VD = LC.getCapturedVar(); + // FIXME: For now VD is always a VarDecl because OpenMP does not support + // capturing structured bindings in lambdas yet. 
+ if (!CS->capturesVariable(cast<VarDecl>(VD))) continue; auto It = Captures.find(VD); assert(It != Captures.end() && "Found lambda capture without field."); LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); - Address VDAddr = CGF.GetAddrOfLocalVar(VD); + Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD)); if (VD->getType().getCanonicalType()->isReferenceType()) VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType().getCanonicalType()) @@ -3913,6 +3553,9 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::SM_75: case CudaArch::SM_80: case CudaArch::SM_86: + case CudaArch::SM_87: + case CudaArch::SM_89: + case CudaArch::SM_90: case CudaArch::GFX600: case CudaArch::GFX601: case CudaArch::GFX602: @@ -4006,10 +3649,10 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) { llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), + llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); + return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads"); } llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index ff585efa3fce..75d140205773 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -42,8 +42,6 @@ private: ExecutionMode getExecutionMode() const; - bool requiresFullRuntime() const { return RequiresFullRuntime; } - /// Get barrier to synchronize all threads in a block. void syncCTAThreads(CodeGenFunction &CGF); @@ -66,12 +64,6 @@ private: // Base class overrides. // - /// Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr, size \a Size, and flags \a Flags. - void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) override; - /// Emit outlined function specialized for the Fork-Join /// programming model for applicable target directives on the NVPTX device. /// \param D Directive to emit. @@ -161,16 +153,12 @@ protected: /// Constant for NVPTX for better optimization. bool isDefaultLocationConstant() const override { return true; } - /// Returns additional flags that can be stored in reserved_2 field of the - /// default location. - /// For NVPTX target contains data about SPMD/Non-SPMD execution mode + - /// Full/Lightweight runtime mode. Used for better optimization. - unsigned getDefaultLocationReserved2Flags() const override; - public: explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); void clear() override; + bool isTargetCodegen() const override { return true; }; + /// Declare generalized virtual functions which need to be defined /// by all specializations of OpenMPGPURuntime Targets like AMDGCN /// and NVPTX. @@ -330,7 +318,7 @@ public: /// translating these arguments to correct target-specific arguments. void emitOutlinedFunctionCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, - ArrayRef<llvm::Value *> Args = llvm::None) const override; + ArrayRef<llvm::Value *> Args = std::nullopt) const override; /// Emits OpenMP-specific function prolog. /// Required for device constructs. @@ -386,17 +374,9 @@ private: /// to emit optimized code. 
ExecutionMode CurrentExecutionMode = EM_Unknown; - /// Check if the full runtime is required (default - yes). - bool RequiresFullRuntime = true; - - /// true if we're emitting the code for the target region and next parallel - /// region is L0 for sure. - bool IsInTargetMasterThreadRegion = false; /// true if currently emitting code for target/teams/distribute region, false /// - otherwise. bool IsInTTDRegion = false; - /// true if we're definitely in the parallel region. - bool IsInParallelRegion = false; /// Map between an outlined function and its wrapper. llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap; @@ -421,12 +401,10 @@ private: using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; struct FunctionData { DeclToAddrMapTy LocalVarData; - llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None; EscapedParamsTy EscapedParameters; llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> EscapedVariableLengthDeclsAddrs; - llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; }; /// Maps the function to the list of the globalized variables with their @@ -438,9 +416,6 @@ private: /// reductions. /// All the records are gathered into a union `union.type` is created. llvm::SmallVector<const RecordDecl *, 4> TeamsReductions; - /// Shared pointer for the global memory in the global memory buffer used for - /// the given kernel. - llvm::GlobalVariable *KernelStaticGlobalized = nullptr; /// Pair of the Non-SPMD team and all reductions variables in this team /// region. std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>> diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 6f85bca8a201..596f0bd33204 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -162,7 +162,7 @@ struct CGRecordLowering { return CharUnits::fromQuantity(DataLayout.getTypeAllocSize(Type)); } CharUnits getAlignment(llvm::Type *Type) { - return CharUnits::fromQuantity(DataLayout.getABITypeAlignment(Type)); + return CharUnits::fromQuantity(DataLayout.getABITypeAlign(Type)); } bool isZeroInitializable(const FieldDecl *FD) { return Types.isZeroInitializable(FD->getType()); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 05ab16668743..248ffb544014 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/SaveAndRestore.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -254,6 +255,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPTaskyieldDirectiveClass: EmitOMPTaskyieldDirective(cast<OMPTaskyieldDirective>(*S)); break; + case Stmt::OMPErrorDirectiveClass: + EmitOMPErrorDirective(cast<OMPErrorDirective>(*S)); + break; case Stmt::OMPBarrierDirectiveClass: EmitOMPBarrierDirective(cast<OMPBarrierDirective>(*S)); break; @@ -571,9 +575,9 @@ void CodeGenFunction::EmitBlock(llvm::BasicBlock *BB, bool IsFinished) { // Place the block after the current block, if possible, or else at // the end of the function. 
if (CurBB && CurBB->getParent()) - CurFn->getBasicBlockList().insertAfter(CurBB->getIterator(), BB); + CurFn->insert(std::next(CurBB->getIterator()), BB); else - CurFn->getBasicBlockList().push_back(BB); + CurFn->insert(CurFn->end(), BB); Builder.SetInsertPoint(BB); } @@ -598,15 +602,14 @@ void CodeGenFunction::EmitBlockAfterUses(llvm::BasicBlock *block) { bool inserted = false; for (llvm::User *u : block->users()) { if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(u)) { - CurFn->getBasicBlockList().insertAfter(insn->getParent()->getIterator(), - block); + CurFn->insert(std::next(insn->getParent()->getIterator()), block); inserted = true; break; } } if (!inserted) - CurFn->getBasicBlockList().push_back(block); + CurFn->insert(CurFn->end(), block); Builder.SetInsertPoint(block); } @@ -718,11 +721,10 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { break; } } - SaveAndRestore<bool> save_nomerge(InNoMergeAttributedStmt, nomerge); - SaveAndRestore<bool> save_noinline(InNoInlineAttributedStmt, noinline); - SaveAndRestore<bool> save_alwaysinline(InAlwaysInlineAttributedStmt, - alwaysinline); - SaveAndRestore<const CallExpr *> save_musttail(MustTailCall, musttail); + SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge); + SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline); + SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline); + SaveAndRestore save_musttail(MustTailCall, musttail); EmitStmt(S.getSubStmt(), S.getAttrs()); } @@ -815,11 +817,20 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // Prefer the PGO based weights over the likelihood attribute. // When the build isn't optimized the metadata isn't used, so don't generate // it. + // Also, differentiate between disabled PGO and a never executed branch with + // PGO. Assuming PGO is in use: + // - we want to ignore the [[likely]] attribute if the branch is never + // executed, + // - assuming the profile is poor, preserving the attribute may still be + // beneficial. + // As an approximation, preserve the attribute only if both the branch and the + // parent context were not executed. Stmt::Likelihood LH = Stmt::LH_None; - uint64_t Count = getProfileCount(S.getThen()); - if (!Count && CGM.getCodeGenOpts().OptimizationLevel) + uint64_t ThenCount = getProfileCount(S.getThen()); + if (!ThenCount && !getCurrentProfileCount() && + CGM.getCodeGenOpts().OptimizationLevel) LH = Stmt::getLikelihood(S.getThen(), S.getElse()); - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, LH); + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, ThenCount, LH); // Emit the 'then' code. EmitBlock(ThenBlock); @@ -1458,7 +1469,7 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S, llvm::BasicBlock *FalseDest = CaseRangeBlock; CaseRangeBlock = createBasicBlock("sw.caserange"); - CurFn->getBasicBlockList().push_back(CaseRangeBlock); + CurFn->insert(CurFn->end(), CaseRangeBlock); Builder.SetInsertPoint(CaseRangeBlock); // Emit range check. @@ -1509,6 +1520,21 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S, llvm::ConstantInt *CaseVal = Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext())); + + // Emit debuginfo for the case value if it is an enum value. 
+ const ConstantExpr *CE; + if (auto ICE = dyn_cast<ImplicitCastExpr>(S.getLHS())) + CE = dyn_cast<ConstantExpr>(ICE->getSubExpr()); + else + CE = dyn_cast<ConstantExpr>(S.getLHS()); + if (CE) { + if (auto DE = dyn_cast<DeclRefExpr>(CE->getSubExpr())) + if (CGDebugInfo *Dbg = getDebugInfo()) + if (CGM.getCodeGenOpts().hasReducedDebugInfo()) + Dbg->EmitGlobalVariable(DE->getDecl(), + APValue(llvm::APSInt(CaseVal->getValue()))); + } + if (SwitchLikelihood) SwitchLikelihood->push_back(Stmt::getLikelihood(Attrs)); @@ -1843,11 +1869,11 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S, FoundCase; } -static Optional<SmallVector<uint64_t, 16>> +static std::optional<SmallVector<uint64_t, 16>> getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) { // Are there enough branches to weight them? if (Likelihoods.size() <= 1) - return None; + return std::nullopt; uint64_t NumUnlikely = 0; uint64_t NumNone = 0; @@ -1868,7 +1894,7 @@ getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) { // Is there a likelihood attribute used? if (NumUnlikely == 0 && NumLikely == 0) - return None; + return std::nullopt; // When multiple cases share the same code they can be combined during // optimization. In that case the weights of the branch will be the sum of @@ -2050,7 +2076,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { } else if (SwitchLikelihood) { assert(SwitchLikelihood->size() == 1 + SwitchInsn->getNumCases() && "switch likelihoods do not match switch cases"); - Optional<SmallVector<uint64_t, 16>> LHW = + std::optional<SmallVector<uint64_t, 16>> LHW = getLikelihoodWeights(*SwitchLikelihood); if (LHW) { llvm::MDBuilder MDHelper(CGM.getLLVMContext()); @@ -2256,9 +2282,9 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, // Attach readnone and readonly attributes. if (!HasSideEffect) { if (ReadNone) - Result.addFnAttr(llvm::Attribute::ReadNone); + Result.setDoesNotAccessMemory(); else if (ReadOnly) - Result.addFnAttr(llvm::Attribute::ReadOnly); + Result.setOnlyReadsMemory(); } // Add elementtype attribute for indirect constraints. @@ -2343,6 +2369,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Type *> ArgElemTypes; std::vector<llvm::Value*> Args; llvm::BitVector ResultTypeRequiresCast; + llvm::BitVector ResultRegIsFlagReg; // Keep track of inout constraints. 
std::string InOutConstraints; @@ -2400,6 +2427,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultRegQualTys.push_back(QTy); ResultRegDests.push_back(Dest); + bool IsFlagReg = llvm::StringRef(OutputConstraint).startswith("{@cc"); + ResultRegIsFlagReg.push_back(IsFlagReg); + llvm::Type *Ty = ConvertTypeForMem(QTy); const bool RequiresCast = Info.allowsRegister() && (getTargetHooks().isScalarizableAsmOperand(*this, Ty) || @@ -2448,7 +2478,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back())) LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); + VT->getPrimitiveSizeInBits().getKnownMinValue()); } else { Address DestAddr = Dest.getAddress(*this); // Matrix types in memory are represented by arrays, but accessed through @@ -2487,7 +2517,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); + VT->getPrimitiveSizeInBits().getKnownMinValue()); // Only tie earlyclobber physregs. if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber())) InOutConstraints += llvm::utostr(i); @@ -2577,7 +2607,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); + VT->getPrimitiveSizeInBits().getKnownMinValue()); ArgTypes.push_back(Arg->getType()); ArgElemTypes.push_back(ArgElemType); @@ -2717,10 +2747,21 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // ResultRegDests can be also populated by addReturnRegisterOutputs() above, // in which case its size may grow. assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); + assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; llvm::Type *TruncTy = ResultTruncRegTypes[i]; + if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { + // Target must guarantee the Value `Tmp` here is lowered to a boolean + // value. + llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2); + llvm::Value *IsBooleanValue = + Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two); + llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); + Builder.CreateCall(FnAssume, IsBooleanValue); + } + // If the result type of the LLVM IR asm doesn't match the result type of // the expression, do the conversion. 
if (ResultRegTypes[i] != ResultTruncRegTypes[i]) { diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index aa55cdaca5dc..6bc30ad0302e 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" +#include <optional> using namespace clang; using namespace CodeGen; using namespace llvm::omp; @@ -74,7 +75,7 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { public: OMPLexicalScope( CodeGenFunction &CGF, const OMPExecutableDirective &S, - const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None, + const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt, const bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { @@ -114,7 +115,7 @@ class OMPParallelScope final : public OMPLexicalScope { public: OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, EmitPreInitStmt(S)) {} }; @@ -129,7 +130,7 @@ class OMPTeamsScope final : public OMPLexicalScope { public: OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, EmitPreInitStmt(S)) {} }; @@ -446,7 +447,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( FunctionDecl *DebugFunctionDecl = nullptr; if (!FO.UIntPtrCastRequired) { FunctionProtoType::ExtProtoInfo EPI; - QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); + QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI); DebugFunctionDecl = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), SourceLocation(), DeclarationName(), FunctionTy, @@ -708,8 +709,9 @@ void CodeGenFunction::EmitOMPAggregateAssign( llvm::Value *SrcBegin = SrcAddr.getPointer(); llvm::Value *DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - llvm::Value *DestEnd = - Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); + llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(), + DestBegin, NumElements); + // The basic structure here is a while-do loop. llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); @@ -1347,6 +1349,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( case OMPD_parallel_for_simd: case OMPD_task: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_taskgroup: @@ -1593,6 +1596,19 @@ static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, llvm::SmallVectorImpl<llvm::Value *> &) {} +static void emitOMPCopyinClause(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + bool Copyins = CGF.EmitOMPCopyinClause(S); + if (Copyins) { + // Emit implicit barrier to synchronize threads and avoid data races on + // propagation master's thread values of threadprivate variables to local + // instances of that variables of all other implicit threads. 
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } +} + Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( CodeGenFunction &CGF, const VarDecl *VD) { CodeGenModule &CGM = CGF.CGM; @@ -1774,16 +1790,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); - bool Copyins = CGF.EmitOMPCopyinClause(S); + emitOMPCopyinClause(CGF, S); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); - if (Copyins) { - // Emit implicit barrier to synchronize threads and avoid data races on - // propagation master's thread values of threadprivate variables to local - // instances of that variables of all other implicit threads. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); - } CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -2582,8 +2590,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); emitPostUpdateForReductionClause(CGF, S, [](CodeGenFunction &) { return nullptr; }); + LoopScope.restoreMap(); + CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); } - CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); // Emit: if (PreCond) - end. if (ContBlock) { CGF.EmitBranch(ContBlock); @@ -2594,8 +2603,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { // Check for unsupported clauses for (OMPClause *C : S.clauses()) { - // Currently only simdlen clause is supported - if (!isa<OMPSimdlenClause>(C)) + // Currently only order, simdlen and safelen clauses are supported + if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) || + isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C))) return false; } @@ -2621,6 +2631,36 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { } return true; } +static llvm::MapVector<llvm::Value *, llvm::Value *> +GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { + llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars; + for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { + llvm::APInt ClauseAlignment(64, 0); + if (const Expr *AlignmentExpr = Clause->getAlignment()) { + auto *AlignmentCI = + cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); + ClauseAlignment = AlignmentCI->getValue(); + } + for (const Expr *E : Clause->varlists()) { + llvm::APInt Alignment(ClauseAlignment); + if (Alignment == 0) { + // OpenMP [2.8.1, Description] + // If no optional parameter is specified, implementation-defined default + // alignments for SIMD instructions on the target platforms are assumed. 
+ Alignment = + CGF.getContext() + .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( + E->getType()->getPointeeType())) + .getQuantity(); + } + assert((Alignment == 0 || Alignment.isPowerOf2()) && + "alignment is not power of 2"); + llvm::Value *PtrValue = CGF.EmitScalarExpr(E); + AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); + } + } + return AlignedVars; +} void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { bool UseOMPIRBuilder = @@ -2630,6 +2670,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { PrePostActionTy &) { // Use the OpenMPIRBuilder if enabled. if (UseOMPIRBuilder) { + llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars = + GetAlignedMapping(S, CGF); // Emit the associated statement and get its loop representation. const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = @@ -2646,7 +2688,24 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); Simdlen = Val; } - OMPBuilder.applySimd(CLI, Simdlen); + llvm::ConstantInt *Safelen = nullptr; + if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { + RValue Len = + this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); + Safelen = Val; + } + llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; + if (const auto *C = S.getSingleClause<OMPOrderClause>()) { + if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) { + Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; + } + } + // Add simd metadata to the collapsed loop. Do not generate + // another loop for if clause. Support for if clause is done earlier. + OMPBuilder.applySimd(CLI, AlignedVars, + /*IfCond*/ nullptr, Order, Simdlen, Safelen); return; } }; @@ -3426,11 +3485,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( EmitOMPLastprivateClauseFinal( S, isOpenMPSimdDirective(S.getDirectiveKind()), Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); + LoopScope.restoreMap(); + EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); } - EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { - return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); - }); DoacrossCleanupScope.ForceCleanup(); // We're now done with the loop, so jump to the continuation block. if (ContBlock) { @@ -4340,6 +4400,7 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + emitOMPCopyinClause(CGF, S); (void)emitWorksharingDirective(CGF, S, S.hasCancel()); }; { @@ -4373,6 +4434,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // directives: 'parallel' with 'for' directive. 
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + emitOMPCopyinClause(CGF, S); (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; { @@ -4407,16 +4469,8 @@ void CodeGenFunction::EmitOMPParallelMasterDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); - bool Copyins = CGF.EmitOMPCopyinClause(S); + emitOMPCopyinClause(CGF, S); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); - if (Copyins) { - // Emit implicit barrier to synchronize threads and avoid data races on - // propagation master's thread values of threadprivate variables to local - // instances of that variables of all other implicit threads. - CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); - } CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); @@ -4441,6 +4495,7 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( // directives: 'parallel' with 'sections' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + emitOMPCopyinClause(CGF, S); CGF.EmitSections(S); }; { @@ -4892,7 +4947,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); - OMPLexicalScope Scope(*this, S, llvm::None, + OMPLexicalScope Scope(*this, S, std::nullopt, !isOpenMPParallelDirective(S.getDirectiveKind()) && !isOpenMPSimdDirective(S.getDirectiveKind())); TaskGen(*this, OutlinedFn, Data); @@ -5192,6 +5247,16 @@ void CodeGenFunction::EmitOMPTaskyieldDirective( CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); } +void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { + const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); + Expr *ME = MC ? 
MC->getMessageString() : nullptr; + const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); + bool IsFatal = false; + if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) + IsFatal = true; + CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal); +} + void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); } @@ -5200,6 +5265,7 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { OMPTaskDataTy Data; // Build list of dependences buildDependences(S, Data); + Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); } @@ -5263,9 +5329,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { *this, [&S]() -> ArrayRef<const Expr *> { if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) - return llvm::makeArrayRef(FlushClause->varlist_begin(), - FlushClause->varlist_end()); - return llvm::None; + return llvm::ArrayRef(FlushClause->varlist_begin(), + FlushClause->varlist_end()); + return std::nullopt; }(), S.getBeginLoc(), AO); } @@ -5934,7 +6000,7 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, case llvm::AtomicOrdering::Acquire: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Acquire); break; case llvm::AtomicOrdering::Monotonic: @@ -5963,7 +6029,7 @@ static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, case llvm::AtomicOrdering::Release: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: @@ -6154,7 +6220,7 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, case llvm::AtomicOrdering::Release: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: @@ -6269,17 +6335,17 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, // operation is also an acquire flush. 
switch (AO) { case llvm::AtomicOrdering::Release: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Acquire); break; case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: CGF.CGM.getOpenMPRuntime().emitFlush( - CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease); + CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease); break; case llvm::AtomicOrdering::Monotonic: break; @@ -6392,95 +6458,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, IsPostfixUpdate, IsFailOnly, Loc); break; } - case OMPC_if: - case OMPC_final: - case OMPC_num_threads: - case OMPC_private: - case OMPC_firstprivate: - case OMPC_lastprivate: - case OMPC_reduction: - case OMPC_task_reduction: - case OMPC_in_reduction: - case OMPC_safelen: - case OMPC_simdlen: - case OMPC_sizes: - case OMPC_full: - case OMPC_partial: - case OMPC_allocator: - case OMPC_allocate: - case OMPC_collapse: - case OMPC_default: - case OMPC_seq_cst: - case OMPC_acq_rel: - case OMPC_acquire: - case OMPC_release: - case OMPC_relaxed: - case OMPC_shared: - case OMPC_linear: - case OMPC_aligned: - case OMPC_copyin: - case OMPC_copyprivate: - case OMPC_flush: - case OMPC_depobj: - case OMPC_proc_bind: - case OMPC_schedule: - case OMPC_ordered: - case OMPC_nowait: - case OMPC_untied: - case OMPC_threadprivate: - case OMPC_depend: - case OMPC_mergeable: - case OMPC_device: - case OMPC_threads: - case OMPC_simd: - case OMPC_map: - case OMPC_num_teams: - case OMPC_thread_limit: - case OMPC_priority: - case OMPC_grainsize: - case OMPC_nogroup: - case OMPC_num_tasks: - case OMPC_hint: - case OMPC_dist_schedule: - case OMPC_defaultmap: - case OMPC_uniform: - case OMPC_to: - case OMPC_from: - case OMPC_use_device_ptr: - case OMPC_use_device_addr: - case OMPC_is_device_ptr: - case OMPC_has_device_addr: - case OMPC_unified_address: - case OMPC_unified_shared_memory: - case OMPC_reverse_offload: - case OMPC_dynamic_allocators: - case OMPC_atomic_default_mem_order: - case OMPC_device_type: - case OMPC_match: - case OMPC_nontemporal: - case OMPC_order: - case OMPC_destroy: - case OMPC_detach: - case OMPC_inclusive: - case OMPC_exclusive: - case OMPC_uses_allocators: - case OMPC_affinity: - case OMPC_init: - case OMPC_inbranch: - case OMPC_notinbranch: - case OMPC_link: - case OMPC_indirect: - case OMPC_use: - case OMPC_novariants: - case OMPC_nocontext: - case OMPC_filter: - case OMPC_when: - case OMPC_adjust_args: - case OMPC_append_args: - case OMPC_memory_order: - case OMPC_bind: - case OMPC_align: - case OMPC_cancellation_construct_type: + default: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -7658,6 +7636,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, (*LIP)->getType(), S.getBeginLoc()))); } + LoopScope.restoreMap(); CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, @@ -7714,7 +7693,7 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective( }; auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); - OMPLexicalScope Scope(*this, S, 
llvm::None, /*EmitPreInitStmt=*/false);
+  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
@@ -7792,7 +7771,19 @@ void CodeGenFunction::EmitOMPGenericLoopDirective(
     const OMPGenericLoopDirective &S) {
   // Unimplemented, just inline the underlying statement for now.
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    // Emit the loop iteration variable.
+    const Stmt *CS =
+        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
+    const auto *ForS = dyn_cast<ForStmt>(CS);
+    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
+      OMPPrivateScope LoopScope(CGF);
+      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+      (void)LoopScope.Privatize();
+      CGF.EmitStmt(CS);
+      LoopScope.restoreMap();
+    } else {
+      CGF.EmitStmt(CS);
+    }
   };
   OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
diff --git a/clang/lib/CodeGen/CGVTT.cpp b/clang/lib/CodeGen/CGVTT.cpp
index ebac9196df02..d0c8e351626b 100644
--- a/clang/lib/CodeGen/CGVTT.cpp
+++ b/clang/lib/CodeGen/CGVTT.cpp
@@ -114,7 +114,7 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
   llvm::ArrayType *ArrayType =
       llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size());
-  unsigned Align = CGM.getDataLayout().getABITypeAlignment(CGM.Int8PtrTy);
+  llvm::Align Align = CGM.getDataLayout().getABITypeAlign(CGM.Int8PtrTy);
   llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
       Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align);
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index cdd40d2a6a2e..a0b5d9e4b096 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -128,7 +128,7 @@ static void resolveTopLevelMetadata(llvm::Function *Fn,
   // Find all llvm.dbg.declare intrinsics and resolve the DILocalVariable nodes
   // they are referencing.
-  for (auto &BB : Fn->getBasicBlockList()) {
+  for (auto &BB : *Fn) {
     for (auto &I : BB) {
       if (auto *DII = dyn_cast<llvm::DbgVariableIntrinsic>(&I)) {
         auto *DILocal = DII->getVariable();
@@ -664,6 +664,12 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,
       proxy->setVisibility(llvm::GlobalValue::HiddenVisibility);
       proxy->setComdat(module.getOrInsertComdat(rttiProxyName));
     }
+    // Do not instrument the rtti proxies with hwasan to avoid a duplicate
+    // symbol error. Aliases generated by hwasan will retain the same name but
+    // the addresses they are set to may have different tags from different
+    // compilation units. We don't run into this without hwasan because the
+    // proxies are in comdat groups, but those aren't propagated to the alias.
+ RemoveHwasanMetadata(proxy); } target = proxy; } @@ -672,15 +678,23 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder, /*position=*/vtableAddressPoint); } -bool CodeGenVTables::useRelativeLayout() const { +static bool UseRelativeLayout(const CodeGenModule &CGM) { return CGM.getTarget().getCXXABI().isItaniumFamily() && CGM.getItaniumVTableContext().isRelativeLayout(); } +bool CodeGenVTables::useRelativeLayout() const { + return UseRelativeLayout(CGM); +} + +llvm::Type *CodeGenModule::getVTableComponentType() const { + if (UseRelativeLayout(*this)) + return Int32Ty; + return Int8PtrTy; +} + llvm::Type *CodeGenVTables::getVTableComponentType() const { - if (useRelativeLayout()) - return CGM.Int32Ty; - return CGM.Int8PtrTy; + return CGM.getVTableComponentType(); } static void AddPointerLayoutOffset(const CodeGenModule &CGM, @@ -895,7 +909,7 @@ llvm::GlobalVariable *CodeGenVTables::GenerateConstructionVTable( if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage) Linkage = llvm::GlobalVariable::InternalLinkage; - unsigned Align = CGM.getDataLayout().getABITypeAlignment(VTType); + llvm::Align Align = CGM.getDataLayout().getABITypeAlign(VTType); // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = @@ -921,12 +935,33 @@ llvm::GlobalVariable *CodeGenVTables::GenerateConstructionVTable( CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get()); - if (UsingRelativeLayout && !VTable->isDSOLocal()) - GenerateRelativeVTableAlias(VTable, OutName); + if (UsingRelativeLayout) { + RemoveHwasanMetadata(VTable); + if (!VTable->isDSOLocal()) + GenerateRelativeVTableAlias(VTable, OutName); + } return VTable; } +// Ensure this vtable is not instrumented by hwasan. That is, a global alias is +// not generated for it. This is mainly used by the relative-vtables ABI where +// vtables instead contain 32-bit offsets between the vtable and function +// pointers. Hwasan is disabled for these vtables for now because the tag in a +// vtable pointer may fail the overflow check when resolving 32-bit PLT +// relocations. A future alternative for this would be finding which usages of +// the vtable can continue to use the untagged hwasan value without any loss of +// value in hwasan. +void CodeGenVTables::RemoveHwasanMetadata(llvm::GlobalValue *GV) const { + if (CGM.getLangOpts().Sanitize.has(SanitizerKind::HWAddress)) { + llvm::GlobalValue::SanitizerMetadata Meta; + if (GV->hasSanitizerMetadata()) + Meta = GV->getSanitizerMetadata(); + Meta.NoHWAddress = true; + GV->setSanitizerMetadata(Meta); + } +} + // If the VTable is not dso_local, then we will not be able to indicate that // the VTable does not need a relocation and move into rodata. A frequent // time this can occur is for classes that should be made public from a DSO @@ -1254,8 +1289,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, if (!getCodeGenOpts().LTOUnit) return; - CharUnits PointerWidth = - Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0)); + CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint; std::vector<AddressPoint> AddressPoints; @@ -1293,7 +1327,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, ArrayRef<VTableComponent> Comps = VTLayout.vtable_components(); for (auto AP : AddressPoints) { // Create type metadata for the address point. 
- AddVTableTypeMetadata(VTable, PointerWidth * AP.second, AP.first); + AddVTableTypeMetadata(VTable, ComponentWidth * AP.second, AP.first); // The class associated with each address point could also potentially be // used for indirect calls via a member function pointer, so we need to @@ -1306,7 +1340,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, Context.getMemberPointerType( Comps[I].getFunctionDecl()->getType(), Context.getRecordType(AP.first).getTypePtr())); - VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD); + VTable->addTypeMetadata((ComponentWidth * I).getQuantity(), MD); } } diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h index bdfc075ee305..e7b59d94f257 100644 --- a/clang/lib/CodeGen/CGVTables.h +++ b/clang/lib/CodeGen/CGVTables.h @@ -102,6 +102,10 @@ public: return *cast<ItaniumVTableContext>(VTContext); } + const ItaniumVTableContext &getItaniumVTableContext() const { + return *cast<ItaniumVTableContext>(VTContext); + } + MicrosoftVTableContext &getMicrosoftVTableContext() { return *cast<MicrosoftVTableContext>(VTContext); } @@ -154,6 +158,9 @@ public: /// when a vtable may not be dso_local. void GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable, llvm::StringRef AliasNameRef); + + /// Specify a global should not be instrumented with hwasan. + void RemoveHwasanMetadata(llvm::GlobalValue *GV) const; }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 12c6b3f49c43..2b219267869e 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -49,6 +49,7 @@ #include "llvm/Transforms/IPO/Internalize.h" #include <memory> +#include <optional> using namespace clang; using namespace llvm; @@ -422,7 +423,8 @@ namespace clang { bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const; - Optional<FullSourceLoc> getFunctionSourceLocation(const Function &F) const; + std::optional<FullSourceLoc> + getFunctionSourceLocation(const Function &F) const; void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); /// Specialized handler for InlineAsm diagnostic. @@ -435,6 +437,11 @@ namespace clang { /// \return True if the diagnostic has been successfully reported, false /// otherwise. bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); + /// Specialized handler for ResourceLimit diagnostic. + /// \return True if the diagnostic has been successfully reported, false + /// otherwise. + bool ResourceLimitDiagHandler(const llvm::DiagnosticInfoResourceLimit &D); + /// Specialized handler for unsupported backend feature diagnostic. void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); /// Specialized handlers for optimization remarks. 
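As context for the BackendConsumer hooks declared above: LLVM delivers these reports through the LLVMContext diagnostic stream, and DiagnosticInfoResourceLimit carries exactly the function, resource name, size, and limit that the new handler forwards to the frontend. A minimal standalone sketch, not part of this patch, of intercepting the same diagnostic kind (the handler name and output format are invented for illustration):

#include "llvm/IR/DiagnosticHandler.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

namespace {
// Hypothetical handler mirroring what BackendConsumer::ResourceLimitDiagHandler
// does with the same fields.
struct ResourceLimitHandler : llvm::DiagnosticHandler {
  bool handleDiagnostics(const llvm::DiagnosticInfo &DI) override {
    if (const auto *D = llvm::dyn_cast<llvm::DiagnosticInfoResourceLimit>(&DI)) {
      llvm::errs() << D->getFunction().getName() << ": " << D->getResourceName()
                   << " (" << D->getResourceSize() << ") exceeds limit ("
                   << D->getResourceLimit() << ")\n";
      return true; // handled; suppress the default printer
    }
    return false; // let other diagnostic kinds fall through
  }
};
} // namespace
// Installed with: Ctx.setDiagnosticHandler(std::make_unique<ResourceLimitHandler>());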
@@ -623,10 +630,23 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { if (!Loc) return false; - // FIXME: Shouldn't need to truncate to uint32_t Diags.Report(*Loc, diag::warn_fe_frame_larger_than) - << static_cast<uint32_t>(D.getStackSize()) - << static_cast<uint32_t>(D.getStackLimit()) + << D.getStackSize() + << D.getStackLimit() + << llvm::demangle(D.getFunction().getName().str()); + return true; +} + +bool BackendConsumer::ResourceLimitDiagHandler( + const llvm::DiagnosticInfoResourceLimit &D) { + auto Loc = getFunctionSourceLocation(D.getFunction()); + if (!Loc) + return false; + unsigned DiagID = diag::err_fe_backend_resource_limit; + ComputeDiagID(D.getSeverity(), backend_resource_limit, DiagID); + + Diags.Report(*Loc, DiagID) + << D.getResourceName() << D.getResourceSize() << D.getResourceLimit() << llvm::demangle(D.getFunction().getName().str()); return true; } @@ -673,14 +693,14 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( return Loc; } -Optional<FullSourceLoc> +std::optional<FullSourceLoc> BackendConsumer::getFunctionSourceLocation(const Function &F) const { auto Hash = llvm::hash_value(F.getName()); for (const auto &Pair : ManglingFullSourceLocs) { if (Pair.first == Hash) return Pair.second; } - return Optional<FullSourceLoc>(); + return std::nullopt; } void BackendConsumer::UnsupportedDiagHandler( @@ -874,6 +894,11 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { return; ComputeDiagID(Severity, backend_frame_larger_than, DiagID); break; + case llvm::DK_ResourceLimit: + if (ResourceLimitDiagHandler(cast<DiagnosticInfoResourceLimit>(DI))) + return; + ComputeDiagID(Severity, backend_resource_limit, DiagID); + break; case DK_Linker: ComputeDiagID(Severity, linking_module, DiagID); break; @@ -1078,6 +1103,8 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); + VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers); + // For ThinLTO backend invocations, ensure that the context // merges types based on ODR identifiers. We also need to read // the correct module out of a multi-module bitcode file. 
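The getFunctionSourceLocation change above is typical of the llvm::Optional to std::optional migration running through this whole diff: an empty result is now spelled std::nullopt rather than a default-constructed Optional. A reduced sketch of the same lookup shape, with stand-in types since a real FullSourceLoc needs a SourceManager:

#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include <vector>

// Stand-in for the (hash, FullSourceLoc) pairs held in ManglingFullSourceLocs.
using LocationTable = std::vector<std::pair<uint64_t, std::string>>;

std::optional<std::string> lookupLocation(const LocationTable &Table,
                                          uint64_t NameHash) {
  for (const auto &Entry : Table)
    if (Entry.first == NameHash)
      return Entry.second; // implicit conversion into the optional
  return std::nullopt;     // was: return Optional<FullSourceLoc>();
}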
@@ -1157,7 +1184,7 @@ void CodeGenAction::ExecuteAction() { SourceManager &SM = CI.getSourceManager(); FileID FID = SM.getMainFileID(); - Optional<MemoryBufferRef> MainFile = SM.getBufferOrNone(FID); + std::optional<MemoryBufferRef> MainFile = SM.getBufferOrNone(FID); if (!MainFile) return; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 5012bd822bd3..8cbe2a540744 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -16,6 +16,7 @@ #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGDebugInfo.h" +#include "CGHLSLRuntime.h" #include "CGOpenMPRuntime.h" #include "CodeGenModule.h" #include "CodeGenPGO.h" @@ -45,6 +46,7 @@ #include "llvm/Support/CRC.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -172,10 +174,11 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) { mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs()); mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs()); mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero()); - mergeFnAttrValue("unsafe-fp-math", FPFeatures.getAllowFPReassociate() && - FPFeatures.getAllowReciprocal() && - FPFeatures.getAllowApproxFunc() && - FPFeatures.getNoSignedZero()); + mergeFnAttrValue( + "unsafe-fp-math", + FPFeatures.getAllowFPReassociate() && FPFeatures.getAllowReciprocal() && + FPFeatures.getAllowApproxFunc() && FPFeatures.getNoSignedZero() && + FPFeatures.allowFPContractAcrossStatement()); } CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() { @@ -317,8 +320,10 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) { if (!BB) return; - if (!BB->use_empty()) - return CGF.CurFn->getBasicBlockList().push_back(BB); + if (!BB->use_empty()) { + CGF.CurFn->insert(CGF.CurFn->end(), BB); + return; + } delete BB; } @@ -356,17 +361,18 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { bool HasOnlyLifetimeMarkers = HasCleanups && EHStack.containsOnlyLifetimeMarkers(PrologueCleanupDepth); bool EmitRetDbgLoc = !HasCleanups || HasOnlyLifetimeMarkers; + + std::optional<ApplyDebugLocation> OAL; if (HasCleanups) { // Make sure the line table doesn't jump back into the body for // the ret after it's been at EndLoc. - Optional<ApplyDebugLocation> AL; if (CGDebugInfo *DI = getDebugInfo()) { if (OnlySimpleReturnStmts) DI->EmitLocation(Builder, EndLoc); else // We may not have a valid end location. Try to apply it anyway, and // fall back to an artificial location if needed. - AL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc); + OAL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc); } PopCleanupBlocks(PrologueCleanupDepth); @@ -477,13 +483,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { if (auto *VT = dyn_cast<llvm::VectorType>(A.getType())) LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); + VT->getPrimitiveSizeInBits().getKnownMinValue()); // Update vector width based on return type. 
if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType())) LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); + VT->getPrimitiveSizeInBits().getKnownMinValue()); if (CurFnInfo->getMaxVectorWidth() > LargestVectorWidth) LargestVectorWidth = CurFnInfo->getMaxVectorWidth(); @@ -495,10 +501,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // 4. Width of vector arguments and return types for this function. // 5. Width of vector aguments and return types for functions called by this // function. - CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); + if (getContext().getTargetInfo().getTriple().isX86()) + CurFn->addFnAttr("min-legal-vector-width", + llvm::utostr(LargestVectorWidth)); // Add vscale_range attribute if appropriate. - Optional<std::pair<unsigned, unsigned>> VScaleRange = + std::optional<std::pair<unsigned, unsigned>> VScaleRange = getContext().getTargetInfo().getVScaleRange(getLangOpts()); if (VScaleRange) { CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs( @@ -699,7 +707,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, CurCodeDecl = D; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (FD && FD->usesSEHTry()) - CurSEHParent = FD; + CurSEHParent = GD; CurFuncDecl = (D ? D->getNonClosureContext() : nullptr); FnRetTy = RetTy; CurFn = Fn; @@ -724,7 +732,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, const bool SanitizeBounds = SanOpts.hasOneOf(SanitizerKind::Bounds); bool NoSanitizeCoverage = false; - for (auto Attr : D->specific_attrs<NoSanitizeAttr>()) { + for (auto *Attr : D->specific_attrs<NoSanitizeAttr>()) { // Apply the no_sanitize* attributes to SanOpts. SanitizerMask mask = Attr->getMask(); SanOpts.Mask &= ~mask; @@ -842,8 +850,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, auto FuncGroups = CGM.getCodeGenOpts().XRayTotalFunctionGroups; if (FuncGroups > 1) { - auto FuncName = llvm::makeArrayRef<uint8_t>( - CurFn->getName().bytes_begin(), CurFn->getName().bytes_end()); + auto FuncName = llvm::ArrayRef<uint8_t>(CurFn->getName().bytes_begin(), + CurFn->getName().bytes_end()); auto Group = crc32(FuncName) % FuncGroups; if (Group != CGM.getCodeGenOpts().XRaySelectedFunctionGroup && !AlwaysXRayAttr) @@ -851,9 +859,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, } } - if (CGM.getCodeGenOpts().getProfileInstr() != CodeGenOptions::ProfileNone) - if (CGM.isFunctionBlockedFromProfileInstr(Fn, Loc)) + if (CGM.getCodeGenOpts().getProfileInstr() != CodeGenOptions::ProfileNone) { + switch (CGM.isFunctionBlockedFromProfileInstr(Fn, Loc)) { + case ProfileList::Skip: + Fn->addFnAttr(llvm::Attribute::SkipProfile); + break; + case ProfileList::Forbid: Fn->addFnAttr(llvm::Attribute::NoProfile); + break; + case ProfileList::Allow: + break; + } + } unsigned Count, Offset; if (const auto *Attr = @@ -874,7 +891,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // backends as they don't need it -- instructions on these architectures are // always atomically patchable at runtime. if (CGM.getCodeGenOpts().HotPatch && - getContext().getTargetInfo().getTriple().isX86()) + getContext().getTargetInfo().getTriple().isX86() && + getContext().getTargetInfo().getTriple().getEnvironment() != + llvm::Triple::CODE16) Fn->addFnAttr("patchable-function", "prologue-short-redirect"); // Add no-jump-tables value. 
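On the profile-instrumentation change in StartFunction above: isFunctionBlockedFromProfileInstr now returns a three-way ProfileList decision instead of a bool, and each outcome maps to a distinct IR attribute (SkipProfile is new in this patch). A small sketch of that mapping, using a local stand-in enum rather than clang's actual ProfileList type:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Stand-in for clang's ProfileList decision (assumed naming, for illustration).
enum class ProfileDecision { Allow, Skip, Forbid };

void applyProfileDecision(llvm::Function &Fn, ProfileDecision D) {
  switch (D) {
  case ProfileDecision::Skip:
    // Excluded from instrumentation; also consulted by
    // incrementProfileCounter later in this diff.
    Fn.addFnAttr(llvm::Attribute::SkipProfile);
    break;
  case ProfileDecision::Forbid:
    Fn.addFnAttr(llvm::Attribute::NoProfile);
    break;
  case ProfileDecision::Allow:
    break; // instrument as usual
  }
}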
@@ -941,7 +960,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // If we're checking nullability, we need to know whether we can check the // return value. Initialize the flag to 'true' and refine it in EmitParmDecl. if (SanOpts.has(SanitizerKind::NullabilityReturn)) { - auto Nullability = FnRetTy->getNullability(getContext()); + auto Nullability = FnRetTy->getNullability(); if (Nullability && *Nullability == NullabilityKind::NonNull) { if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>())) @@ -1128,6 +1147,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (getLangOpts().OpenMP && CurCodeDecl) CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl); + // Handle emitting HLSL entry functions. + if (D && D->hasAttr<HLSLShaderAttr>()) + CGM.getHLSLRuntime().emitEntryFunction(FD, Fn); + EmitFunctionProlog(*CurFnInfo, CurFn, Args); if (isa_and_nonnull<CXXMethodDecl>(D) && @@ -1450,7 +1473,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, llvm::Value *IsFalse = Builder.getFalse(); EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return), SanitizerHandler::MissingReturn, - EmitCheckSourceLocation(FD->getLocation()), None); + EmitCheckSourceLocation(FD->getLocation()), std::nullopt); } else if (ShouldEmitUnreachable) { if (CGM.getCodeGenOpts().OptimizationLevel == 0) EmitTrapCall(llvm::Intrinsic::trap); @@ -2214,7 +2237,6 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::ConstantMatrix: case Type::Record: case Type::Enum: - case Type::Elaborated: case Type::Using: case Type::TemplateSpecialization: case Type::ObjCTypeParam: @@ -2224,6 +2246,10 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::BitInt: llvm_unreachable("type class is never variably-modified!"); + case Type::Elaborated: + type = cast<ElaboratedType>(ty)->getNamedType(); + break; + case Type::Adjusted: type = cast<AdjustedType>(ty)->getAdjustedType(); break; @@ -2426,8 +2452,6 @@ void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue) { - if (auto *CE = dyn_cast<CastExpr>(E)) - E = CE->getSubExprAsWritten(); QualType Ty = E->getType(); SourceLocation Loc = E->getExprLoc(); @@ -2442,8 +2466,10 @@ llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn, const AnnotateAttr *Attr) { SmallVector<llvm::Value *, 5> Args = { AnnotatedVal, - Builder.CreateBitCast(CGM.EmitAnnotationString(AnnotationStr), Int8PtrTy), - Builder.CreateBitCast(CGM.EmitAnnotationUnit(Location), Int8PtrTy), + Builder.CreateBitCast(CGM.EmitAnnotationString(AnnotationStr), + ConstGlobalsPtrTy), + Builder.CreateBitCast(CGM.EmitAnnotationUnit(Location), + ConstGlobalsPtrTy), CGM.EmitAnnotationLineNo(Location), }; if (Attr) @@ -2455,9 +2481,12 @@ void CodeGenFunction::EmitVarAnnotations(const VarDecl *D, llvm::Value *V) { assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute"); // FIXME We create a new bitcast for every annotation because that's what // llvm-gcc was doing. 
+ unsigned AS = V->getType()->getPointerAddressSpace(); + llvm::Type *I8PtrTy = Builder.getInt8PtrTy(AS); for (const auto *I : D->specific_attrs<AnnotateAttr>()) - EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation), - Builder.CreateBitCast(V, CGM.Int8PtrTy, V->getName()), + EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation, + {I8PtrTy, CGM.ConstGlobalsPtrTy}), + Builder.CreateBitCast(V, I8PtrTy, V->getName()), I->getAnnotation(), D->getLocation(), I); } @@ -2470,8 +2499,8 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, unsigned AS = PTy ? PTy->getAddressSpace() : 0; llvm::PointerType *IntrinTy = llvm::PointerType::getWithSamePointeeType(CGM.Int8PtrTy, AS); - llvm::Function *F = - CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, IntrinTy); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, + {IntrinTy, CGM.ConstGlobalsPtrTy}); for (const auto *I : D->specific_attrs<AnnotateAttr>()) { // FIXME Always emit the cast inst so we can differentiate between @@ -2594,8 +2623,30 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { CGM.getSanStats().create(IRB, SSK); } -llvm::Value * -CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { +void CodeGenFunction::EmitKCFIOperandBundle( + const CGCallee &Callee, SmallVectorImpl<llvm::OperandBundleDef> &Bundles) { + const FunctionProtoType *FP = + Callee.getAbstractInfo().getCalleeFunctionProtoType(); + if (FP) + Bundles.emplace_back("kcfi", CGM.CreateKCFITypeId(FP->desugar())); +} + +llvm::Value *CodeGenFunction::FormAArch64ResolverCondition( + const MultiVersionResolverOption &RO) { + llvm::SmallVector<StringRef, 8> CondFeatures; + for (const StringRef &Feature : RO.Conditions.Features) { + // Form condition for features which are not yet enabled in target + if (!getContext().getTargetInfo().hasFeature(Feature)) + CondFeatures.push_back(Feature); + } + if (!CondFeatures.empty()) { + return EmitAArch64CpuSupports(CondFeatures); + } + return nullptr; +} + +llvm::Value *CodeGenFunction::FormX86ResolverCondition( + const MultiVersionResolverOption &RO) { llvm::Value *Condition = nullptr; if (!RO.Conditions.Architecture.empty()) @@ -2633,8 +2684,72 @@ static void CreateMultiVersionResolverReturn(CodeGenModule &CGM, void CodeGenFunction::EmitMultiVersionResolver( llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { - assert(getContext().getTargetInfo().getTriple().isX86() && - "Only implemented for x86 targets"); + + llvm::Triple::ArchType ArchType = + getContext().getTargetInfo().getTriple().getArch(); + + switch (ArchType) { + case llvm::Triple::x86: + case llvm::Triple::x86_64: + EmitX86MultiVersionResolver(Resolver, Options); + return; + case llvm::Triple::aarch64: + EmitAArch64MultiVersionResolver(Resolver, Options); + return; + + default: + assert(false && "Only implemented for x86 and AArch64 targets"); + } +} + +void CodeGenFunction::EmitAArch64MultiVersionResolver( + llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { + assert(!Options.empty() && "No multiversion resolver options found"); + assert(Options.back().Conditions.Features.size() == 0 && + "Default case must be last"); + bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); + assert(SupportsIFunc && + "Multiversion resolver requires target IFUNC support"); + bool AArch64CpuInitialized = false; + llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); + + for (const 
MultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + llvm::Value *Condition = FormAArch64ResolverCondition(RO); + + // The 'default' or 'all features enabled' case. + if (!Condition) { + CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function, + SupportsIFunc); + return; + } + + if (!AArch64CpuInitialized) { + Builder.SetInsertPoint(CurBlock, CurBlock->begin()); + EmitAArch64CpuInit(); + AArch64CpuInitialized = true; + Builder.SetInsertPoint(CurBlock); + } + + llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); + CGBuilderTy RetBuilder(*this, RetBlock); + CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function, + SupportsIFunc); + CurBlock = createBasicBlock("resolver_else", Resolver); + Builder.CreateCondBr(Condition, RetBlock, CurBlock); + } + + // If no default, emit an unreachable. + Builder.SetInsertPoint(CurBlock); + llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); +} + +void CodeGenFunction::EmitX86MultiVersionResolver( + llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); @@ -2645,7 +2760,7 @@ void CodeGenFunction::EmitMultiVersionResolver( for (const MultiVersionResolverOption &RO : Options) { Builder.SetInsertPoint(CurBlock); - llvm::Value *Condition = FormResolverCondition(RO); + llvm::Value *Condition = FormX86ResolverCondition(RO); // The 'default' or 'generic' case. if (!Condition) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index fe0890f433e8..a535aa7c0410 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -41,6 +41,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/SanitizerStats.h" +#include <optional> namespace llvm { class BasicBlock; @@ -539,7 +540,7 @@ public: /// potentially set the return value. bool SawAsmBlock = false; - const NamedDecl *CurSEHParent = nullptr; + GlobalDecl CurSEHParent; /// True if the current function is an outlined SEH helper. This can be a /// finally block or filter expression. @@ -570,7 +571,7 @@ public: return false; // C++11 and later guarantees that a thread eventually will do one of the - // following (6.9.2.3.1 in C++11): + // following (C++11 [intro.multithread]p24 and C++17 [intro.progress]p1): // - terminate, // - make a call to a library I/O function, // - perform an access through a volatile glvalue, or @@ -609,7 +610,7 @@ public: const CodeGen::CGBlockInfo *BlockInfo = nullptr; llvm::Value *BlockPointer = nullptr; - llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; /// A mapping from NRVO variables to the flags used to indicate @@ -723,7 +724,7 @@ public: FPOptions OldFPFeatures; llvm::fp::ExceptionBehavior OldExcept; llvm::RoundingMode OldRounding; - Optional<CGBuilderTy::FastMathFlagGuard> FMFGuard; + std::optional<CGBuilderTy::FastMathFlagGuard> FMFGuard; }; FPOptions CurFPFeatures; @@ -1094,7 +1095,7 @@ public: void ForceCleanup() { RunCleanupsScope::ForceCleanup(); - MappedVars.restore(CGF); + restoreMap(); } /// Exit scope - all the mapped variables are restored. 
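For a source-level view of what EmitAArch64MultiVersionResolver above services, consider a function with multiple target clones: the compiler emits one body per feature set plus a default, and an ifunc whose resolver (the code generated above) picks an implementation once, after initializing the CPU-feature state. The feature name here is illustrative:

__attribute__((target_clones("default", "sve")))
int dot(const int *a, const int *b, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += a[i] * b[i];
  return s;
}

On AArch64 the non-default clone is mangled with the "._M" plus feature suffix added by the mangling changes later in this patch, and the default case must be the last resolver option, as the asserts above require.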
@@ -1108,6 +1109,11 @@ public:
       VD = VD->getCanonicalDecl();
       return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
     }
+
+    /// Restore all mapped variables w/o clean up. This is useful when we want
+    /// to reference the original variables but don't want the cleanup because
+    /// that could emit lifetime end too early, causing backend issue #56913.
+    void restoreMap() { MappedVars.restore(CGF); }
   };
 
   /// Save/restore original map of previously emitted local vars in case when we
@@ -1522,7 +1528,8 @@ public:
   /// If \p StepV is null, the default increment is 1.
   void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) {
     if (CGM.getCodeGenOpts().hasProfileClangInstr() &&
-        !CurFn->hasFnAttribute(llvm::Attribute::NoProfile))
+        !CurFn->hasFnAttribute(llvm::Attribute::NoProfile) &&
+        !CurFn->hasFnAttribute(llvm::Attribute::SkipProfile))
       PGO.emitCounterIncrement(Builder, S, StepV);
     PGO.setCurrentStmt(S);
   }
@@ -2015,7 +2022,7 @@ public:
     return getInvokeDestImpl();
   }
 
-  bool currentFunctionUsesSEHTry() const { return CurSEHParent != nullptr; }
+  bool currentFunctionUsesSEHTry() const { return !!CurSEHParent; }
 
   const TargetInfo &getTarget() const { return Target; }
   llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
@@ -2225,7 +2232,7 @@ public:
   /// Emit the unified return block, trying to avoid its emission when
   /// possible.
   /// \return The debug location of the user written return statement if the
-  /// return block is is avoided.
+  /// return block is avoided.
   llvm::DebugLoc EmitReturnBlock();
 
   /// FinishFunction - Complete IR generation of the current function. It is
@@ -2878,7 +2885,7 @@ public:
                                AggValueSlot::Overlap_t Overlap,
                                SourceLocation Loc, bool NewPointerIsChecked);
 
-  /// Emit assumption load for all bases. Requires to be be called only on
+  /// Emit assumption load for all bases. Requires to be called only on
   /// most-derived class and not under construction of the object.
   void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This);
 
@@ -3207,7 +3214,7 @@ public:
   /// This function may clear the current insertion point; callers should use
   /// EnsureInsertPoint if they wish to subsequently generate code without first
   /// calling EmitBlock, EmitBranch, or EmitStmt.
-  void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = None);
+  void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = std::nullopt);
 
   /// EmitSimpleStmt - Try to emit a "simple" statement which does not
   /// necessarily require an insertion point or debug information; typically
@@ -3235,10 +3242,10 @@ public:
   void EmitIfStmt(const IfStmt &S);
 
   void EmitWhileStmt(const WhileStmt &S,
-                     ArrayRef<const Attr *> Attrs = None);
-  void EmitDoStmt(const DoStmt &S, ArrayRef<const Attr *> Attrs = None);
+                     ArrayRef<const Attr *> Attrs = std::nullopt);
+  void EmitDoStmt(const DoStmt &S, ArrayRef<const Attr *> Attrs = std::nullopt);
   void EmitForStmt(const ForStmt &S,
-                   ArrayRef<const Attr *> Attrs = None);
+                   ArrayRef<const Attr *> Attrs = std::nullopt);
   void EmitReturnStmt(const ReturnStmt &S);
   void EmitDeclStmt(const DeclStmt &S);
   void EmitBreakStmt(const BreakStmt &S);
@@ -3315,7 +3322,7 @@ public:
                                   llvm::Value *ParentFP);
 
   void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
-                           ArrayRef<const Attr *> Attrs = None);
+                           ArrayRef<const Attr *> Attrs = std::nullopt);
 
   /// Controls insertion of cancellation exit blocks in worksharing constructs.
class OMPCancelStackRAII { @@ -3514,6 +3521,7 @@ public: void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S); void EmitOMPTaskDirective(const OMPTaskDirective &S); void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S); + void EmitOMPErrorDirective(const OMPErrorDirective &S); void EmitOMPBarrierDirective(const OMPBarrierDirective &S); void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); @@ -3967,6 +3975,8 @@ public: llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); + llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar); LValue EmitLValueForField(LValue Base, const FieldDecl* Field); LValue EmitLValueForLambdaField(const FieldDecl *Field); @@ -4194,6 +4204,12 @@ public: llvm::Type *getEltType(const SVETypeFlags &TypeFlags); llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags); llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags); + llvm::Value *EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, + llvm::Type *ReturnType, + ArrayRef<llvm::Value *> Ops); + llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags, + llvm::Type *ReturnType, + ArrayRef<llvm::Value *> Ops); llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags); llvm::Value *EmitSVEDupX(llvm::Value *Scalar); llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty); @@ -4247,7 +4263,8 @@ public: llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); - bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, + llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID); @@ -4403,6 +4420,11 @@ public: /// EmitLoadOfComplex - Load a complex number from the specified l-value. ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc); + ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType PromotionType); + llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType PromotionType); + ComplexPairTy EmitPromotedValue(ComplexPairTy result, QualType PromotionType); + ComplexPairTy EmitUnPromotedValue(ComplexPairTy result, QualType PromotionType); + Address emitAddrOfRealComponent(Address complex, QualType complexType); Address emitAddrOfImagComponent(Address complex, QualType complexType); @@ -4600,6 +4622,9 @@ public: /// passing to a runtime sanitizer handler. llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc); + void EmitKCFIOperandBundle(const CGCallee &Callee, + SmallVectorImpl<llvm::OperandBundleDef> &Bundles); + /// Create a basic block that will either trap or call a handler function in /// the UBSan runtime with the provided arguments, and create a conditional /// branch to it. @@ -4789,6 +4814,12 @@ public: // last (if it exists). 
void EmitMultiVersionResolver(llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options); + void + EmitX86MultiVersionResolver(llvm::Function *Resolver, + ArrayRef<MultiVersionResolverOption> Options); + void + EmitAArch64MultiVersionResolver(llvm::Function *Resolver, + ArrayRef<MultiVersionResolverOption> Options); private: QualType getVarArgType(const Expr *Arg); @@ -4807,7 +4838,11 @@ private: llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs); llvm::Value *EmitX86CpuSupports(uint64_t Mask); llvm::Value *EmitX86CpuInit(); - llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); + llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO); + llvm::Value *EmitAArch64CpuInit(); + llvm::Value * + FormAArch64ResolverCondition(const MultiVersionResolverOption &RO); + llvm::Value *EmitAArch64CpuSupports(ArrayRef<StringRef> FeatureStrs); }; @@ -4817,9 +4852,9 @@ DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *value) { // Otherwise, we need an alloca. auto align = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType())); + CGF.CGM.getDataLayout().getPrefTypeAlign(value->getType())); Address alloca = - CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save"); + CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save"); CGF.Builder.CreateStore(value, alloca); return saved_type(alloca.getPointer(), true); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4e8e120d89df..12d602fed693 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -47,6 +47,8 @@ #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -58,14 +60,16 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/X86TargetParser.h" +#include "llvm/Support/xxhash.h" +#include <optional> using namespace clang; using namespace CodeGen; @@ -120,9 +124,10 @@ CodeGenModule::CodeGenModule(ASTContext &C, BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); - PointerWidthInBits = C.getTargetInfo().getPointerWidth(0); + PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default); PointerAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(0)).getQuantity(); + C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default)) + .getQuantity(); SizeSizeInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = @@ -137,6 +142,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const llvm::DataLayout &DL = M.getDataLayout(); AllocaInt8PtrTy = Int8Ty->getPointerTo(DL.getAllocaAddrSpace()); GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace()); + ConstGlobalsPtrTy = Int8Ty->getPointerTo( + 
C.getTargetAddressSpace(GetGlobalConstantAddressSpace())); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); // Build C++20 Module initializers. @@ -179,15 +186,11 @@ CodeGenModule::CodeGenModule(ASTContext &C, if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); - if (auto E = ReaderOrErr.takeError()) { - unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, - "Could not read profile %0: %1"); - llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { - getDiags().Report(DiagID) << CodeGenOpts.ProfileInstrumentUsePath - << EI.message(); - }); - } else - PGOReader = std::move(ReaderOrErr.get()); + // We're checking for profile read errors in CompilerInvocation, so if + // there was an error it should've already been caught. If it hasn't been + // somehow, trip an assertion. + assert(ReaderOrErr); + PGOReader = std::move(ReaderOrErr.get()); } // If coverage mapping generation is enabled, create the @@ -205,22 +208,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, Path = Entry.second + Path.substr(Entry.first.size()); break; } - llvm::MD5 Md5; - Md5.update(Path); - llvm::MD5::MD5Result R; - Md5.final(R); - SmallString<32> Str; - llvm::MD5::stringifyResult(R, Str); - // Convert MD5hash to Decimal. Demangler suffixes can either contain - // numbers or characters but not both. - llvm::APInt IntHash(128, Str.str(), 16); - // Prepend "__uniq" before the hash for tools like profilers to understand - // that this symbol is of internal linkage type. The "__uniq" is the - // pre-determined prefix that is used to tell tools that this symbol was - // created with -funique-internal-linakge-symbols and the tools can strip or - // keep the prefix as needed. 
- ModuleNameHash = (Twine(".__uniq.") + - Twine(toString(IntHash, /* Radix = */ 10, /* Signed = */false))).str(); + ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path); } } @@ -521,7 +509,7 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, void CodeGenModule::Release() { Module *Primary = getContext().getModuleForCodeGen(); - if (CXX20ModuleInits && Primary && !Primary->isModuleMapModule()) + if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); EmitDeferred(); DeferredDecls.insert(EmittedDeferredDecls.begin(), @@ -531,6 +519,14 @@ void CodeGenModule::Release() { applyGlobalValReplacements(); applyReplacements(); emitMultiVersionFunctions(); + + if (Context.getLangOpts().IncrementalExtensions && + GlobalTopLevelStmtBlockInFlight.first) { + const TopLevelStmtDecl *TLSD = GlobalTopLevelStmtBlockInFlight.second; + GlobalTopLevelStmtBlockInFlight.first->FinishFunction(TLSD->getEndLoc()); + GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; + } + if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else @@ -560,6 +556,9 @@ void CodeGenModule::Release() { if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } + llvm::stable_sort(GlobalCtors, [](const Structor &L, const Structor &R) { + return L.LexOrder < R.LexOrder; + }); EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); EmitGlobalAnnotations(); @@ -573,6 +572,8 @@ void CodeGenModule::Release() { CodeGenFunction(*this).EmitCfiCheckFail(); CodeGenFunction(*this).EmitCfiCheckStub(); } + if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) + finalizeKCFITypes(); emitAtAvailableLinkGuard(); if (Context.getTargetInfo().getTriple().isWasm()) EmitMainVoidAlias(); @@ -594,9 +595,8 @@ void CodeGenModule::Release() { } // Emit amdgpu_code_object_version module flag, which is code object version // times 100. - // ToDo: Enable module flag for all code object version when ROCm device - // library is ready. - if (getTarget().getTargetOpts().CodeObjectVersion == TargetOptions::COV_5) { + if (getTarget().getTargetOpts().CodeObjectVersion != + TargetOptions::COV_None) { getModule().addModuleFlag(llvm::Module::Error, "amdgpu_code_object_version", getTarget().getTargetOpts().CodeObjectVersion); @@ -689,6 +689,10 @@ void CodeGenModule::Release() { // Function ID tables for EH Continuation Guard. getModule().addModuleFlag(llvm::Module::Warning, "ehcontguard", 1); } + if (Context.getLangOpts().Kernel) { + // Note if we are compiling with /kernel. + getModule().addModuleFlag(llvm::Module::Warning, "ms-kernel", 1); + } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers // FIXME: we could support it by stripping all the information introduced @@ -755,6 +759,15 @@ void CodeGenModule::Release() { CodeGenOpts.SanitizeCfiCanonicalJumpTables); } + if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) { + getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1); + // KCFI assumes patchable-function-prefix is the same for all indirectly + // called functions. Store the expected offset for code generation. 
+    if (CodeGenOpts.PatchableFunctionEntryOffset)
+      getModule().addModuleFlag(llvm::Module::Override, "kcfi-offset",
+                                CodeGenOpts.PatchableFunctionEntryOffset);
+  }
+
   if (CodeGenOpts.CFProtectionReturn &&
       Target.checkCFProtectionReturnSupported(getDiags())) {
     // Indicate that we want to instrument return control flow protection.
@@ -769,12 +782,12 @@ void CodeGenModule::Release() {
                               1);
   }
 
-  if (CodeGenOpts.IBTSeal)
-    getModule().addModuleFlag(llvm::Module::Min, "ibt-seal", 1);
-
   if (CodeGenOpts.FunctionReturnThunks)
     getModule().addModuleFlag(llvm::Module::Override, "function_return_thunk_extern", 1);
 
+  if (CodeGenOpts.IndirectBranchCSPrefix)
+    getModule().addModuleFlag(llvm::Module::Override, "indirect_branch_cs_prefix", 1);
+
   // Add module metadata for return address signing (ignoring
   // non-leaf/all) and stack tagging. These are actually turned on by function
   // attributes, but we use module metadata to emit build attributes. This is
@@ -965,14 +978,9 @@ void CodeGenModule::EmitOpenCLMetadata() {
 
 void CodeGenModule::EmitBackendOptionsMetadata(
     const CodeGenOptions CodeGenOpts) {
-  switch (getTriple().getArch()) {
-  default:
-    break;
-  case llvm::Triple::riscv32:
-  case llvm::Triple::riscv64:
+  if (getTriple().isRISCV()) {
     getModule().addModuleFlag(llvm::Module::Error, "SmallDataLimit",
                               CodeGenOpts.SmallDataLimit);
-    break;
   }
 }
 
@@ -1101,8 +1109,6 @@ llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) {
 
 void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
                                         const NamedDecl *D) const {
-  if (GV->hasDLLImportStorageClass())
-    return;
   // Internal definitions always have default visibility.
   if (GV->hasLocalLinkage()) {
     GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
@@ -1113,6 +1119,21 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
   // Set visibility for definitions, and for declarations if requested globally
   // or set explicitly.
   LinkageInfo LV = D->getLinkageAndVisibility();
+  if (GV->hasDLLExportStorageClass() || GV->hasDLLImportStorageClass()) {
+    // Reject incompatible dll storage class and visibility annotations.
+    if (!LV.isVisibilityExplicit())
+      return;
+    if (GV->hasDLLExportStorageClass()) {
+      if (LV.getVisibility() == HiddenVisibility)
+        getDiags().Report(D->getLocation(),
+                          diag::err_hidden_visibility_dllexport);
+    } else if (LV.getVisibility() != DefaultVisibility) {
+      getDiags().Report(D->getLocation(),
+                        diag::err_non_default_visibility_dllimport);
+    }
+    return;
+  }
+
   if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls ||
       !GV->isDeclarationForLinker())
     GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
@@ -1320,6 +1341,20 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
     Out << ".resolver";
 }
 
+static void AppendTargetVersionMangling(const CodeGenModule &CGM,
+                                        const TargetVersionAttr *Attr,
+                                        raw_ostream &Out) {
+  if (Attr->isDefaultVersion())
+    return;
+  Out << "._";
+  llvm::SmallVector<StringRef, 8> Feats;
+  Attr->getFeatures(Feats);
+  for (const auto &Feat : Feats) {
+    Out << 'M';
+    Out << Feat;
+  }
+}
+
 static void AppendTargetMangling(const CodeGenModule &CGM,
                                  const TargetAttr *Attr, raw_ostream &Out) {
   if (Attr->isDefaultVersion())
@@ -1327,21 +1362,21 @@ static void AppendTargetMangling(const CodeGenModule &CGM,
     return;
   Out << '.';
   const TargetInfo &Target = CGM.getTarget();
-  ParsedTargetAttr Info =
-      Attr->parse([&Target](StringRef LHS, StringRef RHS) {
-        // Multiversioning doesn't allow "no-${feature}", so we can
-        // only have "+" prefixes here.
- assert(LHS.startswith("+") && RHS.startswith("+") && - "Features should always have a prefix."); - return Target.multiVersionSortPriority(LHS.substr(1)) > - Target.multiVersionSortPriority(RHS.substr(1)); - }); + ParsedTargetAttr Info = Target.parseTargetAttr(Attr->getFeaturesStr()); + llvm::sort(Info.Features, [&Target](StringRef LHS, StringRef RHS) { + // Multiversioning doesn't allow "no-${feature}", so we can + // only have "+" prefixes here. + assert(LHS.startswith("+") && RHS.startswith("+") && + "Features should always have a prefix."); + return Target.multiVersionSortPriority(LHS.substr(1)) > + Target.multiVersionSortPriority(RHS.substr(1)); + }); bool IsFirst = true; - if (!Info.Architecture.empty()) { + if (!Info.CPU.empty()) { IsFirst = false; - Out << "arch_" << Info.Architecture; + Out << "arch_" << Info.CPU; } for (StringRef Feat : Info.Features) { @@ -1365,14 +1400,27 @@ static void AppendTargetClonesMangling(const CodeGenModule &CGM, const TargetClonesAttr *Attr, unsigned VersionIndex, raw_ostream &Out) { - Out << '.'; - StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); - if (FeatureStr.startswith("arch=")) - Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); - else - Out << FeatureStr; + if (CGM.getTarget().getTriple().isAArch64()) { + StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); + if (FeatureStr == "default") + return; + Out << "._"; + SmallVector<StringRef, 8> Features; + FeatureStr.split(Features, "+"); + for (auto &Feat : Features) { + Out << 'M'; + Out << Feat; + } + } else { + Out << '.'; + StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); + if (FeatureStr.startswith("arch=")) + Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); + else + Out << FeatureStr; - Out << '.' << Attr->getMangledIndex(VersionIndex); + Out << '.' << Attr->getMangledIndex(VersionIndex); + } } static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, @@ -1428,6 +1476,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out); break; + case MultiVersionKind::TargetVersion: + AppendTargetVersionMangling(CGM, FD->getAttr<TargetVersionAttr>(), Out); + break; case MultiVersionKind::TargetClones: AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(), GD.getMultiVersionIndex(), Out); @@ -1581,9 +1632,10 @@ llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) { /// AddGlobalCtor - Add a function to the list that will be called before /// main() runs. void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, + unsigned LexOrder, llvm::Constant *AssociatedData) { // FIXME: Type coercion of void()* types. - GlobalCtors.push_back(Structor(Priority, Ctor, AssociatedData)); + GlobalCtors.push_back(Structor(Priority, LexOrder, Ctor, AssociatedData)); } /// AddGlobalDtor - Add a function to the list that will be called @@ -1597,7 +1649,7 @@ void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority, } // FIXME: Type coercion of void()* types. - GlobalDtors.push_back(Structor(Priority, Dtor, nullptr)); + GlobalDtors.push_back(Structor(Priority, ~0U, Dtor, nullptr)); } void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { @@ -1633,7 +1685,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. Take it off as a workaround. 
- list->setAlignment(llvm::None); + list->setAlignment(std::nullopt); Fns.clear(); } @@ -1666,6 +1718,20 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } +llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) { + if (auto *FnType = T->getAs<FunctionProtoType>()) + T = getContext().getFunctionType( + FnType->getReturnType(), FnType->getParamTypes(), + FnType->getExtProtoInfo().withExceptionSpec(EST_None)); + + std::string OutName; + llvm::raw_string_ostream Out(OutName); + getCXXABI().getMangleContext().mangleTypeName(T, Out); + + return llvm::ConstantInt::get(Int32Ty, + static_cast<uint32_t>(llvm::xxHash64(OutName))); +} + void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk) { @@ -1765,7 +1831,7 @@ void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn, // Get image and pipe access qualifier: if (ty->isImageType() || ty->isPipeType()) { const Decl *PDecl = parm; - if (auto *TD = dyn_cast<TypedefType>(ty)) + if (const auto *TD = ty->getAs<TypedefType>()) PDecl = TD->getDecl(); const OpenCLAccessAttr *A = PDecl->getAttr<OpenCLAccessAttr>(); if (A && A->isWriteOnly()) @@ -1935,7 +2001,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F) { llvm::AttrBuilder B(F->getContext()); - if (CodeGenOpts.UnwindTables) + if ((!D || !D->hasAttr<NoUwtableAttr>()) && CodeGenOpts.UnwindTables) B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables)); if (CodeGenOpts.StackClashProtector) @@ -1944,14 +2010,17 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); - if (!D || !D->hasAttr<NoStackProtectorAttr>()) { - if (LangOpts.getStackProtector() == LangOptions::SSPOn) - B.addAttribute(llvm::Attribute::StackProtect); - else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) - B.addAttribute(llvm::Attribute::StackProtectStrong); - else if (LangOpts.getStackProtector() == LangOptions::SSPReq) - B.addAttribute(llvm::Attribute::StackProtectReq); - } + if (D && D->hasAttr<NoStackProtectorAttr>()) + ; // Do nothing. + else if (D && D->hasAttr<StrictGuardStackCheckAttr>() && + LangOpts.getStackProtector() == LangOptions::SSPOn) + B.addAttribute(llvm::Attribute::StackProtectStrong); + else if (LangOpts.getStackProtector() == LangOptions::SSPOn) + B.addAttribute(llvm::Attribute::StackProtect); + else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) + B.addAttribute(llvm::Attribute::StackProtectStrong); + else if (LangOpts.getStackProtector() == LangOptions::SSPReq) + B.addAttribute(llvm::Attribute::StackProtectReq); if (!D) { // If we don't have a declaration to control inlining, the function isn't @@ -2131,10 +2200,12 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr; + const auto *TV = FD ? FD->getAttr<TargetVersionAttr>() : nullptr; + assert((!TD || !TV) && "both target_version and target specified"); const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; const auto *TC = FD ? 
FD->getAttr<TargetClonesAttr>() : nullptr; bool AddedAttr = false; - if (TD || SD || TC) { + if (TD || TV || SD || TC) { llvm::StringMap<bool> FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); @@ -2147,10 +2218,11 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // get and parse the target attribute so we can get the cpu for // the function. if (TD) { - ParsedTargetAttr ParsedAttr = TD->parse(); - if (!ParsedAttr.Architecture.empty() && - getTarget().isValidCPUName(ParsedAttr.Architecture)) { - TargetCPU = ParsedAttr.Architecture; + ParsedTargetAttr ParsedAttr = + Target.parseTargetAttr(TD->getFeaturesStr()); + if (!ParsedAttr.CPU.empty() && + getTarget().isValidCPUName(ParsedAttr.CPU)) { + TargetCPU = ParsedAttr.CPU; TuneCPU = ""; // Clear the tune CPU. } if (!ParsedAttr.Tune.empty() && @@ -2280,6 +2352,57 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } +void CodeGenModule::setKCFIType(const FunctionDecl *FD, llvm::Function *F) { + if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic()) + return; + + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + F->setMetadata(llvm::LLVMContext::MD_kcfi_type, + llvm::MDNode::get( + Ctx, MDB.createConstant(CreateKCFITypeId(FD->getType())))); +} + +static bool allowKCFIIdentifier(StringRef Name) { + // KCFI type identifier constants are only necessary for external assembly + // functions, which means it's safe to skip unusual names. Subset of + // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar(). + return llvm::all_of(Name, [](const char &C) { + return llvm::isAlnum(C) || C == '_' || C == '.'; + }); +} + +void CodeGenModule::finalizeKCFITypes() { + llvm::Module &M = getModule(); + for (auto &F : M.functions()) { + // Remove KCFI type metadata from non-address-taken local functions. + bool AddressTaken = F.hasAddressTaken(); + if (!AddressTaken && F.hasLocalLinkage()) + F.eraseMetadata(llvm::LLVMContext::MD_kcfi_type); + + // Generate a constant with the expected KCFI type identifier for all + // address-taken function declarations to support annotating indirectly + // called assembly functions. 
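To make the identifier scheme concrete: the KCFI type id is the low 32 bits of xxHash64 over the Itanium mangling of the function type, computed by CreateKCFITypeId above after stripping any exception specification. A minimal sketch of the hash step (the helper name is hypothetical):

    #include "llvm/Support/xxhash.h"

    static uint32_t kcfiTypeId(llvm::StringRef MangledFnType) {
      // Same truncation as CreateKCFITypeId: keep the low 32 bits.
      return static_cast<uint32_t>(llvm::xxHash64(MangledFnType));
    }

For an address-taken declaration named foo, the finalizeKCFITypes body that follows appends module inline asm of the shape ".weak __kcfi_typeid_foo" / ".set __kcfi_typeid_foo, <id>", so hand-written assembly implementing foo can embed the matching constant.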
+    if (!AddressTaken || !F.isDeclaration())
+      continue;
+
+    const llvm::ConstantInt *Type;
+    if (const llvm::MDNode *MD = F.getMetadata(llvm::LLVMContext::MD_kcfi_type))
+      Type = llvm::mdconst::extract<llvm::ConstantInt>(MD->getOperand(0));
+    else
+      continue;
+
+    StringRef Name = F.getName();
+    if (!allowKCFIIdentifier(Name))
+      continue;
+
+    std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" +
+                       Name + ", " + Twine(Type->getZExtValue()) + "\n")
+                          .str();
+    M.appendModuleInlineAsm(Asm);
+  }
+}
+
 void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
                                           bool IsIncompleteFunction,
                                           bool IsThunk) {
@@ -2362,9 +2485,15 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
         !CodeGenOpts.SanitizeCfiCanonicalJumpTables)
       CreateFunctionTypeMetadataForIcall(FD, F);
 
+    if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
+      setKCFIType(FD, F);
+
     if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
       getOpenMPRuntime().emitDeclareSimdFunction(FD, F);
 
+    if (CodeGenOpts.InlineMaxStackSize != UINT_MAX)
+      F->addFnAttr("inline-max-stacksize", llvm::utostr(CodeGenOpts.InlineMaxStackSize));
+
     if (const auto *CB = FD->getAttr<CallbackAttr>()) {
       // Annotate the callback behavior as metadata:
       //   - The callback callee (as argument number).
@@ -2521,21 +2650,23 @@ void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) {
   // source, first Global Module Fragments, if present.
   if (auto GMF = Primary->getGlobalModuleFragment()) {
     for (Decl *D : getContext().getModuleInitializers(GMF)) {
-      assert(D->getKind() == Decl::Var && "GMF initializer decl is not a var?");
+      if (isa<ImportDecl>(D))
+        continue;
+      assert(isa<VarDecl>(D) && "GMF initializer decl is not a var?");
       EmitTopLevelDecl(D);
     }
   }
 
   // Second any associated with the module, itself.
   for (Decl *D : getContext().getModuleInitializers(Primary)) {
     // Skip import decls, the inits for those are called explicitly.
-    if (D->getKind() == Decl::Import)
+    if (isa<ImportDecl>(D))
       continue;
     EmitTopLevelDecl(D);
   }
 
   // Third any associated with the Private Module Fragment, if present.
   if (auto PMF = Primary->getPrivateModuleFragment()) {
     for (Decl *D : getContext().getModuleInitializers(PMF)) {
-      assert(D->getKind() == Decl::Var && "PMF initializer decl is not a var?");
+      assert(isa<VarDecl>(D) && "PMF initializer decl is not a var?");
      EmitTopLevelDecl(D);
     }
   }
@@ -2719,9 +2850,10 @@ llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) {
 
   // Not found yet, create a new global.
llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str); - auto *gv = - new llvm::GlobalVariable(getModule(), s->getType(), true, - llvm::GlobalValue::PrivateLinkage, s, ".str"); + auto *gv = new llvm::GlobalVariable( + getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s, + ".str", nullptr, llvm::GlobalValue::NotThreadLocal, + ConstGlobalsPtrTy->getAddressSpace()); gv->setSection(AnnotationSection); gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); AStr = gv; @@ -2747,7 +2879,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) { llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { ArrayRef<Expr *> Exprs = {Attr->args_begin(), Attr->args_size()}; if (Exprs.empty()) - return llvm::ConstantPointerNull::get(GlobalsInt8PtrTy); + return llvm::ConstantPointerNull::get(ConstGlobalsPtrTy); llvm::FoldingSetNodeID ID; for (Expr *E : Exprs) { @@ -2797,8 +2929,8 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, // Create the ConstantStruct for the global annotation. llvm::Constant *Fields[] = { llvm::ConstantExpr::getBitCast(GVInGlobalsAS, GlobalsInt8PtrTy), - llvm::ConstantExpr::getBitCast(AnnoGV, GlobalsInt8PtrTy), - llvm::ConstantExpr::getBitCast(UnitGV, GlobalsInt8PtrTy), + llvm::ConstantExpr::getBitCast(AnnoGV, ConstGlobalsPtrTy), + llvm::ConstantExpr::getBitCast(UnitGV, ConstGlobalsPtrTy), LineNoCst, Args, }; @@ -2890,46 +3022,44 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, return true; } -bool CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn, - SourceLocation Loc) const { +ProfileList::ExclusionType +CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn, + SourceLocation Loc) const { const auto &ProfileList = getContext().getProfileList(); // If the profile list is empty, then instrument everything. if (ProfileList.isEmpty()) - return false; + return ProfileList::Allow; CodeGenOptions::ProfileInstrKind Kind = getCodeGenOpts().getProfileInstr(); // First, check the function name. - Optional<bool> V = ProfileList.isFunctionExcluded(Fn->getName(), Kind); - if (V) + if (auto V = ProfileList.isFunctionExcluded(Fn->getName(), Kind)) return *V; // Next, check the source location. - if (Loc.isValid()) { - Optional<bool> V = ProfileList.isLocationExcluded(Loc, Kind); - if (V) + if (Loc.isValid()) + if (auto V = ProfileList.isLocationExcluded(Loc, Kind)) return *V; - } // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. 
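The return-type change from bool to ProfileList::ExclusionType threads a tri-state ("instrument", "skip counters", "forbid all profiling") through these helpers. A hedged sketch of how a caller can consume it; the enumerators are from clang/Basic/ProfileList.h, while the exact attribute mapping shown is illustrative:

    switch (CGM.isFunctionBlockedFromProfileInstr(Fn, Loc)) {
    case ProfileList::Allow:   // instrument as usual
      break;
    case ProfileList::Skip:    // omit instrumentation for this function
      Fn->addFnAttr(llvm::Attribute::SkipProfile);
      break;
    case ProfileList::Forbid:  // no profiling data at all
      Fn->addFnAttr(llvm::Attribute::NoProfile);
      break;
    }

This pairs with the new SkipProfile bail-out added to CodeGenPGO::assignRegionCounters later in this diff.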
auto &SM = Context.getSourceManager(); - if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - Optional<bool> V = ProfileList.isFileExcluded(MainFile->getName(), Kind); - if (V) + if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) + if (auto V = ProfileList.isFileExcluded(MainFile->getName(), Kind)) return *V; - } - return ProfileList.getDefault(); + return ProfileList.getDefault(Kind); } -bool CodeGenModule::isFunctionBlockedFromProfileInstr( - llvm::Function *Fn, SourceLocation Loc) const { - if (isFunctionBlockedByProfileList(Fn, Loc)) - return true; +ProfileList::ExclusionType +CodeGenModule::isFunctionBlockedFromProfileInstr(llvm::Function *Fn, + SourceLocation Loc) const { + auto V = isFunctionBlockedByProfileList(Fn, Loc); + if (V != ProfileList::Allow) + return V; auto NumGroups = getCodeGenOpts().ProfileTotalFunctionGroups; if (NumGroups > 1) { auto Group = llvm::crc32(arrayRefFromStringRef(Fn->getName())) % NumGroups; if (Group != getCodeGenOpts().ProfileSelectedFunctionGroup) - return true; + return ProfileList::Skip; } - return false; + return ProfileList::Allow; } bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { @@ -2955,7 +3085,7 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // we have if the level of the declare target attribute is -1. Note that we // check somewhere else if we should emit this at all. if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd) { - llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = + std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr = OMPDeclareTargetDeclAttr::getActiveAttr(Global); if (!ActiveAttr || (*ActiveAttr)->getLevel() != (unsigned)-1) return false; @@ -3113,7 +3243,7 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { // See if there is already something with the target's name in the module. llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) { - unsigned AS = getContext().getTargetAddressSpace(VD->getType()); + unsigned AS = getTypes().getTargetAddressSpace(VD->getType()); auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS)); return ConstantAddress(Ptr, DeclTy, Alignment); } @@ -3219,16 +3349,18 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. - if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && UnifiedMemoryEnabled)) && "Link clause or to clause with unified memory expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -3271,6 +3403,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // The value must be emitted, but cannot be emitted eagerly. 
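The new MT_Enter cases track OpenMP 5.2, which renames the 'to' clause on 'declare target' to 'enter'; both spellings must lower identically. Illustrative source (variable name hypothetical):

    int gv = 1;
    #pragma omp declare target enter(gv)  // OpenMP 5.2 spelling
    // #pragma omp declare target to(gv)  // pre-5.2 spelling; takes the same
                                          // MT_To / MT_Enter path above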
assert(!MayBeEmittedEagerly(Global)); addDeferredDeclToEmit(GD); + EmittedDeferredDecls[MangledName] = GD; } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into @@ -3526,12 +3659,18 @@ static unsigned TargetMVPriority(const TargetInfo &TI, const CodeGenFunction::MultiVersionResolverOption &RO) { unsigned Priority = 0; - for (StringRef Feat : RO.Conditions.Features) + unsigned NumFeatures = 0; + for (StringRef Feat : RO.Conditions.Features) { Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); + NumFeatures++; + } if (!RO.Conditions.Architecture.empty()) Priority = std::max( Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); + + Priority += TI.multiVersionFeatureCost() * NumFeatures; + return Priority; } @@ -3576,13 +3715,19 @@ void CodeGenModule::emitMultiVersionFunctions() { } assert(Func && "This should have just been created"); } - - const auto *TA = CurFD->getAttr<TargetAttr>(); - llvm::SmallVector<StringRef, 8> Feats; - TA->getAddedFeatures(Feats); - - Options.emplace_back(cast<llvm::Function>(Func), - TA->getArchitecture(), Feats); + if (CurFD->getMultiVersionKind() == MultiVersionKind::Target) { + const auto *TA = CurFD->getAttr<TargetAttr>(); + llvm::SmallVector<StringRef, 8> Feats; + TA->getAddedFeatures(Feats); + Options.emplace_back(cast<llvm::Function>(Func), + TA->getArchitecture(), Feats); + } else { + const auto *TVA = CurFD->getAttr<TargetVersionAttr>(); + llvm::SmallVector<StringRef, 8> Feats; + TVA->getFeatures(Feats); + Options.emplace_back(cast<llvm::Function>(Func), + /*Architecture*/ "", Feats); + } }); } else if (FD->isTargetClonesMultiVersion()) { const auto *TC = FD->getAttr<TargetClonesAttr>(); @@ -3612,10 +3757,19 @@ void CodeGenModule::emitMultiVersionFunctions() { StringRef Architecture; llvm::SmallVector<StringRef, 1> Feature; - if (Version.startswith("arch=")) - Architecture = Version.drop_front(sizeof("arch=") - 1); - else if (Version != "default") - Feature.push_back(Version); + if (getTarget().getTriple().isAArch64()) { + if (Version != "default") { + llvm::SmallVector<StringRef, 8> VerFeats; + Version.split(VerFeats, "+"); + for (auto &CurFeat : VerFeats) + Feature.push_back(CurFeat.trim()); + } + } else { + if (Version.startswith("arch=")) + Architecture = Version.drop_front(sizeof("arch=") - 1); + else if (Version != "default") + Feature.push_back(Version); + } Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature); } @@ -3675,7 +3829,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { if (getTarget().supportsIFunc()) { ResolverType = llvm::FunctionType::get( llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), + getTypes().getTargetAddressSpace(FD->getType())), false); } else { @@ -3813,8 +3967,8 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { // cpu_dispatch will be emitted in this translation unit. 
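For the AArch64 branch above, one target_clones string may bundle several features with '+', and each bundle becomes a single clone. A rough example of the symbols implied (names and indices illustrative; the exact suffixes come from the mangling helpers earlier in this diff):

    __attribute__((target_clones("sve2+fp16", "default")))
    int dot(int *a, int *b, int n);
    // AArch64: a clone roughly named dot._Msve2Mfp16 plus the default,
    //          with the resolver ranking candidates via
    //          multiVersionSortPriority() and the per-feature cost that
    //          TargetMVPriority() now adds.
    // x86:     clones keep the '.<feature>.<index>' scheme, e.g. dot.avx2.0.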
if (getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion()) { llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get( - DeclTy, getContext().getTargetAddressSpace(FD->getType())), + llvm::PointerType::get(DeclTy, + getTypes().getTargetAddressSpace(FD->getType())), false); llvm::Constant *Resolver = GetOrCreateLLVMFunction( MangledName + ".resolver", ResolverType, GlobalDecl{}, @@ -3917,7 +4071,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo()); + return llvm::ConstantExpr::getBitCast( + Entry, Ty->getPointerTo(Entry->getAddressSpace())); } // This function doesn't have a complete type (for example, the return @@ -3958,7 +4113,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( } llvm::Constant *BC = llvm::ConstantExpr::getBitCast( - F, Entry->getValueType()->getPointerTo()); + F, Entry->getValueType()->getPointerTo(Entry->getAddressSpace())); addGlobalValReplacement(Entry, BC); } @@ -3974,7 +4129,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // All MSVC dtors other than the base dtor are linkonce_odr and delegate to // each other bottoming out with the base dtor. Therefore we emit non-base // dtors on usage, even if there is no dtor definition in the TU. - if (D && isa<CXXDestructorDecl>(D) && + if (isa_and_nonnull<CXXDestructorDecl>(D) && getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), GD.getDtorType())) addDeferredDeclToEmit(GD); @@ -3988,6 +4143,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). addDeferredDeclToEmit(DDI->second); + EmittedDeferredDecls[DDI->first] = DDI->second; DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're @@ -4021,8 +4177,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( return F; } - llvm::Type *PTy = llvm::PointerType::getUnqual(Ty); - return llvm::ConstantExpr::getBitCast(F, PTy); + return llvm::ConstantExpr::getBitCast(F, + Ty->getPointerTo(F->getAddressSpace())); } /// GetAddrOfFunction - Return the address of the given function. If Ty is @@ -4071,8 +4227,9 @@ llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) { llvm::GlobalValue *F = cast<llvm::GlobalValue>(GetAddrOfFunction(Decl)->stripPointerCasts()); - return llvm::ConstantExpr::getBitCast(llvm::NoCFIValue::get(F), - llvm::Type::getInt8PtrTy(VMContext)); + return llvm::ConstantExpr::getBitCast( + llvm::NoCFIValue::get(F), + llvm::Type::getInt8PtrTy(VMContext, F->getAddressSpace())); } static const FunctionDecl * @@ -4269,6 +4426,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). 
     addDeferredDeclToEmit(DDI->second);
+    EmittedDeferredDecls[DDI->first] = DDI->second;
     DeferredDecls.erase(DDI);
   }
 
@@ -4408,7 +4566,7 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) {
 llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
     StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage,
-    unsigned Alignment) {
+    llvm::Align Alignment) {
   llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
   llvm::GlobalVariable *OldGV = nullptr;
 
@@ -4444,7 +4602,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
       !GV->hasAvailableExternallyLinkage())
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
 
-  GV->setAlignment(llvm::MaybeAlign(Alignment));
+  GV->setAlignment(Alignment);
 
   return GV;
 }
 
@@ -4673,13 +4831,19 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
   llvm::TrackingVH<llvm::Constant> Init;
   bool NeedsGlobalCtor = false;
+  // Whether the definition of the variable is available externally.
+  // If yes, we shouldn't emit the GlobalCtor and GlobalDtor for the variable
+  // since this is the job for its original source.
+  bool IsDefinitionAvailableExternally =
+      getContext().GetGVALinkageForVariable(D) == GVA_AvailableExternally;
   bool NeedsGlobalDtor =
+      !IsDefinitionAvailableExternally &&
       D->needsDestruction(getContext()) == QualType::DK_cxx_destructor;
 
   const VarDecl *InitDecl;
   const Expr *InitExpr = D->getAnyInitializer(InitDecl);
 
-  Optional<ConstantEmitter> emitter;
+  std::optional<ConstantEmitter> emitter;
 
   // CUDA E.2.4.1 "__shared__ variables cannot have an initialization
   // as part of their declaration." Sema has already checked for
@@ -4727,7 +4891,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
       if (InitDecl->hasFlexibleArrayInit(getContext()))
         ErrorUnsupported(D, "flexible array initializer");
       Init = EmitNullConstant(T);
-      NeedsGlobalCtor = true;
+
+      if (!IsDefinitionAvailableExternally)
+        NeedsGlobalCtor = true;
     } else {
       ErrorUnsupported(D, "static initializer");
       Init = llvm::UndefValue::get(getTypes().ConvertType(T));
@@ -4837,7 +5003,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
     CharUnits AlignVal = getContext().getDeclAlign(D);
     // Check for alignment specified in an 'omp allocate' directive.
-    if (llvm::Optional<CharUnits> AlignValFromAllocate =
+    if (std::optional<CharUnits> AlignValFromAllocate =
             getOMPAllocateAlignment(D))
       AlignVal = *AlignValFromAllocate;
     GV->setAlignment(AlignVal.getAsAlign());
@@ -5331,7 +5497,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
   // Emit global alias debug information.
   if (isa<VarDecl>(D))
     if (CGDebugInfo *DI = getModuleDebugInfo())
-      DI->EmitGlobalAlias(cast<llvm::GlobalValue>(GA->getAliasee()), GD);
+      DI->EmitGlobalAlias(cast<llvm::GlobalValue>(GA->getAliasee()->stripPointerCasts()), GD);
 }
 
 void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
@@ -5468,7 +5634,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
     switch (CFRuntime) {
     default: break;
-    case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH;
+    case LangOptions::CoreFoundationABI::Swift: [[fallthrough]];
    case LangOptions::CoreFoundationABI::Swift5_0:
       CFConstantStringClassName =
           Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN"
                               : "$s10Foundation19_NSCFConstantStringCN";
@@ -5546,7 +5712,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
 
   // String pointer.
llvm::Constant *C = nullptr; if (isUTF16) { - auto Arr = llvm::makeArrayRef( + auto Arr = llvm::ArrayRef( reinterpret_cast<uint16_t *>(const_cast<char *>(Entry.first().data())), Entry.first().size() / 2); C = llvm::ConstantDataArray::get(VMContext, Arr); @@ -5891,7 +6057,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( LangAS AddrSpace = VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); - Optional<ConstantEmitter> emitter; + std::optional<ConstantEmitter> emitter; llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; @@ -5929,10 +6095,13 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); - setGVProperties(GV, VD); - if (GV->getDLLStorageClass() == llvm::GlobalVariable::DLLExportStorageClass) - // The reference temporary should never be dllexport. - GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); + // Don't assign dllimport or dllexport to local linkage globals. + if (!llvm::GlobalValue::isLocalLinkage(Linkage)) { + setGVProperties(GV, VD); + if (GV->getDLLStorageClass() == llvm::GlobalVariable::DLLExportStorageClass) + // The reference temporary should never be dllexport. + GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); + } GV->setAlignment(Align.getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); @@ -6058,6 +6227,39 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { EmitDeclContext(LSD); } +void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) { + std::unique_ptr<CodeGenFunction> &CurCGF = + GlobalTopLevelStmtBlockInFlight.first; + + // We emitted a top-level stmt but after it there is initialization. + // Stop squashing the top-level stmts into a single function. + if (CurCGF && CXXGlobalInits.back() != CurCGF->CurFn) { + CurCGF->FinishFunction(D->getEndLoc()); + CurCGF = nullptr; + } + + if (!CurCGF) { + // void __stmts__N(void) + // FIXME: Ask the ABI name mangler to pick a name. 
+ std::string Name = "__stmts__" + llvm::utostr(CXXGlobalInits.size()); + FunctionArgList Args; + QualType RetTy = getContext().VoidTy; + const CGFunctionInfo &FnInfo = + getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); + llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo); + llvm::Function *Fn = llvm::Function::Create( + FnTy, llvm::GlobalValue::InternalLinkage, Name, &getModule()); + + CurCGF.reset(new CodeGenFunction(*this)); + GlobalTopLevelStmtBlockInFlight.second = D; + CurCGF->StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, + D->getBeginLoc(), D->getBeginLoc()); + CXXGlobalInits.push_back(Fn); + } + + CurCGF->EmitStmt(D->getStmt()); +} + void CodeGenModule::EmitDeclContext(const DeclContext *DC) { for (auto *I : DC->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope @@ -6125,7 +6327,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { TSK_ExplicitInstantiationDefinition && Spec->hasDefinition()) DI->completeTemplateDefinition(*Spec); - } LLVM_FALLTHROUGH; + } [[fallthrough]]; case Decl::CXXRecord: { CXXRecordDecl *CRD = cast<CXXRecordDecl>(D); if (CGDebugInfo *DI = getModuleDebugInfo()) { @@ -6267,6 +6469,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; } + case Decl::TopLevelStmt: + EmitTopLevelStmt(cast<TopLevelStmtDecl>(D)); + break; + case Decl::Import: { auto *Import = cast<ImportDecl>(D); @@ -6363,6 +6569,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { DI->EmitAndRetainType(getContext().getEnumType(cast<EnumDecl>(D))); break; + case Decl::HLSLBuffer: + getHLSLRuntime().addBuffer(cast<HLSLBufferDecl>(D)); + break; + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 5fbcc5ad1f5f..b3354657b237 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -36,6 +36,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Transforms/Utils/SanitizerStats.h" +#include <optional> namespace llvm { class Module; @@ -282,12 +283,15 @@ class CodeGenModule : public CodeGenTypeCache { public: struct Structor { - Structor() : Priority(0), Initializer(nullptr), AssociatedData(nullptr) {} - Structor(int Priority, llvm::Constant *Initializer, + Structor() + : Priority(0), LexOrder(~0u), Initializer(nullptr), + AssociatedData(nullptr) {} + Structor(int Priority, unsigned LexOrder, llvm::Constant *Initializer, llvm::Constant *AssociatedData) - : Priority(Priority), Initializer(Initializer), + : Priority(Priority), LexOrder(LexOrder), Initializer(Initializer), AssociatedData(AssociatedData) {} int Priority; + unsigned LexOrder; llvm::Constant *Initializer; llvm::Constant *AssociatedData; }; @@ -588,6 +592,11 @@ private: llvm::DenseMap<const llvm::Constant *, llvm::GlobalVariable *> RTTIProxyMap; + // Helps squashing blocks of TopLevelStmtDecl into a single llvm::Function + // when used with -fincremental-extensions. 
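For context: -fincremental-extensions is the mode used by clang-repl and similar drivers, where statements may appear at the translation-unit level and each becomes a TopLevelStmtDecl. A hypothetical interpreter input and its lowering under the scheme above:

    int x = 40;       // ordinary global, initialized via CXXGlobalInits
    x += 2;           // TopLevelStmtDecl: squashed, together with the next
    printf("%d", x);  // statement, into one synthesized internal function
                      // ("__stmts__N") run like any other dynamic initializer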
+ std::pair<std::unique_ptr<CodeGenFunction>, const TopLevelStmtDecl *> + GlobalTopLevelStmtBlockInFlight; + public: CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, const HeaderSearchOptions &headersearchopts, @@ -712,7 +721,8 @@ public: llvm::MDNode *getNoObjCARCExceptionsMetadata() { if (!NoObjCARCExceptionsMetadata) - NoObjCARCExceptionsMetadata = llvm::MDNode::get(getLLVMContext(), None); + NoObjCARCExceptionsMetadata = + llvm::MDNode::get(getLLVMContext(), std::nullopt); return NoObjCARCExceptionsMetadata; } @@ -751,6 +761,10 @@ public: return VTables.getItaniumVTableContext(); } + const ItaniumVTableContext &getItaniumVTableContext() const { + return VTables.getItaniumVTableContext(); + } + MicrosoftVTableContext &getMicrosoftVTableContext() { return VTables.getMicrosoftVTableContext(); } @@ -867,7 +881,7 @@ public: llvm::GlobalVariable * CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage, - unsigned Alignment); + llvm::Align Alignment); llvm::Function *CreateGlobalInitOrCleanUpFunction( llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, @@ -1076,7 +1090,8 @@ public: llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID); - llvm::Function *getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys = None); + llvm::Function *getIntrinsic(unsigned IID, + ArrayRef<llvm::Type *> Tys = std::nullopt); /// Emit code for a single top level declaration. void EmitTopLevelDecl(Decl *D); @@ -1351,13 +1366,14 @@ public: /// \returns true if \p Fn at \p Loc should be excluded from profile /// instrumentation by the SCL passed by \p -fprofile-list. - bool isFunctionBlockedByProfileList(llvm::Function *Fn, - SourceLocation Loc) const; + ProfileList::ExclusionType + isFunctionBlockedByProfileList(llvm::Function *Fn, SourceLocation Loc) const; /// \returns true if \p Fn at \p Loc should be excluded from profile /// instrumentation. - bool isFunctionBlockedFromProfileInstr(llvm::Function *Fn, - SourceLocation Loc) const; + ProfileList::ExclusionType + isFunctionBlockedFromProfileInstr(llvm::Function *Fn, + SourceLocation Loc) const; SanitizerMetadata *getSanitizerMetadata() { return SanitizerMD.get(); @@ -1406,7 +1422,7 @@ public: void EmitOMPAllocateDecl(const OMPAllocateDecl *D); /// Return the alignment specified in an allocate directive, if present. - llvm::Optional<CharUnits> getOMPAllocateAlignment(const VarDecl *VD); + std::optional<CharUnits> getOMPAllocateAlignment(const VarDecl *VD); /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable @@ -1433,9 +1449,14 @@ public: llvm::GlobalVariable *VTable, const VTableLayout &VTLayout); + llvm::Type *getVTableComponentType() const; + /// Generate a cross-DSO type identifier for MD. llvm::ConstantInt *CreateCrossDsoCfiTypeId(llvm::Metadata *MD); + /// Generate a KCFI type identifier for T. + llvm::ConstantInt *CreateKCFITypeId(QualType T); + /// Create a metadata identifier for the given type. This may either be an /// MDString (for external identifiers) or a distinct unnamed MDNode (for /// internal identifiers). @@ -1454,9 +1475,16 @@ public: void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F); + /// Set type metadata to the given function. + void setKCFIType(const FunctionDecl *FD, llvm::Function *F); + + /// Emit KCFI type identifier constants and remove unused identifiers. 
+ void finalizeKCFITypes(); + /// Whether this function's return type has no side effects, and thus may /// be trivially discarded if it is unused. - bool MayDropFunctionReturn(const ASTContext &Context, QualType ReturnType); + bool MayDropFunctionReturn(const ASTContext &Context, + QualType ReturnType) const; /// Returns whether this module needs the "all-vtables" type identifier. bool NeedAllVtablesTypeId() const; @@ -1577,6 +1605,7 @@ private: void EmitDeclContext(const DeclContext *DC); void EmitLinkageSpec(const LinkageSpecDecl *D); + void EmitTopLevelStmt(const TopLevelStmtDecl *D); /// Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); @@ -1601,6 +1630,7 @@ private: // FIXME: Hardcoding priority here is gross. void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, + unsigned LexOrder = ~0U, llvm::Constant *AssociatedData = nullptr); void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535, bool IsDtorAttrFunc = false); diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 587bcef78ee5..15a3d74666ca 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" +#include <optional> static llvm::cl::opt<bool> EnableValueProfiling("enable-value-profiling", @@ -755,7 +756,7 @@ void PGOHash::combine(HashType Type) { if (Count && Count % NumTypesPerWord == 0) { using namespace llvm::support; uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); - MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); + MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); Working = 0; } @@ -781,7 +782,7 @@ uint64_t PGOHash::finalize() { } else { using namespace llvm::support; uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); - MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); + MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); } } @@ -822,6 +823,8 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { CGM.ClearUnusedCoverageMapping(D); if (Fn->hasFnAttribute(llvm::Attribute::NoProfile)) return; + if (Fn->hasFnAttribute(llvm::Attribute::SkipProfile)) + return; setFuncName(Fn); @@ -963,11 +966,11 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, Builder.getInt32(Counter), StepV}; if (!StepV) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), - makeArrayRef(Args, 4)); + ArrayRef(Args, 4)); else Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step), - makeArrayRef(Args)); + ArrayRef(Args)); } void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) { @@ -1114,7 +1117,7 @@ CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond, uint64_t LoopCount) const { if (!PGO.haveRegionCounts()) return nullptr; - Optional<uint64_t> CondCount = PGO.getStmtCount(Cond); + std::optional<uint64_t> CondCount = PGO.getStmtCount(Cond); if (!CondCount || *CondCount == 0) return nullptr; return createProfileWeights(LoopCount, diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h index f740692ac205..66c93cba4bb0 100644 --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include <array> #include <memory> +#include <optional> namespace clang { namespace CodeGen { @@ -59,12 +60,12 @@ public: /// Check if an 
execution count is known for a given statement. If so, return /// true and put the value in Count; else return false. - Optional<uint64_t> getStmtCount(const Stmt *S) const { + std::optional<uint64_t> getStmtCount(const Stmt *S) const { if (!StmtCountMap) - return None; + return std::nullopt; auto I = StmtCountMap->find(S); if (I == StmtCountMap->end()) - return None; + return std::nullopt; return I->second; } diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 0cb63fbbe9e5..395ed7b1d703 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -338,7 +338,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { using TBAAStructField = llvm::MDBuilder::TBAAStructField; SmallVector<TBAAStructField, 4> Fields; if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - // Handle C++ base classes. Non-virtual bases can treated a a kind of + // Handle C++ base classes. Non-virtual bases can treated a kind of // field. Virtual bases are more complex and omitted, but avoid an // incomplete view for NewStructPathTBAA. if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0) diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h index 577f88367a3a..e848dc3b449c 100644 --- a/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/clang/lib/CodeGen/CodeGenTypeCache.h @@ -75,6 +75,9 @@ struct CodeGenTypeCache { llvm::PointerType *GlobalsInt8PtrTy; }; + /// void* in the address space for constant globals + llvm::PointerType *ConstGlobalsPtrTy; + /// The size and alignment of the builtin C type 'int'. This comes /// up enough in various ABI lowering tasks to be worth pre-computing. union { diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index fcce424747f1..abbf71daf1d5 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -67,7 +67,7 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, if (RD->getDeclContext()) RD->printQualifiedName(OS, Policy); else - RD->printName(OS); + RD->printName(OS, Policy); } else if (const TypedefNameDecl *TDD = RD->getTypedefNameForAnonDecl()) { // FIXME: We should not have to check for a null decl context here. // Right now we do it because the implicit Obj-C decls don't have one. @@ -655,7 +655,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { const ReferenceType *RTy = cast<ReferenceType>(Ty); QualType ETy = RTy->getPointeeType(); llvm::Type *PointeeType = ConvertTypeForMem(ETy); - unsigned AS = Context.getTargetAddressSpace(ETy); + unsigned AS = getTargetAddressSpace(ETy); ResultType = llvm::PointerType::get(PointeeType, AS); break; } @@ -665,7 +665,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { llvm::Type *PointeeType = ConvertTypeForMem(ETy); if (PointeeType->isVoidTy()) PointeeType = llvm::Type::getInt8Ty(getLLVMContext()); - unsigned AS = Context.getTargetAddressSpace(ETy); + unsigned AS = getTargetAddressSpace(ETy); ResultType = llvm::PointerType::get(PointeeType, AS); break; } @@ -772,10 +772,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { // Block pointers lower to function type. For function type, // getTargetAddressSpace() returns default address space for // function pointer i.e. program address space. Therefore, for block - // pointers, it is important to pass qualifiers when calling - // getTargetAddressSpace(), to ensure that we get the address space - // for data pointers and not function pointers. 
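The data/function distinction matters on targets whose code lives in a non-default address space. A hedged illustration (AVR is the usual example, with program memory in LLVM address space 1; the numbers are target-specific):

    void (*fp)(void);  // function pointer: lowered into the program AS
    int *dp;           // data pointer: lowered into the type's data AS
    void (^blk)(void); // block pointer: data, despite designating code --
                       // hence the pointee address space in the change below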
- unsigned AS = Context.getTargetAddressSpace(FTy.getQualifiers()); + // pointers, it is important to pass the pointee AST address space when + // calling getTargetAddressSpace(), to ensure that we get the LLVM IR + // address space for data pointers and not function pointers. + unsigned AS = Context.getTargetAddressSpace(FTy.getAddressSpace()); ResultType = llvm::PointerType::get(PointeeType, AS); break; } @@ -807,8 +807,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType, llvm::ArrayType::get(CGM.Int8Ty, (atomicSize - valueSize) / 8) }; - ResultType = llvm::StructType::get(getLLVMContext(), - llvm::makeArrayRef(elts)); + ResultType = + llvm::StructType::get(getLLVMContext(), llvm::ArrayRef(elts)); } break; } @@ -958,3 +958,13 @@ bool CodeGenTypes::isZeroInitializable(QualType T) { bool CodeGenTypes::isZeroInitializable(const RecordDecl *RD) { return getCGRecordLayout(RD).isZeroInitializable(); } + +unsigned CodeGenTypes::getTargetAddressSpace(QualType T) const { + // Return the address space for the type. If the type is a + // function type without an address space qualifier, the + // program address space is used. Otherwise, the target picks + // the best address space based on the type information + return T->isFunctionType() && !T.hasAddressSpace() + ? getDataLayout().getProgramAddressSpace() + : getContext().getTargetAddressSpace(T.getAddressSpace()); +} diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index cd20563cbf75..e76fda95513f 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -109,6 +109,7 @@ public: const llvm::DataLayout &getDataLayout() const { return TheModule.getDataLayout(); } + CodeGenModule &getCGM() const { return CGM; } ASTContext &getContext() const { return Context; } const ABIInfo &getABIInfo() const { return TheABIInfo; } const TargetInfo &getTarget() const { return Target; } @@ -305,7 +306,7 @@ public: // These are internal details of CGT that shouldn't be used externally. bool isRecordBeingLaidOut(const Type *Ty) const { return RecordsBeingLaidOut.count(Ty); } - + unsigned getTargetAddressSpace(QualType T) const; }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h index 188b82e56f53..1a7a181ca7f0 100644 --- a/clang/lib/CodeGen/ConstantEmitter.h +++ b/clang/lib/CodeGen/ConstantEmitter.h @@ -67,6 +67,9 @@ public: return Abstract; } + bool isInConstantContext() const { return InConstantContext; } + void setInConstantContext(bool var) { InConstantContext = var; } + /// Try to emit the initiaizer of the given declaration as an abstract /// constant. If this succeeds, the emission must be finalized. llvm::Constant *tryEmitForInitializer(const VarDecl &D); diff --git a/clang/lib/CodeGen/ConstantInitBuilder.cpp b/clang/lib/CodeGen/ConstantInitBuilder.cpp index 06d3e44f01b1..3cf69f3b6415 100644 --- a/clang/lib/CodeGen/ConstantInitBuilder.cpp +++ b/clang/lib/CodeGen/ConstantInitBuilder.cpp @@ -209,8 +209,7 @@ ConstantAggregateBuilderBase::addPlaceholderWithSize(llvm::Type *type) { // Advance the offset past that field. 
auto &layout = Builder.CGM.getDataLayout(); if (!Packed) - offset = offset.alignTo(CharUnits::fromQuantity( - layout.getABITypeAlignment(type))); + offset = offset.alignTo(CharUnits::fromQuantity(layout.getABITypeAlign(type))); offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type)); CachedOffsetEnd = Builder.Buffer.size(); @@ -249,8 +248,8 @@ CharUnits ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const{ "cannot compute offset when a placeholder is present"); llvm::Type *elementType = element->getType(); if (!Packed) - offset = offset.alignTo(CharUnits::fromQuantity( - layout.getABITypeAlignment(elementType))); + offset = offset.alignTo( + CharUnits::fromQuantity(layout.getABITypeAlign(elementType))); offset += CharUnits::fromQuantity(layout.getTypeStoreSize(elementType)); } while (++cacheEnd != end); } @@ -268,7 +267,7 @@ llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy) { assert((Begin < buffer.size() || (Begin == buffer.size() && eltTy)) && "didn't add any array elements without element type"); - auto elts = llvm::makeArrayRef(buffer).slice(Begin); + auto elts = llvm::ArrayRef(buffer).slice(Begin); if (!eltTy) eltTy = elts[0]->getType(); auto type = llvm::ArrayType::get(eltTy, elts.size()); auto constant = llvm::ConstantArray::get(type, elts); @@ -281,7 +280,7 @@ ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) { markFinished(); auto &buffer = getBuffer(); - auto elts = llvm::makeArrayRef(buffer).slice(Begin); + auto elts = llvm::ArrayRef(buffer).slice(Begin); if (ty == nullptr && elts.empty()) ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed); diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 0fe084b628da..101cd6a67b49 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -17,7 +17,6 @@ #include "clang/Basic/FileManager.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Lexer.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ProfileData/Coverage/CoverageMapping.h" @@ -26,6 +25,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include <optional> // This selects the coverage mapping format defined when `InstrProfData.inc` // is textually included. @@ -97,27 +97,29 @@ class SourceMappingRegion { Counter Count; /// Secondary Counter used for Branch Regions for "False" branches. - Optional<Counter> FalseCount; + std::optional<Counter> FalseCount; /// The region's starting location. - Optional<SourceLocation> LocStart; + std::optional<SourceLocation> LocStart; /// The region's ending location. - Optional<SourceLocation> LocEnd; + std::optional<SourceLocation> LocEnd; /// Whether this region is a gap region. The count from a gap region is set /// as the line execution count if there are no other regions on the line. 
bool GapRegion; public: - SourceMappingRegion(Counter Count, Optional<SourceLocation> LocStart, - Optional<SourceLocation> LocEnd, bool GapRegion = false) + SourceMappingRegion(Counter Count, std::optional<SourceLocation> LocStart, + std::optional<SourceLocation> LocEnd, + bool GapRegion = false) : Count(Count), LocStart(LocStart), LocEnd(LocEnd), GapRegion(GapRegion) { } - SourceMappingRegion(Counter Count, Optional<Counter> FalseCount, - Optional<SourceLocation> LocStart, - Optional<SourceLocation> LocEnd, bool GapRegion = false) + SourceMappingRegion(Counter Count, std::optional<Counter> FalseCount, + std::optional<SourceLocation> LocStart, + std::optional<SourceLocation> LocEnd, + bool GapRegion = false) : Count(Count), FalseCount(FalseCount), LocStart(LocStart), LocEnd(LocEnd), GapRegion(GapRegion) {} @@ -325,24 +327,24 @@ public: /// Get the coverage mapping file ID for \c Loc. /// - /// If such file id doesn't exist, return None. - Optional<unsigned> getCoverageFileID(SourceLocation Loc) { + /// If such file id doesn't exist, return std::nullopt. + std::optional<unsigned> getCoverageFileID(SourceLocation Loc) { auto Mapping = FileIDMapping.find(SM.getFileID(Loc)); if (Mapping != FileIDMapping.end()) return Mapping->second.first; - return None; + return std::nullopt; } /// This shrinks the skipped range if it spans a line that contains a /// non-comment token. If shrinking the skipped range would make it empty, - /// this returns None. + /// this returns std::nullopt. /// Note this function can potentially be expensive because /// getSpellingLineNumber uses getLineNumber, which is expensive. - Optional<SpellingRegion> adjustSkippedRange(SourceManager &SM, - SourceLocation LocStart, - SourceLocation LocEnd, - SourceLocation PrevTokLoc, - SourceLocation NextTokLoc) { + std::optional<SpellingRegion> adjustSkippedRange(SourceManager &SM, + SourceLocation LocStart, + SourceLocation LocEnd, + SourceLocation PrevTokLoc, + SourceLocation NextTokLoc) { SpellingRegion SR{SM, LocStart, LocEnd}; SR.ColumnStart = 1; if (PrevTokLoc.isValid() && SM.isWrittenInSameFile(LocStart, PrevTokLoc) && @@ -355,7 +357,7 @@ public: } if (SR.isInSourceOrder()) return SR; - return None; + return std::nullopt; } /// Gather all the regions that were skipped by the preprocessor @@ -385,7 +387,7 @@ public: auto CovFileID = getCoverageFileID(LocStart); if (!CovFileID) continue; - Optional<SpellingRegion> SR; + std::optional<SpellingRegion> SR; if (I.isComment()) SR = adjustSkippedRange(SM, LocStart, LocEnd, I.PrevTokLoc, I.NextTokLoc); @@ -527,7 +529,7 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { if (MappingRegions.empty()) return; - CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions); + CoverageMappingWriter Writer(FileIDMapping, std::nullopt, MappingRegions); Writer.write(OS); } }; @@ -583,9 +585,10 @@ struct CounterCoverageMappingBuilder /// /// Returns the index on the stack where the region was pushed. This can be /// used with popRegions to exit a "scope", ending the region that was pushed. 
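A quick illustration of what a gap region buys (a sketch, not from the patch): the whitespace between an if condition and its then-block belongs to neither counter, and the gap region assigns it the 'then' count so per-line reports stay sane:

    if (cond)     // condition evaluated with the parent count
                  // <- gap region carrying the 'then' count
      doWork();   // then-block counter starts here

The pushRegion/popRegions pair below maintains these spans as a stack while the builder walks the AST.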
- size_t pushRegion(Counter Count, Optional<SourceLocation> StartLoc = None, - Optional<SourceLocation> EndLoc = None, - Optional<Counter> FalseCount = None) { + size_t pushRegion(Counter Count, + std::optional<SourceLocation> StartLoc = std::nullopt, + std::optional<SourceLocation> EndLoc = std::nullopt, + std::optional<Counter> FalseCount = std::nullopt) { if (StartLoc && !FalseCount) { MostRecentLocation = *StartLoc; @@ -810,7 +813,7 @@ struct CounterCoverageMappingBuilder } llvm::SmallSet<SourceLocation, 8> StartLocs; - Optional<Counter> ParentCounter; + std::optional<Counter> ParentCounter; for (SourceMappingRegion &I : llvm::reverse(RegionStack)) { if (!I.hasStartLoc()) continue; @@ -878,8 +881,8 @@ struct CounterCoverageMappingBuilder } /// Find a valid gap range between \p AfterLoc and \p BeforeLoc. - Optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc, - SourceLocation BeforeLoc) { + std::optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc, + SourceLocation BeforeLoc) { // If AfterLoc is in function-like macro, use the right parenthesis // location. if (AfterLoc.isMacroID()) { @@ -917,10 +920,10 @@ struct CounterCoverageMappingBuilder // If the start and end locations of the gap are both within the same macro // file, the range may not be in source order. if (AfterLoc.isMacroID() || BeforeLoc.isMacroID()) - return None; + return std::nullopt; if (!SM.isWrittenInSameFile(AfterLoc, BeforeLoc) || !SpellingRegion(SM, AfterLoc, BeforeLoc).isInSourceOrder()) - return None; + return std::nullopt; return {{AfterLoc, BeforeLoc}}; } @@ -1377,19 +1380,23 @@ struct CounterCoverageMappingBuilder // Extend into the condition before we propagate through it below - this is // needed to handle macros that generate the "if" but not the condition. - extendRegion(S->getCond()); + if (!S->isConsteval()) + extendRegion(S->getCond()); Counter ParentCount = getRegion().getCounter(); Counter ThenCount = getRegionCounter(S); - // Emitting a counter for the condition makes it easier to interpret the - // counter for the body when looking at the coverage. - propagateCounts(ParentCount, S->getCond()); + if (!S->isConsteval()) { + // Emitting a counter for the condition makes it easier to interpret the + // counter for the body when looking at the coverage. + propagateCounts(ParentCount, S->getCond()); - // The 'then' count applies to the area immediately after the condition. - auto Gap = findGapAreaBetween(S->getRParenLoc(), getStart(S->getThen())); - if (Gap) - fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount); + // The 'then' count applies to the area immediately after the condition. + std::optional<SourceRange> Gap = + findGapAreaBetween(S->getRParenLoc(), getStart(S->getThen())); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount); + } extendRegion(S->getThen()); Counter OutCount = propagateCounts(ThenCount, S->getThen()); @@ -1398,9 +1405,9 @@ struct CounterCoverageMappingBuilder if (const Stmt *Else = S->getElse()) { bool ThenHasTerminateStmt = HasTerminateStmt; HasTerminateStmt = false; - // The 'else' count applies to the area immediately after the 'then'. 
- Gap = findGapAreaBetween(getEnd(S->getThen()), getStart(Else)); + std::optional<SourceRange> Gap = + findGapAreaBetween(getEnd(S->getThen()), getStart(Else)); if (Gap) fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ElseCount); extendRegion(Else); @@ -1416,9 +1423,11 @@ struct CounterCoverageMappingBuilder GapRegionCounter = OutCount; } - // Create Branch Region around condition. - createBranchRegion(S->getCond(), ThenCount, - subtractCounters(ParentCount, ThenCount)); + if (!S->isConsteval()) { + // Create Branch Region around condition. + createBranchRegion(S->getCond(), ThenCount, + subtractCounters(ParentCount, ThenCount)); + } } void VisitCXXTryStmt(const CXXTryStmt *S) { @@ -1623,7 +1632,7 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord( #include "llvm/ProfileData/InstrProfData.inc" }; auto *FunctionRecordTy = - llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes), + llvm::StructType::get(Ctx, ArrayRef(FunctionRecordTypes), /*isPacked=*/true); // Create the function record constant. @@ -1631,8 +1640,8 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord( llvm::Constant *FunctionRecordVals[] = { #include "llvm/ProfileData/InstrProfData.inc" }; - auto *FuncRecordConstant = llvm::ConstantStruct::get( - FunctionRecordTy, makeArrayRef(FunctionRecordVals)); + auto *FuncRecordConstant = + llvm::ConstantStruct::get(FunctionRecordTy, ArrayRef(FunctionRecordVals)); // Create the function record global. auto *FuncRecord = new llvm::GlobalVariable( @@ -1676,7 +1685,7 @@ void CoverageMappingModuleGen::addFunctionMappingRecord( auto I = Entry.second; FilenameStrs[I] = normalizeFilename(Entry.first->getName()); } - ArrayRef<std::string> FilenameRefs = llvm::makeArrayRef(FilenameStrs); + ArrayRef<std::string> FilenameRefs = llvm::ArrayRef(FilenameStrs); RawCoverageMappingReader Reader(CoverageMapping, FilenameRefs, Filenames, Expressions, Regions); if (Reader.read()) @@ -1722,20 +1731,19 @@ void CoverageMappingModuleGen::emit() { #include "llvm/ProfileData/InstrProfData.inc" }; auto CovDataHeaderTy = - llvm::StructType::get(Ctx, makeArrayRef(CovDataHeaderTypes)); + llvm::StructType::get(Ctx, ArrayRef(CovDataHeaderTypes)); llvm::Constant *CovDataHeaderVals[] = { #define COVMAP_HEADER(Type, LLVMType, Name, Init) Init, #include "llvm/ProfileData/InstrProfData.inc" }; - auto CovDataHeaderVal = llvm::ConstantStruct::get( - CovDataHeaderTy, makeArrayRef(CovDataHeaderVals)); + auto CovDataHeaderVal = + llvm::ConstantStruct::get(CovDataHeaderTy, ArrayRef(CovDataHeaderVals)); // Create the coverage data record llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()}; - auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes)); + auto CovDataTy = llvm::StructType::get(Ctx, ArrayRef(CovDataTypes)); llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal}; - auto CovDataVal = - llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals)); + auto CovDataVal = llvm::ConstantStruct::get(CovDataTy, ArrayRef(TUDataVals)); auto CovData = new llvm::GlobalVariable( CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage, CovDataVal, llvm::getCoverageMappingVarName()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index fc2ff15a6acd..18403036e700 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -433,11 +433,7 @@ public: ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, /*UseARMGuardVarABI=*/true) {} - bool HasThisReturn(GlobalDecl GD) const override { - 
return (isa<CXXConstructorDecl>(GD.getDecl()) || ( - isa<CXXDestructorDecl>(GD.getDecl()) && - GD.getDtorType() != Dtor_Deleting)); - } + bool constructorsAndDestructorsReturnThis() const override { return true; } void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResTy) override; @@ -468,11 +464,7 @@ public: : ItaniumCXXABI(CGM) {} private: - bool HasThisReturn(GlobalDecl GD) const override { - return isa<CXXConstructorDecl>(GD.getDecl()) || - (isa<CXXDestructorDecl>(GD.getDecl()) && - GD.getDtorType() != Dtor_Deleting); - } + bool constructorsAndDestructorsReturnThis() const override { return true; } }; class WebAssemblyCXXABI final : public ItaniumCXXABI { @@ -486,11 +478,7 @@ public: llvm::Value *Exn) override; private: - bool HasThisReturn(GlobalDecl GD) const override { - return isa<CXXConstructorDecl>(GD.getDecl()) || - (isa<CXXDestructorDecl>(GD.getDecl()) && - GD.getDtorType() != Dtor_Deleting); - } + bool constructorsAndDestructorsReturnThis() const override { return true; } bool canCallMismatchedFunctionType() const override { return false; } }; @@ -1014,7 +1002,7 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD, } else { const ASTContext &Context = getContext(); CharUnits PointerWidth = Context.toCharUnitsFromBits( - Context.getTargetInfo().getPointerWidth(0)); + Context.getTargetInfo().getPointerWidth(LangAS::Default)); VTableOffset = Index * PointerWidth.getQuantity(); } @@ -1262,7 +1250,7 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); if (isNoReturn) - CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None); + CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, std::nullopt); else CGF.EmitRuntimeCallOrInvoke(Fn); } @@ -1337,8 +1325,9 @@ static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) { llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); // Mark the function as nounwind readonly. - llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, - llvm::Attribute::ReadOnly }; + llvm::AttrBuilder FuncAttrs(CGF.getLLVMContext()); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); llvm::AttributeList Attrs = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); @@ -1769,8 +1758,11 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, } } - if (VTContext.isRelativeLayout() && !VTable->isDSOLocal()) - CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName()); + if (VTContext.isRelativeLayout()) { + CGVT.RemoveHwasanMetadata(VTable); + if (!VTable->isDSOLocal()) + CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName()); + } } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( @@ -1887,11 +1879,11 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, // values are read. unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout() ? 
32 - : CGM.getTarget().getPointerAlign(0); + : CGM.getTarget().getPointerAlign(LangAS::Default); VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, VTableType, llvm::GlobalValue::ExternalLinkage, - getContext().toCharUnitsFromBits(PAlign).getQuantity()); + getContext().toCharUnitsFromBits(PAlign).getAsAlign()); VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // In MS C++ if you have a class with virtual functions in which you are using @@ -1932,7 +1924,9 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { VFunc = CGF.EmitVTableTypeCheckedLoad( MethodDecl->getParent(), VTable, TyPtr, - VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8); + VTableIndex * + CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default) / + 8); } else { CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc); @@ -2374,8 +2368,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, guardAlignment = CGF.getSizeAlign(); } else { guardTy = CGF.Int64Ty; - guardAlignment = CharUnits::fromQuantity( - CGM.getDataLayout().getABITypeAlignment(guardTy)); + guardAlignment = + CharUnits::fromQuantity(CGM.getDataLayout().getABITypeAlign(guardTy)); } } llvm::PointerType *guardPtrTy = guardTy->getPointerTo( @@ -2393,13 +2387,15 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, } // Create the guard variable with a zero-initializer. - // Just absorb linkage and visibility from the guarded variable. + // Just absorb linkage, visibility and dll storage class from the guarded + // variable. guard = new llvm::GlobalVariable(CGM.getModule(), guardTy, false, var->getLinkage(), llvm::ConstantInt::get(guardTy, 0), guardName.str()); guard->setDSOLocal(var->isDSOLocal()); guard->setVisibility(var->getVisibility()); + guard->setDLLStorageClass(var->getDLLStorageClass()); // If the variable is thread-local, so is its guard variable. guard->setThreadLocalMode(var->getThreadLocalMode()); guard->setAlignment(guardAlignment.getAsAlign()); @@ -2437,54 +2433,76 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // __cxa_guard_release (&obj_guard); // } // } + // + // If threadsafe statics are enabled, but we don't have inline atomics, just + // call __cxa_guard_acquire unconditionally. The "inline" check isn't + // actually inline, and the user might not expect calls to __atomic libcalls. - // Load the first byte of the guard variable. - llvm::LoadInst *LI = - Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); + unsigned MaxInlineWidthInBits = CGF.getTarget().getMaxAtomicInlineWidth(); + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); + if (!threadsafe || MaxInlineWidthInBits) { + // Load the first byte of the guard variable. + llvm::LoadInst *LI = + Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); - // Itanium ABI: - // An implementation supporting thread-safety on multiprocessor - // systems must also guarantee that references to the initialized - // object do not occur before the load of the initialization flag. - // - // In LLVM, we do this by marking the load Acquire. - if (threadsafe) - LI->setAtomic(llvm::AtomicOrdering::Acquire); + // Itanium ABI: + // An implementation supporting thread-safety on multiprocessor + // systems must also guarantee that references to the initialized + // object do not occur before the load of the initialization flag. + // + // In LLVM, we do this by marking the load Acquire. 
+ if (threadsafe) + LI->setAtomic(llvm::AtomicOrdering::Acquire); - // For ARM, we should only check the first bit, rather than the entire byte: - // - // ARM C++ ABI 3.2.3.1: - // To support the potential use of initialization guard variables - // as semaphores that are the target of ARM SWP and LDREX/STREX - // synchronizing instructions we define a static initialization - // guard variable to be a 4-byte aligned, 4-byte word with the - // following inline access protocol. - // #define INITIALIZED 1 - // if ((obj_guard & INITIALIZED) != INITIALIZED) { - // if (__cxa_guard_acquire(&obj_guard)) - // ... - // } - // - // and similarly for ARM64: - // - // ARM64 C++ ABI 3.2.2: - // This ABI instead only specifies the value bit 0 of the static guard - // variable; all other bits are platform defined. Bit 0 shall be 0 when the - // variable is not initialized and 1 when it is. - llvm::Value *V = - (UseARMGuardVarABI && !useInt8GuardVariable) - ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1)) - : LI; - llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized"); + // For ARM, we should only check the first bit, rather than the entire byte: + // + // ARM C++ ABI 3.2.3.1: + // To support the potential use of initialization guard variables + // as semaphores that are the target of ARM SWP and LDREX/STREX + // synchronizing instructions we define a static initialization + // guard variable to be a 4-byte aligned, 4-byte word with the + // following inline access protocol. + // #define INITIALIZED 1 + // if ((obj_guard & INITIALIZED) != INITIALIZED) { + // if (__cxa_guard_acquire(&obj_guard)) + // ... + // } + // + // and similarly for ARM64: + // + // ARM64 C++ ABI 3.2.2: + // This ABI instead only specifies the value bit 0 of the static guard + // variable; all other bits are platform defined. Bit 0 shall be 0 when the + // variable is not initialized and 1 when it is. + llvm::Value *V = + (UseARMGuardVarABI && !useInt8GuardVariable) + ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1)) + : LI; + llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized"); - llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); - llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); + llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); - // Check if the first byte of the guard variable is zero. - CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock, - CodeGenFunction::GuardKind::VariableGuard, &D); + // Check if the first byte of the guard variable is zero. + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); - CGF.EmitBlock(InitCheckBlock); + CGF.EmitBlock(InitCheckBlock); + } + + // The semantics of dynamic initialization of variables with static or thread + // storage duration depends on whether they are declared at block-scope. The + // initialization of such variables at block-scope can be aborted with an + // exception and later retried (per C++20 [stmt.dcl]p4), and recursive entry + // to their initialization has undefined behavior (also per C++20 + // [stmt.dcl]p4). For such variables declared at non-block scope, exceptions + // lead to termination (per C++20 [except.terminate]p1), and recursive + // references to the variables are governed only by the lifetime rules (per + // C++20 [class.cdtor]p2), which means such references are perfectly fine as + // long as they avoid touching memory. 
As a result, block-scope variables must + // not be marked as initialized until after initialization completes (unless + // the mark is reverted following an exception), but non-block-scope variables + // must be marked prior to initialization so that recursive accesses during + // initialization do not restart initialization. // Variables used when coping with thread-safe statics and exceptions. if (threadsafe) { @@ -2501,6 +2519,12 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard); CGF.EmitBlock(InitBlock); + } else if (!D.isLocalVarDecl()) { + // For non-local variables, store 1 into the first byte of the guard + // variable before the object initialization begins so that references + // to the variable during initialization don't restart initialization. + Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1), + Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); } // Emit the initializer and add a global destructor if appropriate. @@ -2513,9 +2537,10 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // Call __cxa_guard_release. This cannot throw. CGF.EmitNounwindRuntimeCall(getGuardReleaseFn(CGM, guardPtrTy), guardAddr.getPointer()); - } else { - // Store 1 into the first byte of the guard variable after initialization is - // complete. + } else if (D.isLocalVarDecl()) { + // For local variables, store 1 into the first byte of the guard variable + // after the object initialization completes so that initialization is + // retried if initialization is interrupted by an exception. Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1), Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); } @@ -2687,7 +2712,7 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { } CGF.FinishFunction(); - AddGlobalCtor(GlobalInitFn, Priority, nullptr); + AddGlobalCtor(GlobalInitFn, Priority); } if (getCXXABI().useSinitAndSterm()) @@ -2988,14 +3013,16 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // For a reference, the result of the wrapper function is a pointer to // the referenced object. 
- llvm::Value *Val = Var; + llvm::Value *Val = Builder.CreateThreadLocalAddress(Var); + if (VD->getType()->isReferenceType()) { CharUnits Align = CGM.getContext().getDeclAlign(VD); - Val = Builder.CreateAlignedLoad(Var->getValueType(), Var, Align); + Val = Builder.CreateAlignedLoad(Var->getValueType(), Val, Align); } if (Val->getType() != Wrapper->getReturnType()) Val = Builder.CreatePointerBitCastOrAddrSpaceCast( Val, Wrapper->getReturnType(), ""); + Builder.CreateRet(Val); } } @@ -3154,7 +3181,7 @@ llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName( auto Align = CGM.getContext().getTypeAlignInChars(CGM.getContext().CharTy); llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable( - Name, Init->getType(), Linkage, Align.getQuantity()); + Name, Init->getType(), Linkage, Align.getAsAlign()); GV->setInitializer(Init); @@ -3540,7 +3567,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { } assert(isa<ObjCInterfaceType>(Ty)); - LLVM_FALLTHROUGH; + [[fallthrough]]; case Type::ObjCInterface: if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) { @@ -3852,8 +3879,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( if (CGM.supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(M.getOrInsertComdat(GV->getName())); - CharUnits Align = - CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0)); + CharUnits Align = CGM.getContext().toCharUnitsFromBits( + CGM.getTarget().getPointerAlign(LangAS::Default)); GV->setAlignment(Align.getAsAlign()); // The Itanium ABI specifies that type_info objects must be globally @@ -4031,7 +4058,8 @@ void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) { // LLP64 platforms. QualType OffsetFlagsTy = CGM.getContext().LongTy; const TargetInfo &TI = CGM.getContext().getTargetInfo(); - if (TI.getTriple().isOSCygMing() && TI.getPointerWidth(0) > TI.getLongWidth()) + if (TI.getTriple().isOSCygMing() && + TI.getPointerWidth(LangAS::Default) > TI.getLongWidth()) OffsetFlagsTy = CGM.getContext().LongLongTy; llvm::Type *OffsetFlagsLTy = CGM.getTypes().ConvertType(OffsetFlagsTy); @@ -4513,7 +4541,7 @@ static void InitCatchParam(CodeGenFunction &CGF, switch (CatchType.getQualifiers().getObjCLifetime()) { case Qualifiers::OCL_Strong: CastExn = CGF.EmitARCRetainNonBlock(CastExn); - LLVM_FALLTHROUGH; + [[fallthrough]]; case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: @@ -4650,13 +4678,16 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, /// void @__clang_call_terminate(i8* %exn) nounwind noreturn /// This code is used only in C++. 
static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) { - llvm::FunctionType *fnTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); + ASTContext &C = CGM.getContext(); + const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + C.VoidTy, {C.getPointerType(C.CharTy)}); + llvm::FunctionType *fnTy = CGM.getTypes().GetFunctionType(FI); llvm::FunctionCallee fnRef = CGM.CreateRuntimeFunction( fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true); llvm::Function *fn = cast<llvm::Function>(fnRef.getCallee()->stripPointerCasts()); if (fn->empty()) { + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, fn, /*IsThunk=*/false); fn->setDoesNotThrow(); fn->setDoesNotReturn(); diff --git a/clang/lib/CodeGen/MacroPPCallbacks.cpp b/clang/lib/CodeGen/MacroPPCallbacks.cpp index 2f09fd2b6c15..8589869f6e2f 100644 --- a/clang/lib/CodeGen/MacroPPCallbacks.cpp +++ b/clang/lib/CodeGen/MacroPPCallbacks.cpp @@ -120,7 +120,7 @@ void MacroPPCallbacks::FileEntered(SourceLocation Loc) { if (PP.getSourceManager().isWrittenInCommandLineFile(Loc)) return; updateStatusToNextScope(); - LLVM_FALLTHROUGH; + [[fallthrough]]; case CommandLineIncludeScope: EnteredCommandLineIncludeFiles++; break; @@ -167,7 +167,7 @@ void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason, void MacroPPCallbacks::InclusionDirective( SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, - bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File, + bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { diff --git a/clang/lib/CodeGen/MacroPPCallbacks.h b/clang/lib/CodeGen/MacroPPCallbacks.h index 01041b16e4b7..5af177d0c3fa 100644 --- a/clang/lib/CodeGen/MacroPPCallbacks.h +++ b/clang/lib/CodeGen/MacroPPCallbacks.h @@ -101,7 +101,7 @@ public: void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, - Optional<FileEntryRef> File, StringRef SearchPath, + OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) override; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index f0c45654f8d9..ae785cce09f9 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -289,7 +289,7 @@ public: CodeGenFunction::VPtr Vptr) override; /// Don't initialize vptrs if dynamic class - /// is marked with with the 'novtable' attribute. + /// is marked with the 'novtable' attribute. bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override { return !VTableClass->hasAttr<MSNoVTableAttr>(); } @@ -458,7 +458,7 @@ public: friend struct MSRTTIBuilder; bool isImageRelative() const { - return CGM.getTarget().getPointerWidth(/*AddrSpace=*/0) == 64; + return CGM.getTarget().getPointerWidth(LangAS::Default) == 64; } // 5 routines for constructing the llvm types for MS RTTI structs. 
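// [Editor's aside: illustrative sketch, not part of the patch.] A change that
// recurs throughout this commit is replacing the bare address-space literal 0
// in TargetInfo pointer queries with the LangAS enumeration, as seen in
// isImageRelative() above. Assuming the usual CodeGenModule accessors, the
// migration pattern is:
//
//   unsigned W = CGM.getTarget().getPointerWidth(/*AddrSpace=*/0);  // before
//   unsigned W = CGM.getTarget().getPointerWidth(LangAS::Default);  // after
//
// Behavior is unchanged for the default address space; the enum merely makes
// the intent explicit. isImageRelative() uses the 64-bit result to decide when
// MS RTTI structures store image-relative 32-bit offsets rather than raw
// pointers.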
@@ -1086,8 +1086,8 @@ bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const { return isDeletingDtor(GD); } -static bool isTrivialForAArch64MSVC(const CXXRecordDecl *RD) { - // For AArch64, we use the C++14 definition of an aggregate, so we also +static bool isTrivialForMSVC(const CXXRecordDecl *RD) { + // We use the C++14 definition of an aggregate, so we also // check for: // No private or protected non static data members. // No base classes @@ -1115,15 +1115,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // Normally, the C++ concept of "is trivially copyable" is used to determine - // if a struct can be returned directly. However, as MSVC and the language - // have evolved, the definition of "trivially copyable" has changed, while the - // ABI must remain stable. AArch64 uses the C++14 concept of an "aggregate", - // while other ISAs use the older concept of "plain old data". - bool isTrivialForABI = RD->isPOD(); - bool isAArch64 = CGM.getTarget().getTriple().isAArch64(); - if (isAArch64) - isTrivialForABI = RD->canPassInRegisters() && isTrivialForAArch64MSVC(RD); + bool isTrivialForABI = RD->canPassInRegisters() && isTrivialForMSVC(RD); // MSVC always returns structs indirectly from C++ instance methods. bool isIndirectReturn = !isTrivialForABI || FI.isInstanceMethod(); @@ -1137,7 +1129,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { // On AArch64, use the `inreg` attribute if the object is considered to not // be trivially copyable, or if this is an instance method struct return. - FI.getReturnInfo().setInReg(isAArch64); + FI.getReturnInfo().setInReg(CGM.getTarget().getTriple().isAArch64()); return true; } @@ -1679,7 +1671,7 @@ void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info, CharUnits AddressPoint = getContext().getLangOpts().RTTIData ? getContext().toCharUnitsFromBits( - getContext().getTargetInfo().getPointerWidth(0)) + getContext().getTargetInfo().getPointerWidth(LangAS::Default)) : CharUnits::Zero(); if (Info.PathToIntroducingObject.empty()) { @@ -1952,7 +1944,9 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { VFunc = CGF.EmitVTableTypeCheckedLoad( getObjectWithVPtr(), VTable, Ty, - ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8); + ML.Index * + CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default) / + 8); } else { if (CGM.getCodeGenOpts().PrepareForLTO) CGF.EmitTypeMetadataCodeForVCall(getObjectWithVPtr(), VTable, Loc); @@ -2083,6 +2077,8 @@ MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, // Start defining the function. CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo, FunctionArgs, MD->getLocation(), SourceLocation()); + + ApplyDebugLocation AL(CGF, MD->getLocation()); setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF)); // Load the vfptr and then callee from the vftable. 
The callee should have
@@ -2127,7 +2123,7 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
   CharUnits Alignment =
       CGM.getContext().getTypeAlignInChars(CGM.getContext().IntTy);
   llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
-      Name, VBTableType, Linkage, Alignment.getQuantity());
+      Name, VBTableType, Linkage, Alignment.getAsAlign());
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
 
   if (RD->hasAttr<DLLImportAttr>())
@@ -2348,6 +2344,10 @@ void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
   if (D.getTLSKind())
     return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr);
 
+  // HLSL doesn't support atexit.
+  if (CGM.getLangOpts().HLSL)
+    return CGM.AddCXXDtorEntry(Dtor, Addr);
+
   // The default behavior is to use atexit.
   CGF.registerGlobalDtorWithAtExit(D, Dtor, Addr);
 }
@@ -4142,7 +4142,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
   CodeGenFunction::RunCleanupsScope Cleanups(CGF);
 
   const auto *FPT = CD->getType()->castAs<FunctionProtoType>();
-  CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
+  CGF.EmitCallArgs(Args, FPT, llvm::ArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
 
   // Insert any ABI-specific implicit constructor arguments.
   AddedStructorArgCounts ExtraArgs =
@@ -4350,10 +4350,10 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
   llvm::ArrayType *AT = llvm::ArrayType::get(CTType, NumEntries);
   llvm::StructType *CTAType = getCatchableTypeArrayType(NumEntries);
   llvm::Constant *Fields[] = {
-      llvm::ConstantInt::get(CGM.IntTy, NumEntries),    // NumEntries
+      llvm::ConstantInt::get(CGM.IntTy, NumEntries), // NumEntries
       llvm::ConstantArray::get(
-          AT, llvm::makeArrayRef(CatchableTypes.begin(),
-                                 CatchableTypes.end())) // CatchableTypes
+          AT, llvm::ArrayRef(CatchableTypes.begin(),
+                             CatchableTypes.end())) // CatchableTypes
   };
   SmallString<256> MangledName;
   {
@@ -4470,10 +4470,45 @@ MicrosoftCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This,
 }
 
 bool MicrosoftCXXABI::isPermittedToBeHomogeneousAggregate(
-    const CXXRecordDecl *CXXRD) const {
-  // MSVC Windows on Arm64 considers a type not HFA if it is not an
-  // aggregate according to the C++14 spec. This is not consistent with the
-  // AAPCS64, but is defacto spec on that platform.
-  return !CGM.getTarget().getTriple().isAArch64() ||
-         isTrivialForAArch64MSVC(CXXRD);
+    const CXXRecordDecl *RD) const {
+  // All aggregates are permitted to be HFA on non-ARM platforms, which mostly
+  // affects vectorcall on x64/x86.
+  if (!CGM.getTarget().getTriple().isAArch64())
+    return true;
+  // MSVC Windows on Arm64 has its own rules for determining if a type is HFA
+  // that are inconsistent with the AAPCS64 ABI. The following are our best
+  // determination of those rules so far, based on observation of MSVC's
+  // behavior.
+  if (RD->isEmpty())
+    return false;
+  if (RD->isPolymorphic())
+    return false;
+  if (RD->hasNonTrivialCopyAssignment())
+    return false;
+  if (RD->hasNonTrivialDestructor())
+    return false;
+  if (RD->hasNonTrivialDefaultConstructor())
+    return false;
+  // These two are somewhat redundant given the caller
+  // (ABIInfo::isHomogeneousAggregate) checks the bases and fields, but that
+  // caller doesn't consider empty bases/fields to be non-homogeneous, while
+  // it looks like Microsoft's AArch64 ABI does care about these empty types &
+  // anything containing/derived from one is non-homogeneous.
+  // Instead we could add another CXXABI entry point to query this property and
+  // have ABIInfo::isHomogeneousAggregate use that property.
+  // I don't think any of the other features listed above could be true of a
+  // base/field while not true of the outer struct. For example, if you have a
+  // base/field that has a non-trivial copy assignment/dtor/default ctor, then
+  // the outer struct's corresponding operation must be non-trivial.
+  for (const CXXBaseSpecifier &B : RD->bases()) {
+    if (const CXXRecordDecl *FRD = B.getType()->getAsCXXRecordDecl()) {
+      if (!isPermittedToBeHomogeneousAggregate(FRD))
+        return false;
+    }
+  }
+  // Empty fields seem to be caught by ABIInfo::isHomogeneousAggregate's
+  // check for padding - but maybe there are ways to end up with an empty
+  // field without padding? Not that I know of, so don't check fields here &
+  // rely on the padding check.
+  return true;
 }
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index c9a5e56c72c7..e3e953c34c59 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -179,6 +179,7 @@ namespace {
     }
 
     bool HandleTopLevelDecl(DeclGroupRef DG) override {
+      // FIXME: Why not return false and abort parsing?
       if (Diags.hasErrorOccurred())
         return true;
 
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index f6eaa35b4873..677b66d3e1dc 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -107,7 +107,7 @@ class PCHContainerGenerator : public ASTConsumer {
       return true;
 
     SmallVector<QualType, 16> ArgTypes;
-    for (auto i : D->parameters())
+    for (auto *i : D->parameters())
       ArgTypes.push_back(i->getType());
     QualType RetTy = D->getReturnType();
     QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes,
@@ -126,7 +126,7 @@ class PCHContainerGenerator : public ASTConsumer {
     ArgTypes.push_back(D->getSelfType(Ctx, D->getClassInterface(),
                                       selfIsPseudoStrong, selfIsConsumed));
     ArgTypes.push_back(Ctx.getObjCSelType());
-    for (auto i : D->parameters())
+    for (auto *i : D->parameters())
       ArgTypes.push_back(i->getType());
     QualType RetTy = D->getReturnType();
     QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes,
diff --git a/clang/lib/CodeGen/PatternInit.cpp b/clang/lib/CodeGen/PatternInit.cpp
index 26ac8b63a9ba..4400bc443688 100644
--- a/clang/lib/CodeGen/PatternInit.cpp
+++ b/clang/lib/CodeGen/PatternInit.cpp
@@ -43,8 +43,8 @@ llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM,
   }
   if (Ty->isPtrOrPtrVectorTy()) {
     auto *PtrTy = cast<llvm::PointerType>(Ty->getScalarType());
-    unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
-        PtrTy->getAddressSpace());
+    unsigned PtrWidth =
+        CGM.getDataLayout().getPointerSizeInBits(PtrTy->getAddressSpace());
     if (PtrWidth > 64)
       llvm_unreachable("pattern initialization of unsupported pointer width");
     llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 7848cf012633..554f1ea2a47d 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -104,5 +104,5 @@ void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
 
 void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) {
   I->setMetadata(llvm::LLVMContext::MD_nosanitize,
-
llvm::MDNode::get(CGM.getLLVMContext(), None)); + llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt)); } diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp index 8fb24fcecf53..63d975193c02 100644 --- a/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -15,13 +15,14 @@ #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/Basic/TargetInfo.h" +#include <optional> using namespace clang; using namespace CodeGen; using namespace swiftcall; static const SwiftABIInfo &getSwiftABIInfo(CodeGenModule &CGM) { - return cast<SwiftABIInfo>(CGM.getTargetCodeGenInfo().getABIInfo()); + return CGM.getTargetCodeGenInfo().getSwiftABIInfo(); } static bool isPowerOf2(unsigned n) { @@ -124,7 +125,7 @@ void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin, const ASTRecordLayout &layout) { // Unions are a special case. if (record->isUnion()) { - for (auto field : record->fields()) { + for (auto *field : record->fields()) { if (field->isBitField()) { addBitFieldData(field, begin, 0); } else { @@ -161,7 +162,7 @@ void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin, } // Add fields. - for (auto field : record->fields()) { + for (auto *field : record->fields()) { auto fieldOffsetInBits = layout.getFieldOffset(field->getFieldIndex()); if (field->isBitField()) { addBitFieldData(field, begin, fieldOffsetInBits); @@ -439,7 +440,7 @@ static bool isMergeableEntryType(llvm::Type *type) { // merge pointers, but (1) it doesn't currently matter in practice because // the chunk size is never greater than the size of a pointer and (2) // Swift IRGen uses integer types for a lot of things that are "really" - // just storing pointers (like Optional<SomePointer>). If we ever have a + // just storing pointers (like std::optional<SomePointer>). If we ever have a // target that would otherwise combine pointers, we should put some effort // into fixing those cases in Swift IRGen and then call out pointer types // here. @@ -590,9 +591,8 @@ SwiftAggLowering::getCoerceAndExpandTypes() const { hasPadding = true; } - if (!packed && !entry.Begin.isMultipleOf( - CharUnits::fromQuantity( - CGM.getDataLayout().getABITypeAlignment(entry.Type)))) + if (!packed && !entry.Begin.isMultipleOf(CharUnits::fromQuantity( + CGM.getDataLayout().getABITypeAlign(entry.Type)))) packed = true; elts.push_back(entry.Type); @@ -631,9 +631,8 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { // Avoid copying the array of types when there's just a single element. 
if (Entries.size() == 1) { - return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift( - Entries.back().Type, - asReturnValue); + return getSwiftABIInfo(CGM).shouldPassIndirectly(Entries.back().Type, + asReturnValue); } SmallVector<llvm::Type*, 8> componentTys; @@ -641,31 +640,27 @@ bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { for (auto &entry : Entries) { componentTys.push_back(entry.Type); } - return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys, - asReturnValue); + return getSwiftABIInfo(CGM).shouldPassIndirectly(componentTys, asReturnValue); } bool swiftcall::shouldPassIndirectly(CodeGenModule &CGM, ArrayRef<llvm::Type*> componentTys, bool asReturnValue) { - return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys, - asReturnValue); + return getSwiftABIInfo(CGM).shouldPassIndirectly(componentTys, asReturnValue); } CharUnits swiftcall::getMaximumVoluntaryIntegerSize(CodeGenModule &CGM) { // Currently always the size of an ordinary pointer. return CGM.getContext().toCharUnitsFromBits( - CGM.getContext().getTargetInfo().getPointerWidth(0)); + CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default)); } CharUnits swiftcall::getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type) { // For Swift's purposes, this is always just the store size of the type // rounded up to a power of 2. auto size = (unsigned long long) getTypeStoreSize(CGM, type).getQuantity(); - if (!isPowerOf2(size)) { - size = 1ULL << (llvm::findLastSet(size, llvm::ZB_Undefined) + 1); - } - assert(size >= CGM.getDataLayout().getABITypeAlignment(type)); + size = llvm::bit_ceil(size); + assert(CGM.getDataLayout().getABITypeAlign(type) <= size); return CharUnits::fromQuantity(size); } @@ -699,8 +694,7 @@ bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, llvm::Type *eltTy, unsigned numElts) { assert(numElts > 1 && "illegal vector length"); - return getSwiftABIInfo(CGM) - .isLegalVectorTypeForSwift(vectorSize, eltTy, numElts); + return getSwiftABIInfo(CGM).isLegalVectorType(vectorSize, eltTy, numElts); } std::pair<llvm::Type*, unsigned> diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index d1ee61eab9d6..be1dbe8480c6 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -23,7 +23,6 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/CodeGen/SwiftCallingConv.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -107,7 +106,7 @@ static llvm::Type *getVAListElementType(CodeGenFunction &CGF) { } bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { - if (Ty->isPromotableIntegerType()) + if (getContext().isPromotableIntegerType(Ty)) return true; if (const auto *EIT = Ty->getAs<BitIntType>()) @@ -117,7 +116,9 @@ bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { return false; } -ABIInfo::~ABIInfo() {} +ABIInfo::~ABIInfo() = default; + +SwiftABIInfo::~SwiftABIInfo() = default; /// Does the given lowering require more than the given number of /// registers when expanded? 
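// [Editor's aside: a minimal sketch, not part of the patch.] The default
// SwiftABIInfo::shouldPassIndirectly shown in the next hunk delegates to
// occupiesMoreThan with a budget of four registers. Restated under the same
// assumptions (a pointer costs one integer register, an iN costs
// ceil(N / pointer-width) integer registers, and each float or vector costs
// one FP register), the rule is roughly the following; wouldPassIndirectly is
// a hypothetical name for this sketch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

static bool wouldPassIndirectly(llvm::ArrayRef<llvm::Type *> Types,
                                unsigned PtrWidth, unsigned MaxRegs) {
  unsigned IntCount = 0, FPCount = 0;
  for (llvm::Type *T : Types) {
    if (T->isPointerTy())
      ++IntCount; // one integer register per pointer
    else if (auto *IT = llvm::dyn_cast<llvm::IntegerType>(T))
      IntCount += (IT->getBitWidth() + PtrWidth - 1) / PtrWidth;
    else
      ++FPCount; // floats and vectors each occupy one FP/SIMD register
  }
  return IntCount + FPCount > MaxRegs;
}

// For example, with PtrWidth = 64 and MaxRegs = 4, {i64, i64, double, double}
// fits exactly and is passed directly, while {i64, i64, i64, double, double}
// needs five registers and is passed or returned indirectly.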
@@ -140,7 +141,7 @@ static bool occupiesMoreThan(CodeGenTypes &cgt,
     if (type->isPointerTy()) {
       intCount++;
     } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
-      auto ptrWidth = cgt.getTarget().getPointerWidth(0);
+      auto ptrWidth = cgt.getTarget().getPointerWidth(LangAS::Default);
       intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
     } else {
       assert(type->isVectorTy() || type->isFloatingPointTy());
@@ -151,12 +152,16 @@
   return (intCount + fpCount > maxAllRegisters);
 }
 
-bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
-                                             llvm::Type *eltTy,
-                                             unsigned numElts) const {
+bool SwiftABIInfo::shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+                                        bool AsReturnValue) const {
+  return occupiesMoreThan(CGT, ComponentTys, /*total=*/4);
+}
+
+bool SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+                                     unsigned NumElts) const {
   // The default implementation of this assumes that the target guarantees
   // 128-bit SIMD support but nothing more.
-  return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
+  return (VectorSize.getQuantity() > 8 && VectorSize.getQuantity() <= 16);
 }
 
 static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
@@ -317,13 +322,17 @@ static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
 /// leaving one or more empty slots behind as padding. If this
 /// is false, the returned address might be less-aligned than
 /// DirectAlign.
+/// \param ForceRightAdjust - Default is false. On big-endian platforms, if
+/// the argument is smaller than a slot, setting this flag forces the argument
+/// to be right-adjusted in its slot irrespective of its type.
 static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
                                       Address VAListAddr,
                                       llvm::Type *DirectTy,
                                       CharUnits DirectSize,
                                       CharUnits DirectAlign,
                                       CharUnits SlotSize,
-                                      bool AllowHigherAlign) {
+                                      bool AllowHigherAlign,
+                                      bool ForceRightAdjust = false) {
   // Cast the element type to i8* if necessary. Some platforms define
   // va_list as a struct containing an i8* instead of just an i8*.
   if (VAListAddr.getElementType() != CGF.Int8PtrTy)
@@ -349,7 +358,7 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
   // If the argument is smaller than a slot, and this is a big-endian
   // target, the argument will be right-adjusted in its slot.
   if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
-      !DirectTy->isStructTy()) {
+      (!DirectTy->isStructTy() || ForceRightAdjust)) {
     Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
   }
 
@@ -370,11 +379,15 @@
 /// an argument type with an alignment greater than the slot size
 /// will be emitted on a higher-alignment address, potentially
 /// leaving one or more empty slots behind as padding.
+/// \param ForceRightAdjust - Default is false. On big-endian platforms, if
+/// the argument is smaller than a slot, setting this flag forces the argument
+/// to be right-adjusted in its slot irrespective of its type.
 static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                 QualType ValueTy, bool IsIndirect,
                                 TypeInfoChars ValueInfo,
                                 CharUnits SlotSizeAndAlign,
-                                bool AllowHigherAlign) {
+                                bool AllowHigherAlign,
+                                bool ForceRightAdjust = false) {
   // The size and alignment of the value that was passed directly.
CharUnits DirectSize, DirectAlign; if (IsIndirect) { @@ -390,9 +403,9 @@ static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, if (IsIndirect) DirectTy = DirectTy->getPointerTo(0); - Address Addr = - emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign, - SlotSizeAndAlign, AllowHigherAlign); + Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, + DirectAlign, SlotSizeAndAlign, + AllowHigherAlign, ForceRightAdjust); if (IsIndirect) { Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align); @@ -814,7 +827,7 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { // This is a very simple ABI that relies a lot on DefaultABIInfo. //===----------------------------------------------------------------------===// -class WebAssemblyABIInfo final : public SwiftABIInfo { +class WebAssemblyABIInfo final : public ABIInfo { public: enum ABIKind { MVP = 0, @@ -827,7 +840,7 @@ private: public: explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind) - : SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {} + : ABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; @@ -845,22 +858,16 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - - bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, - bool asReturnValue) const override { - return occupiesMoreThan(CGT, scalars, /*total*/ 4); - } - - bool isSwiftErrorInRegister() const override { - return false; - } }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, WebAssemblyABIInfo::ABIKind K) - : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {} + : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) { + SwiftInfo = + std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); + } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -1071,7 +1078,7 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, .Cases("y", "&y", "^Ym", true) .Default(false); if (IsMMXCons && Ty->isVectorTy()) { - if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() != + if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() != 64) { // Invalid MMX constraint return nullptr; @@ -1136,7 +1143,7 @@ struct CCState { }; /// X86_32ABIInfo - The X86-32 ABI information. 
-class X86_32ABIInfo : public SwiftABIInfo { +class X86_32ABIInfo : public ABIInfo { enum Class { Integer, Float @@ -1210,26 +1217,27 @@ public: X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), - IsRetSmallStructInRegABI(RetSmallStructInRegABI), - IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), - IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() || - CGT.getTarget().getTriple().isOSCygMing()), - DefaultNumRegisterParameters(NumRegisterParameters) {} + : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI), + IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), + IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), + IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() || + CGT.getTarget().getTriple().isOSCygMing()), + DefaultNumRegisterParameters(NumRegisterParameters) {} +}; + +class X86_32SwiftABIInfo : public SwiftABIInfo { +public: + explicit X86_32SwiftABIInfo(CodeGenTypes &CGT) + : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {} - bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, - bool asReturnValue) const override { + bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys, + bool AsReturnValue) const override { // LLVM's x86-32 lowering currently only assigns up to three // integer registers and three fp registers. Oddly, it'll use up to // four vector registers for vectors, but those can overlap with the // scalar registers. - return occupiesMoreThan(CGT, scalars, /*total*/ 3); - } - - bool isSwiftErrorInRegister() const override { - // x86-32 lowering does not support passing swifterror in a register. - return false; + return occupiesMoreThan(CGT, ComponentTys, /*total=*/3); } }; @@ -1240,7 +1248,9 @@ public: unsigned NumRegisterParameters, bool SoftFloatABI) : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, - NumRegisterParameters, SoftFloatABI)) {} + NumRegisterParameters, SoftFloatABI)) { + SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT); + } static bool isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts); @@ -1769,23 +1779,22 @@ bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, } bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { - if (!updateFreeRegs(Ty, State)) - return false; + bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) && + (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || + Ty->isReferenceType()); - if (IsMCUABI) + if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall || + State.CC == llvm::CallingConv::X86_VectorCall)) return false; - if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall) { - if (getContext().getTypeSize(Ty) > 32) - return false; + if (!updateFreeRegs(Ty, State)) + return false; - return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || - Ty->isReferenceType()); - } + if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall) + return false; - return true; + // Return true to apply inreg to all legal parameters except for MCU targets. 
+  return !IsMCUABI;
 }
 
 void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
@@ -2250,7 +2259,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
 }
 
 /// X86_64ABIInfo - The X86_64 ABI information.
-class X86_64ABIInfo : public SwiftABIInfo {
+class X86_64ABIInfo : public ABIInfo {
   enum Class {
     Integer = 0,
     SSE,
@@ -2396,10 +2405,9 @@ class X86_64ABIInfo : public SwiftABIInfo {
   bool Has64BitPointers;
 
 public:
-  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
-      SwiftABIInfo(CGT), AVXLevel(AVXLevel),
-      Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
-  }
+  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
+      : ABIInfo(CGT), AVXLevel(AVXLevel),
+        Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}
 
   bool isPassedUsingAVXType(QualType type) const {
     unsigned neededInt, neededSSE;
@@ -2409,7 +2417,7 @@ public:
     if (info.isDirect()) {
       llvm::Type *ty = info.getCoerceToType();
       if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
-        return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128;
+        return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
     }
     return false;
   }
@@ -2424,21 +2432,13 @@ public:
   bool has64BitPointers() const {
     return Has64BitPointers;
   }
-
-  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
-                                    bool asReturnValue) const override {
-    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
-  }
-  bool isSwiftErrorInRegister() const override {
-    return true;
-  }
 };
 
 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
-class WinX86_64ABIInfo : public SwiftABIInfo {
+class WinX86_64ABIInfo : public ABIInfo {
 public:
   WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
-      : SwiftABIInfo(CGT), AVXLevel(AVXLevel),
+      : ABIInfo(CGT), AVXLevel(AVXLevel),
         IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
 
   void computeInfo(CGFunctionInfo &FI) const override;
@@ -2457,15 +2457,6 @@ public:
     return isX86VectorCallAggregateSmallEnough(NumMembers);
   }
 
-  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
-                                    bool asReturnValue) const override {
-    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
-  }
-
-  bool isSwiftErrorInRegister() const override {
-    return true;
-  }
-
 private:
   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                       bool IsVectorCall, bool IsRegCall) const;
@@ -2480,7 +2471,10 @@ private:
 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
-      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {}
+      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
+    SwiftInfo =
+        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
+  }
 
   const X86_64ABIInfo &getABIInfo() const {
     return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -2624,7 +2618,7 @@ void X86_64TargetCodeGenInfo::checkFunctionCallABI(
   llvm::StringMap<bool> CalleeMap;
   unsigned ArgIndex = 0;
 
-  // We need to loop through the actual call arguments rather than the the
+  // We need to loop through the actual call arguments rather than the
   // function's parameters, in case this is variadic.
   for (const CallArg &Arg : Args) {
     // The "avx" feature changes how vectors >128 in size are passed.
"avx512f" @@ -2722,7 +2716,10 @@ class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {} + : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) { + SwiftInfo = + std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); + } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; @@ -2871,7 +2868,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { Current = Integer; } else if (k == BuiltinType::Float || k == BuiltinType::Double || - k == BuiltinType::Float16) { + k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); @@ -3002,7 +2999,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, Current = Integer; else if (Size <= 128) Lo = Hi = Integer; - } else if (ET->isFloat16Type() || ET == getContext().FloatTy) { + } else if (ET->isFloat16Type() || ET == getContext().FloatTy || + ET->isBFloat16Type()) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; @@ -3474,9 +3472,9 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, if (SourceSize > T0Size) T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); if (T1 == nullptr) { - // Check if IRType is a half + float. float type will be in IROffset+4 due + // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due // to its alignment. - if (T0->isHalfTy() && SourceSize > 4) + if (T0->is16bitFPTy() && SourceSize > 4) T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); // If we can't get a second FP type, return a simple half or float. // avx512fp16-abi.c:pr51813_2 shows it works to return float for @@ -3488,7 +3486,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, if (T0->isFloatTy() && T1->isFloatTy()) return llvm::FixedVectorType::get(T0, 2); - if (T0->isHalfTy() && T1->isHalfTy()) { + if (T0->is16bitFPTy() && T1->is16bitFPTy()) { llvm::Type *T2 = nullptr; if (SourceSize > 4) T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); @@ -3497,7 +3495,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, return llvm::FixedVectorType::get(T0, 4); } - if (T0->isHalfTy() || T1->isHalfTy()) + if (T0->is16bitFPTy() || T1->is16bitFPTy()) return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); return llvm::Type::getDoubleTy(getVMContext()); @@ -3594,7 +3592,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); - unsigned HiAlign = TD.getABITypeAlignment(Hi); + llvm::Align HiAlign = TD.getABITypeAlign(Hi); unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); @@ -4171,13 +4169,13 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // FIXME: Our choice of alignment here and below is probably pessimistic. 
llvm::Value *V = CGF.Builder.CreateAlignedLoad( TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), - CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo))); + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); // Copy the second element. V = CGF.Builder.CreateAlignedLoad( TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), - CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi))); + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); @@ -4590,7 +4588,7 @@ bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const { Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. - if (Ty->isPromotableIntegerType()) + if (getContext().isPromotableIntegerType(Ty)) return true; if (!Is64Bit) @@ -4984,7 +4982,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. -class PPC64_SVR4_ABIInfo : public SwiftABIInfo { +class PPC64_SVR4_ABIInfo : public ABIInfo { public: enum ABIKind { ELFv1 = 0, @@ -4999,7 +4997,7 @@ private: public: PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool SoftFloatABI) - : SwiftABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {} + : ABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; @@ -5040,15 +5038,6 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - - bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, - bool asReturnValue) const override { - return occupiesMoreThan(CGT, scalars, /*total*/ 4); - } - - bool isSwiftErrorInRegister() const override { - return false; - } }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { @@ -5058,7 +5047,10 @@ public: PPC64_SVR4_ABIInfo::ABIKind Kind, bool SoftFloatABI) : TargetCodeGenInfo( - std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) {} + std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) { + SwiftInfo = + std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); + } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -5467,8 +5459,21 @@ Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } // Otherwise, just use the general rule. - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, - TypeInfo, SlotSize, /*AllowHigher*/ true); + // + // The PPC64 ABI passes some arguments in integer registers, even to variadic + // functions. To allow va_list to use the simple "void*" representation, + // variadic calls allocate space in the argument area for the integer argument + // registers, and variadic functions spill their integer argument registers to + // this area in their prologues. When aggregates smaller than a register are + // passed this way, they are passed in the least significant bits of the + // register, which means that after spilling on big-endian targets they will + // be right-aligned in their argument slot. This is uncommon; for a variety of + // reasons, other big-endian targets don't end up right-aligning aggregate + // types this way, and so right-alignment only applies to fundamental types. 
+ // So on PPC64, we must force the use of right-alignment even for aggregates. + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, + SlotSize, /*AllowHigher*/ true, + /*ForceRightAdjust*/ true); } bool @@ -5492,7 +5497,7 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { -class AArch64ABIInfo : public SwiftABIInfo { +class AArch64ABIInfo : public ABIInfo { public: enum ABIKind { AAPCS = 0, @@ -5504,8 +5509,7 @@ private: ABIKind Kind; public: - AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) - : SwiftABIInfo(CGT), Kind(Kind) {} + AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } @@ -5553,26 +5557,26 @@ private: Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, - bool asReturnValue) const override { - return occupiesMoreThan(CGT, scalars, /*total*/ 4); - } - bool isSwiftErrorInRegister() const override { - return true; - } - - bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, - unsigned elts) const override; - bool allowBFloatArgsAndRet() const override { return getTarget().hasBFloat16Type(); } }; +class AArch64SwiftABIInfo : public SwiftABIInfo { +public: + explicit AArch64SwiftABIInfo(CodeGenTypes &CGT) + : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} + + bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const override; +}; + class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { public: AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) - : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {} + : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) { + SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT); + } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; @@ -5594,14 +5598,15 @@ public: if (TA == nullptr) return; - ParsedTargetAttr Attr = TA->parse(); + ParsedTargetAttr Attr = + CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); if (Attr.BranchProtection.empty()) return; TargetInfo::BranchProtectionInfo BPI; StringRef Error; - (void)CGM.getTarget().validateBranchProtection( - Attr.BranchProtection, Attr.Architecture, BPI, Error); + (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + Attr.CPU, BPI, Error); assert(Error.empty()); auto *Fn = cast<llvm::Function>(GV); @@ -5826,8 +5831,9 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, Alignment = getContext().getTypeUnadjustedAlign(Ty); Alignment = Alignment < 128 ? 
64 : 128; } else { - Alignment = std::max(getContext().getTypeAlign(Ty), - (unsigned)getTarget().getPointerWidth(0)); + Alignment = + std::max(getContext().getTypeAlign(Ty), + (unsigned)getTarget().getPointerWidth(LangAS::Default)); } Size = llvm::alignTo(Size, Alignment); @@ -5946,13 +5952,13 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { return false; } -bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, - llvm::Type *eltTy, - unsigned elts) const { - if (!llvm::isPowerOf2_32(elts)) +bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, + llvm::Type *EltTy, + unsigned NumElts) const { + if (!llvm::isPowerOf2_32(NumElts)) return false; - if (totalSize.getQuantity() != 8 && - (totalSize.getQuantity() != 16 || elts == 1)) + if (VectorSize.getQuantity() != 8 && + (VectorSize.getQuantity() != 16 || NumElts == 1)) return false; return true; } @@ -5992,6 +5998,16 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true, CGF.CurFnInfo->getCallingConvention()); + // Empty records are ignored for parameter passing purposes. + if (AI.isIgnore()) { + uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8; + CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); + VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); + auto *Load = CGF.Builder.CreateLoad(VAListAddr); + Address Addr = Address(Load, CGF.Int8Ty, SlotSize); + return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); + } + bool IsIndirect = AI.isIndirect(); llvm::Type *BaseTy = CGF.ConvertType(Ty); @@ -6242,7 +6258,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); - uint64_t PointerSize = getTarget().getPointerWidth(0) / 8; + uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8; CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); // Empty records are ignored for parameter passing purposes. 
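// [Editor's aside: illustrative sketch, not part of the patch.] The
// empty-record path added to EmitAAPCSVAArg above (mirroring the existing
// Darwin handling) makes va_arg of an empty type consume no slot in the
// AArch64 variadic argument area. Under that assumption, a hypothetical
// caller would observe the following; Empty and second are names invented
// for this sketch:

#include <cstdarg>

struct Empty {}; // ignored for parameter passing under AAPCS

int second(int n, ...) {
  va_list ap;
  va_start(ap, n);
  (void)va_arg(ap, Empty); // lowered to the new no-advance path
  int v = va_arg(ap, int); // still reads the first occupied slot
  va_end(ap);
  return v;
}

// so second(1, Empty{}, 42) yields 42: the Empty argument neither occupies a
// slot at the call site nor advances the va_list in the callee.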
@@ -6290,7 +6306,7 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, namespace { -class ARMABIInfo : public SwiftABIInfo { +class ARMABIInfo : public ABIInfo { public: enum ABIKind { APCS = 0, @@ -6304,8 +6320,7 @@ private: bool IsFloatABISoftFP; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) - : SwiftABIInfo(CGT), Kind(_Kind) { + ARMABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) { setCCs(); IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" || CGT.getCodeGenOpts().FloatABI == ""; // default @@ -6369,22 +6384,23 @@ private: llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); +}; - bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, - bool asReturnValue) const override { - return occupiesMoreThan(CGT, scalars, /*total*/ 4); - } - bool isSwiftErrorInRegister() const override { - return true; - } - bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, - unsigned elts) const override; +class ARMSwiftABIInfo : public SwiftABIInfo { +public: + explicit ARMSwiftABIInfo(CodeGenTypes &CGT) + : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} + + bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) - : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {} + : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) { + SwiftInfo = std::make_unique<ARMSwiftABIInfo>(CGT); + } const ARMABIInfo &getABIInfo() const { return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo()); @@ -6422,13 +6438,13 @@ public: auto *Fn = cast<llvm::Function>(GV); if (const auto *TA = FD->getAttr<TargetAttr>()) { - ParsedTargetAttr Attr = TA->parse(); + ParsedTargetAttr Attr = + CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); if (!Attr.BranchProtection.empty()) { TargetInfo::BranchProtectionInfo BPI; StringRef DiagMsg; - StringRef Arch = Attr.Architecture.empty() - ? CGM.getTarget().getTargetOpts().CPU - : Attr.Architecture; + StringRef Arch = + Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU; if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection, Arch, BPI, DiagMsg)) { CGM.getDiags().Report( @@ -6451,11 +6467,11 @@ public: // If the Branch Protection attribute is missing, validate the target // Architecture attribute against Branch Protection command line // settings. 
- if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.Architecture)) + if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.CPU)) CGM.getDiags().Report( D->getLocation(), diag::warn_target_unsupported_branch_protection_attribute) - << Attr.Architecture; + << Attr.CPU; } } @@ -6690,7 +6706,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) { TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); - ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); + ABIAlign = std::clamp(TyAlign, (uint64_t)4, (uint64_t)8); } else { TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); } @@ -6986,16 +7002,15 @@ bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { } } -bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, - llvm::Type *eltTy, - unsigned numElts) const { - if (!llvm::isPowerOf2_32(numElts)) +bool ARMSwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const { + if (!llvm::isPowerOf2_32(NumElts)) return false; - unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + unsigned size = CGT.getDataLayout().getTypeStoreSizeInBits(EltTy); if (size > 64) return false; - if (vectorSize.getQuantity() != 8 && - (vectorSize.getQuantity() != 16 || numElts == 1)) + if (VectorSize.getQuantity() != 8 && + (VectorSize.getQuantity() != 16 || NumElts == 1)) return false; return true; } @@ -7046,10 +7061,10 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { - Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), - getVAListElementType(CGF), SlotSize); - Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - return Addr; + VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); + auto *Load = CGF.Builder.CreateLoad(VAListAddr); + Address Addr = Address(Load, CGF.Int8Ty, SlotSize); + return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); } CharUnits TySize = getContext().getTypeSizeInChars(Ty); @@ -7380,13 +7395,13 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { namespace { -class SystemZABIInfo : public SwiftABIInfo { +class SystemZABIInfo : public ABIInfo { bool HasVector; bool IsSoftFloatABI; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF) - : SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {} + : ABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {} bool isPromotableIntegerTypeForABI(QualType Ty) const; bool isCompoundType(QualType Ty) const; @@ -7397,30 +7412,58 @@ public: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType ArgTy) const; - void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - } - + void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - - bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, - bool asReturnValue) const override { - return occupiesMoreThan(CGT, scalars, /*total*/ 4); - } - bool isSwiftErrorInRegister() const override { - return false; - } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { + // These 
are used for speeding up the search for a visible vector ABI.
+  mutable bool HasVisibleVecABIFlag = false;
+  mutable std::set<const Type *> SeenTypes;
+
+  // Returns true (the first time) if Ty is, or is found to make use of, a
+  // vector type (e.g. as a function argument).
+  bool isVectorTypeBased(const Type *Ty) const;
+
 public:
   SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
       : TargetCodeGenInfo(
-            std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {}
+            std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {
+    SwiftInfo =
+        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+  }
+
+  // The vector ABI is different when the vector facility is present than when
+  // it is not. When a module e.g. defines an externally visible vector
+  // variable, a flag indicating a visible vector ABI is added. Eventually
+  // this will result in a GNU attribute indicating the vector ABI of the
+  // module. Ty is the type of a variable or function parameter that is
+  // globally visible.
+  void handleExternallyVisibleObjABI(const Type *Ty,
+                                     CodeGen::CodeGenModule &M) const {
+    if (!HasVisibleVecABIFlag && isVectorTypeBased(Ty)) {
+      M.getModule().addModuleFlag(llvm::Module::Warning,
+                                  "s390x-visible-vector-ABI", 1);
+      HasVisibleVecABIFlag = true;
+    }
+  }
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override {
+    if (!D)
+      return;
+
+    // Check if the vector ABI becomes visible by an externally visible
+    // variable or function.
+    if (const auto *VD = dyn_cast<VarDecl>(D)) {
+      if (VD->isExternallyVisible())
+        handleExternallyVisibleObjABI(VD->getType().getTypePtr(), M);
+    }
+    else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+      if (FD->isExternallyVisible())
+        handleExternallyVisibleObjABI(FD->getType().getTypePtr(), M);
+    }
+  }
 
   llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID,
                           CGBuilderTy &Builder,
@@ -7579,6 +7622,9 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   // Every non-vector argument occupies 8 bytes and is passed by preference
   // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are
   // always passed on the stack.
+ const SystemZTargetCodeGenInfo &SZCGI = + static_cast<const SystemZTargetCodeGenInfo &>( + CGT.getCGM().getTargetCodeGenInfo()); Ty = getContext().getCanonicalType(Ty); auto TyInfo = getContext().getTypeInfoInChars(Ty); llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); @@ -7589,6 +7635,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, bool IsVector = false; CharUnits UnpaddedSize; CharUnits DirectAlign; + SZCGI.handleExternallyVisibleObjABI(Ty.getTypePtr(), CGT.getCGM()); if (IsIndirect) { DirectTy = llvm::PointerType::getUnqual(DirectTy); UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); @@ -7783,6 +7830,51 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(nullptr); } +void SystemZABIInfo::computeInfo(CGFunctionInfo &FI) const { + const SystemZTargetCodeGenInfo &SZCGI = + static_cast<const SystemZTargetCodeGenInfo &>( + CGT.getCGM().getTargetCodeGenInfo()); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + unsigned Idx = 0; + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type); + if (FI.isVariadic() && Idx++ >= FI.getNumRequiredArgs()) + // Check if a vararg vector argument is passed, in which case the + // vector ABI becomes visible as the va_list could be passed on to + // other functions. + SZCGI.handleExternallyVisibleObjABI(I.type.getTypePtr(), CGT.getCGM()); + } +} + +bool SystemZTargetCodeGenInfo::isVectorTypeBased(const Type *Ty) const { + while (Ty->isPointerType() || Ty->isArrayType()) + Ty = Ty->getPointeeOrArrayElementType(); + if (!SeenTypes.insert(Ty).second) + return false; + if (Ty->isVectorType()) + return true; + if (const auto *RecordTy = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RecordTy->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->hasDefinition()) + for (const auto &I : CXXRD->bases()) + if (isVectorTypeBased(I.getType().getTypePtr())) + return true; + for (const auto *FD : RD->fields()) + if (isVectorTypeBased(FD->getType().getTypePtr())) + return true; + } + if (const auto *FT = Ty->getAs<FunctionType>()) + if (isVectorTypeBased(FT->getReturnType().getTypePtr())) + return true; + if (const FunctionProtoType *Proto = Ty->getAs<FunctionProtoType>()) + for (auto ParamType : Proto->getParamTypes()) + if (isVectorTypeBased(ParamType.getTypePtr())) + return true; + return false; +} + //===----------------------------------------------------------------------===// // MSP430 ABI Implementation //===----------------------------------------------------------------------===// @@ -7867,7 +7959,7 @@ void MSP430TargetCodeGenInfo::setTargetAttributes( namespace { class MipsABIInfo : public ABIInfo { bool IsO32; - unsigned MinABIStackAlignInBytes, StackAlignInBytes; + const unsigned MinABIStackAlignInBytes, StackAlignInBytes; void CoerceToIntArgs(uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const; llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; @@ -8044,8 +8136,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { uint64_t TySize = getContext().getTypeSize(Ty); uint64_t Align = getContext().getTypeAlign(Ty) / 8; - Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), - (uint64_t)StackAlignInBytes); + Align = std::clamp(Align, (uint64_t)MinABIStackAlignInBytes, + (uint64_t)StackAlignInBytes); unsigned CurrOffset = llvm::alignTo(Offset, Align); Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) 
/ 8;
@@ -8200,7 +8292,7 @@ Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
   // Pointers are also promoted in the same way but this only matters for N32.
   unsigned SlotSizeInBits = IsO32 ? 32 : 64;
-  unsigned PtrWidth = getTarget().getPointerWidth(0);
+  unsigned PtrWidth = getTarget().getPointerWidth(LangAS::Default);
   bool DidPromote = false;
   if ((Ty->isIntegerType() &&
          getContext().getIntWidth(Ty) < SlotSizeInBits) ||
@@ -8340,18 +8432,23 @@ public:
       : DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {}
 
   ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const {
-    if (isAggregateTypeForABI(Ty)) {
-      // On AVR, a return struct with size less than or equals to 8 bytes is
-      // returned directly via registers R18-R25. On AVRTiny, a return struct
-      // with size less than or equals to 4 bytes is returned directly via
-      // registers R22-R25.
-      if (getContext().getTypeSize(Ty) <= RetRegs * 8)
-        return ABIArgInfo::getDirect();
-      // A return struct with larger size is returned via a stack
-      // slot, along with a pointer to it as the function's implicit argument.
+    // On AVR, a return struct with size less than or equal to 8 bytes is
+    // returned directly via registers R18-R25. On AVRTiny, a return struct
+    // with size less than or equal to 4 bytes is returned directly via
+    // registers R22-R25.
+    if (isAggregateTypeForABI(Ty) &&
+        getContext().getTypeSize(Ty) <= RetRegs * 8)
+      return ABIArgInfo::getDirect();
+    // A return value (struct or scalar) with larger size is returned via a
+    // stack slot, along with a pointer as the function's implicit argument.
+    if (getContext().getTypeSize(Ty) > RetRegs * 8) {
       LargeRet = true;
       return getNaturalAlignIndirect(Ty);
     }
+    // An i8 return value should not be extended to i16, since AVR has 8-bit
+    // registers.
+    if (Ty->isIntegralOrEnumerationType() && getContext().getTypeSize(Ty) <= 8)
+      return ABIArgInfo::getDirect();
     // Otherwise we follow the default way which is compatible.
     return DefaultABIInfo::classifyReturnType(Ty);
   }
@@ -9445,8 +9542,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
 
   const bool IsHIPKernel =
       M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+  const bool IsOpenMPkernel =
+      M.getLangOpts().OpenMPIsDevice &&
+      (F->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL);
 
-  if (IsHIPKernel)
+  // TODO: This should be moved to language specific attributes instead.
+  if (IsHIPKernel || IsOpenMPkernel)
     F->addFnAttr("uniform-work-group-size", "true");
 
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
@@ -9747,7 +9848,7 @@ private:
 
   // Check if Ty is a usable substitute for the coercion type.
   bool isUsableType(llvm::StructType *Ty) const {
-    return llvm::makeArrayRef(Elems) == Ty->elements();
+    return llvm::ArrayRef(Elems) == Ty->elements();
   }
 
   // Get the coercion type as a literal struct type.
@@ -10302,7 +10403,7 @@ bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
 void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
                                     bool IsRecursive) {
   if (!ID || IncompleteUsedCount)
-    return; // No key or it is is an incomplete sub-type so don't add.
+    return; // No key or it is an incomplete sub-type so don't add.
Entry &E = Map[ID]; if (IsRecursive && !E.Str.empty()) { assert(E.State==Recursive && E.Str.size() == Str.size() && @@ -10907,11 +11008,6 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { } } - // We must track the number of GPRs used in order to conform to the RISC-V - // ABI, as integer scalars passed in registers should have signext/zeroext - // when promoted, but are anyext if passed on the stack. As GPR usage is - // different for variadic arguments, we must also track whether we are - // examining a vararg or not. int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; int ArgFPRsLeft = FLen ? NumArgFPRs : 0; int NumFixedArgs = FI.getNumRequiredArgs(); @@ -11001,9 +11097,22 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, // Unions aren't eligible unless they're empty (which is caught above). if (RD->isUnion()) return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl()); + CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); + bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, + Field1Ty, Field1Off, Field2Ty, + Field2Off); + if (!Ret) + return false; + } + } int ZeroWidthBitFieldCount = 0; for (const FieldDecl *FD : RD->fields()) { - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); QualType QTy = FD->getType(); if (FD->isBitField()) { @@ -11090,7 +11199,7 @@ ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( } CharUnits Field2Align = - CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty)); CharUnits Field1End = Field1Off + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); @@ -11176,7 +11285,6 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, } uint64_t NeededAlign = getContext().getTypeAlign(Ty); - bool MustUseStack = false; // Determine the number of GPRs needed to pass the current argument // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" // register pairs, so may consume 3 registers. @@ -11187,7 +11295,6 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, NeededArgGPRs = 2; if (NeededArgGPRs > ArgGPRsLeft) { - MustUseStack = true; NeededArgGPRs = ArgGPRsLeft; } @@ -11198,14 +11305,13 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - // All integral types are promoted to XLen width, unless passed on the - // stack. 
- if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) { + // All integral types are promoted to XLen width + if (Size < XLen && Ty->isIntegralOrEnumerationType()) { return extendType(Ty); } if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() < XLen && !MustUseStack) + if (EIT->getNumBits() < XLen) return extendType(Ty); if (EIT->getNumBits() > 128 || (!getContext().getTargetInfo().hasInt128Type() && @@ -11522,6 +11628,524 @@ public: } // end anonymous namespace //===----------------------------------------------------------------------===// +// BPF ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class BPFABIInfo : public DefaultABIInfo { +public: + BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + ABIArgInfo classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + uint64_t Bits = getContext().getTypeSize(Ty); + if (Bits == 0) + return ABIArgInfo::getIgnore(); + + // If the aggregate needs 1 or 2 registers, do not use reference. + if (Bits <= 128) { + llvm::Type *CoerceTy; + if (Bits <= 64) { + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); + } else { + llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64); + CoerceTy = llvm::ArrayType::get(RegTy, 2); + } + return ABIArgInfo::getDirect(CoerceTy); + } else { + return getNaturalAlignIndirect(Ty); + } + } + + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + ASTContext &Context = getContext(); + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) + return getNaturalAlignIndirect(Ty); + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); + } + + ABIArgInfo classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (isAggregateTypeForABI(RetTy)) + return getNaturalAlignIndirect(RetTy); + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + ASTContext &Context = getContext(); + if (const auto *EIT = RetTy->getAs<BitIntType>()) + if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) + return getNaturalAlignIndirect(RetTy); + + // Caller will do necessary sign/zero extension. + return ABIArgInfo::getDirect(); + } + + void computeInfo(CGFunctionInfo &FI) const override { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + +}; + +class BPFTargetCodeGenInfo : public TargetCodeGenInfo { +public: + BPFTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<BPFABIInfo>(CGT)) {} + + const BPFABIInfo &getABIInfo() const { + return static_cast<const BPFABIInfo&>(TargetCodeGenInfo::getABIInfo()); + } +}; + +} + +// LoongArch ABI Implementation. Documented at +// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +// +//===----------------------------------------------------------------------===// + +namespace { +class LoongArchABIInfo : public DefaultABIInfo { +private: + // Size of the integer ('r') registers in bits. + unsigned GRLen; + // Size of the floating point ('f') registers in bits. + unsigned FRLen; + // Number of general-purpose argument registers. 
+ static const int NumGARs = 8; + // Number of floating-point argument registers. + static const int NumFARs = 8; + bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const; + +public: + LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) + : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} + + void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft, + int &FARsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; + + bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; +}; +} // end anonymous namespace + +void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect, or if the type size is a scalar greater than 2*GRLen + // and not a complex type with elements <= FRLen. e.g. fp128 is passed direct + // in LLVM IR, relying on the backend lowering code to rewrite the argument + // list and pass indirectly on LA32. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + if (!IsRetIndirect && RetTy->isScalarType() && + getContext().getTypeSize(RetTy) > (2 * GRLen)) { + if (RetTy->isComplexType() && FRLen) { + QualType EltTy = RetTy->castAs<ComplexType>()->getElementType(); + IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen; + } else { + // This is a normal scalar > 2*GRLen, such as fp128 on LA32. + IsRetIndirect = true; + } + } + + // We must track the number of GARs and FARs used in order to conform to the + // LoongArch ABI. As GAR usage is different for variadic arguments, we must + // also track whether we are examining a vararg or not. + int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs; + int FARsLeft = FRLen ? NumFARs : 0; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + ArgInfo.info = classifyArgumentType( + ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft); + ArgNum++; + } +} + +// Returns true if the struct is a potential candidate to be passed in FARs (and +// GARs). If this function returns true, the caller is responsible for checking +// that if there is only a single field then that field is a float. +bool LoongArchABIInfo::detectFARsEligibleStructHelper( + QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, + llvm::Type *&Field2Ty, CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > GRLen) + return false; + // Can't be eligible if larger than the FP registers. Half precision isn't + // currently supported on LoongArch and the ABI hasn't been confirmed, so + // default to the integer ABI in that case. 
+ if (IsFloat && (Size > FRLen || Size < 32)) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). + if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs<ComplexType>()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FRLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, + Field2Ty, Field2Off)) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs<RecordType>()) { + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + if (isEmptyRecord(getContext(), Ty, true)) + return true; + const RecordDecl *RD = RTy->getDecl(); + // Unions aren't eligible unless they're empty (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl()); + if (!detectFARsEligibleStructHelper( + B.getType(), CurOff + Layout.getBaseClassOffset(BDecl), + Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + } + } + for (const FieldDecl *FD : RD->fields()) { + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Zero-width bitfields are ignored. + if (BitWidth == 0) + continue; + // Allow a bitfield with a type greater than GRLen as long as the + // bitwidth is GRLen or less. + if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) { + QTy = getContext().getIntTypeForBitwidth(GRLen, false); + } + } + + if (!detectFARsEligibleStructHelper( + QTy, + CurOff + getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FD->getFieldIndex())), + Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when +// flattened it contains a single fp value, fp+fp, or int+fp of appropriate +// size). If so, NeededFARs and NeededGARs are incremented appropriately. 
+bool LoongArchABIInfo::detectFARsEligibleStruct( + QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, + llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs, + int &NeededFARs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededGARs = 0; + NeededFARs = 0; + if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, + Field1Off, Field2Ty, Field2Off)) + return false; + // Not really a candidate if we have a single int but no float. + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededFARs++; + else if (Field1Ty) + NeededGARs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededFARs++; + else if (Field2Ty) + NeededGARs++; + return true; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. +ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector<llvm::Type *, 3> CoerceElts; + SmallVector<llvm::Type *, 2> UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty)); + CharUnits Field1End = + Field1Off + + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1End) + Padding = Field2Off - Field1End; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked), + llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked)); +} + +ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + int &GARsLeft, + int &FARsLeft) const { + assert(GARsLeft <= NumGARs && "GAR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (GARsLeft) + GARsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + + // Pass floating point values via FARs if possible. 
+ if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FRLen >= Size && FARsLeft) { + FARsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. + if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { + QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FRLen) { + FARsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (IsFixed && FRLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); + int NeededGARs = 0; + int NeededFARs = 0; + bool IsCandidate = detectFARsEligibleStruct( + Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs); + if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) { + GARsLeft -= NeededGARs; + FARsLeft -= NeededFARs; + return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } + } + + uint64_t NeededAlign = getContext().getTypeAlign(Ty); + // Determine the number of GARs needed to pass the current argument + // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. + int NeededGARs = 1; + if (!IsFixed && NeededAlign == 2 * GRLen) + NeededGARs = 2 + (GARsLeft % 2); + else if (Size > GRLen && Size <= 2 * GRLen) + NeededGARs = 2; + + if (NeededGARs > GARsLeft) + NeededGARs = GARsLeft; + + GARsLeft -= NeededGARs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to GRLen width. + if (Size < GRLen && Ty->isIntegralOrEnumerationType()) + return extendType(Ty); + + if (const auto *EIT = Ty->getAs<BitIntType>()) { + if (EIT->getNumBits() < GRLen) + return extendType(Ty); + if (EIT->getNumBits() > 128 || + (!getContext().getTargetInfo().hasInt128Type() && + EIT->getNumBits() > 64)) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } + + return ABIArgInfo::getDirect(); + } + + // Aggregates which are <= 2*GRLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * GRLen) { + // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is + // required, and a 2-element GRLen array if only GRLen alignment is + // required. + if (Size <= GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), GRLen)); + } + if (getContext().getTypeAlign(Ty) == 2 * GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * GRLen)); + } + return ABIArgInfo::getDirect( + llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2)); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. + int GARsLeft = 2; + int FARsLeft = FRLen ? 
2 : 0; + return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft); +} + +Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) { + Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), + getVAListElementType(CGF), SlotSize); + Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); + return Addr; + } + + auto TInfo = getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 2*GRLen bytes are passed indirectly. + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, + /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo, + SlotSize, + /*AllowHigherAlign=*/true); +} + +ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + // LA64 ABI requires unsigned 32 bit integers to be sign extended. + if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + return ABIArgInfo::getExtend(Ty); +} + +namespace { +class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, + unsigned FRLen) + : TargetCodeGenInfo( + std::make_unique<LoongArchABIInfo>(CGT, GRLen, FRLen)) {} +}; +} // namespace + +//===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// @@ -11666,7 +12290,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ABIStr = getTarget().getABI(); - unsigned XLen = getTarget().getPointerWidth(0); + unsigned XLen = getTarget().getPointerWidth(LangAS::Default); unsigned ABIFLen = 0; if (ABIStr.endswith("f")) ABIFLen = 32; @@ -11749,6 +12373,20 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { : hasFP64 ? 
64 : 32));
   }
+  case llvm::Triple::bpfeb:
+  case llvm::Triple::bpfel:
+    return SetCGInfo(new BPFTargetCodeGenInfo(Types));
+  case llvm::Triple::loongarch32:
+  case llvm::Triple::loongarch64: {
+    StringRef ABIStr = getTarget().getABI();
+    unsigned ABIFRLen = 0;
+    if (ABIStr.endswith("f"))
+      ABIFRLen = 32;
+    else if (ABIStr.endswith("d"))
+      ABIFRLen = 64;
+    return SetCGInfo(new LoongArchTargetCodeGenInfo(
+        Types, getTarget().getPointerWidth(LangAS::Default), ABIFRLen));
+  }
   }
 }
 
@@ -11835,8 +12473,8 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
   auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
   llvm::SmallVector<llvm::Value *, 2> Args;
   Args.push_back(Cast);
-  for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
-    Args.push_back(I);
+  for (llvm::Argument &A : llvm::drop_begin(F->args()))
+    Args.push_back(&A);
   llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
   call->setCallingConv(Invoke->getCallingConv());
   Builder.CreateRetVoid();
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 30421612015b..c7c1ec7fce7e 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -38,6 +38,7 @@ class ABIInfo;
 class CallArgList;
 class CodeGenFunction;
 class CGBlockInfo;
+class SwiftABIInfo;
 
 /// TargetCodeGenInfo - This class organizes various target-specific
 /// codegeneration issues, like target-specific attributes, builtins and so
@@ -45,6 +46,12 @@ class CGBlockInfo;
 class TargetCodeGenInfo {
   std::unique_ptr<ABIInfo> Info;
 
+protected:
+  // Target hooks supporting Swift calling conventions. The target must
+  // initialize this field if it claims to support these calling conventions
+  // by returning true from TargetInfo::checkCallingConvention for them.
+  std::unique_ptr<SwiftABIInfo> SwiftInfo;
+
 public:
   TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info);
   virtual ~TargetCodeGenInfo();
@@ -52,6 +59,12 @@ public:
   /// getABIInfo() - Returns ABI info helper for the target.
   const ABIInfo &getABIInfo() const { return *Info; }
 
+  /// Returns Swift ABI info helper for the target.
+  const SwiftABIInfo &getSwiftABIInfo() const {
+    assert(SwiftInfo && "Swift ABI info has not been initialized");
+    return *SwiftInfo;
+  }
+
   /// setTargetAttributes - Provides a convenient hook to handle extra
   /// target-specific attributes for the given global.
   virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
diff --git a/clang/lib/CodeGen/VarBypassDetector.cpp b/clang/lib/CodeGen/VarBypassDetector.cpp
index e8717a61ce5e..6eda83dfdef2 100644
--- a/clang/lib/CodeGen/VarBypassDetector.cpp
+++ b/clang/lib/CodeGen/VarBypassDetector.cpp
@@ -77,7 +77,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S,
       return false;
     ++StmtsToSkip;
   }
-  LLVM_FALLTHROUGH;
+  [[fallthrough]];
   case Stmt::GotoStmtClass:
     FromScopes.push_back({S, ParentScope});
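
Note on the TargetInfo.h hunk above: together with the ARM and SystemZ changes earlier in this diff, it replaces the old is-a relationship (a target's ABIInfo deriving from SwiftABIInfo) with a has-a one, where a target that supports the Swift calling conventions installs a standalone SwiftABIInfo in its TargetCodeGenInfo constructor. A minimal sketch for a hypothetical target "Foo", mirroring the ARMTargetCodeGenInfo pattern; FooABIInfo stands in for the target's ordinary ABIInfo subclass:

    #include <memory>
    using namespace clang;

    // Hypothetical: a Swift ABI helper with target-specific policy baked in.
    class FooSwiftABIInfo : public CodeGen::SwiftABIInfo {
    public:
      explicit FooSwiftABIInfo(CodeGen::CodeGenTypes &CGT)
          : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
    };

    class FooTargetCodeGenInfo : public CodeGen::TargetCodeGenInfo {
    public:
      explicit FooTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
          : TargetCodeGenInfo(std::make_unique<FooABIInfo>(CGT)) {
        // Consumers reach this via TargetCodeGenInfo::getSwiftABIInfo().
        SwiftInfo = std::make_unique<FooSwiftABIInfo>(CGT);
      }
    };
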

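Note on the LoongArch struct-flattening rules added above: they are easiest to see on concrete types. Under an lp64d-style configuration (GRLen = FRLen = 64), the classifications sketched below are the expected outcomes for fixed (non-variadic) arguments with registers available; they are illustrative, not verified compiler output:

    // Flattens to fp + fp: FAR-eligible, consumes two FARs.
    struct FloatPair { float A; double B; };

    // Flattens to int + fp: FAR-eligible, consumes one GAR and one FAR.
    struct IntFloat { long I; double D; };

    // int + int is rejected by detectFARsEligibleStructHelper, so this is
    // coerced to a two-element GRLen integer array and passed in two GARs.
    struct IntPair { long A; long B; };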