Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen')
49 files changed, 3143 insertions, 1519 deletions
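Among the CGBuiltin.cpp hunks further below is codegen for the new __builtin_elementwise_* and __builtin_reduce_* builtins: signed/unsigned integer vectors lower to llvm.smax/umax (or smin/umin), floating-point vectors to llvm.maxnum/minnum and llvm.vector.reduce.fmin/fmax. A minimal usage sketch follows, assuming a clang built from a tree that already contains these builtins; the type and function names are illustrative only, not part of the diff.

    typedef int   int4   __attribute__((ext_vector_type(4)));
    typedef float float4 __attribute__((ext_vector_type(4)));

    // Element-wise max of two signed integer vectors; per the diff this is
    // emitted as the llvm.smax intrinsic (llvm.umax for unsigned element
    // types, llvm.maxnum for floating-point vectors).
    int4 max4(int4 a, int4 b) { return __builtin_elementwise_max(a, b); }

    // Horizontal reduction over a float vector; emitted as
    // llvm.vector.reduce.fmin per the new BI__builtin_reduce_min handling.
    float minall(float4 v) { return __builtin_reduce_min(v); }

Compiling such a file with -S -emit-llvm makes the emitted intrinsics visible in the IR.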
diff --git a/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp index 481f5347d978..648c7b3df8ed 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp @@ -38,6 +38,7 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" @@ -45,7 +46,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" @@ -147,6 +147,14 @@ class EmitAssemblyHelper { return F; } + void + RunOptimizationPipeline(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> &OS, + std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS); + void RunCodegenPipeline(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> &OS, + std::unique_ptr<llvm::ToolOutputFile> &DwoOS); + public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const HeaderSearchOptions &HeaderSearchOpts, @@ -164,11 +172,16 @@ public: std::unique_ptr<TargetMachine> TM; + // Emit output using the legacy pass manager for the optimization pipeline. + // This will be removed soon when using the legacy pass manager for the + // optimization pipeline is no longer supported. + void EmitAssemblyWithLegacyPassManager(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> OS); + + // Emit output using the new pass manager for the optimization pipeline. This + // is the default. 
void EmitAssembly(BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS); - - void EmitAssemblyWithNewPassManager(BackendAction Action, - std::unique_ptr<raw_pwrite_stream> OS); }; // We need this wrapper to access LangOpts and CGOpts from extension functions @@ -234,6 +247,8 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { Opts.InlineBoolFlag = CGOpts.SanitizeCoverageInlineBoolFlag; Opts.PCTable = CGOpts.SanitizeCoveragePCTable; Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth; + Opts.TraceLoads = CGOpts.SanitizeCoverageTraceLoads; + Opts.TraceStores = CGOpts.SanitizeCoverageTraceStores; return Opts; } @@ -474,6 +489,11 @@ static CodeGenFileType getCodeGenFileType(BackendAction Action) { } } +static bool actionRequiresCodeGen(BackendAction Action) { + return Action != Backend_EmitNothing && Action != Backend_EmitBC && + Action != Backend_EmitLL; +} + static bool initTargetOptions(DiagnosticsEngine &Diags, llvm::TargetOptions &Options, const CodeGenOptions &CodeGenOpts, @@ -539,6 +559,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.NoNaNsFPMath = LangOpts.NoHonorNaNs; Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; Options.UnsafeFPMath = LangOpts.UnsafeFPMath; + Options.ApproxFuncFPMath = LangOpts.ApproxFunc; Options.BBSections = llvm::StringSwitch<llvm::BasicBlockSection>(CodeGenOpts.BBSections) @@ -576,10 +597,25 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI; - Options.PseudoProbeForProfiling = CodeGenOpts.PseudoProbeForProfiling; Options.ValueTrackingVariableLocations = CodeGenOpts.ValueTrackingVariableLocations; Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; + Options.LoopAlignment = CodeGenOpts.LoopAlignment; + + switch (CodeGenOpts.getSwiftAsyncFramePointer()) { + case CodeGenOptions::SwiftAsyncFramePointerKind::Auto: + Options.SwiftAsyncFramePointer = + SwiftAsyncFramePointerMode::DeploymentBased; + break; + + case CodeGenOptions::SwiftAsyncFramePointerKind::Always: + Options.SwiftAsyncFramePointer = SwiftAsyncFramePointerMode::Always; + break; + + case CodeGenOptions::SwiftAsyncFramePointerKind::Never: + Options.SwiftAsyncFramePointer = SwiftAsyncFramePointerMode::Never; + break; + } Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; @@ -942,15 +978,13 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, return true; } -void EmitAssemblyHelper::EmitAssembly(BackendAction Action, - std::unique_ptr<raw_pwrite_stream> OS) { +void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager( + BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); - bool UsesCodeGen = (Action != Backend_EmitNothing && - Action != Backend_EmitBC && - Action != Backend_EmitLL); + bool UsesCodeGen = actionRequiresCodeGen(Action); CreateTargetMachine(UsesCodeGen); if (UsesCodeGen && !TM) @@ -977,6 +1011,12 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, CreatePasses(PerModulePasses, PerFunctionPasses); + // Add a verifier pass if requested. We don't have to do this if the action + // requires code generation because there will already be a verifier pass in + // the code-generation pipeline. 
+ if (!UsesCodeGen && CodeGenOpts.VerifyModule) + PerModulePasses.add(createVerifierPass()); + legacy::PassManager CodeGenPasses; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); @@ -1069,16 +1109,16 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, DwoOS->keep(); } -static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { +static OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { switch (Opts.OptimizationLevel) { default: llvm_unreachable("Invalid optimization level!"); case 0: - return PassBuilder::OptimizationLevel::O0; + return OptimizationLevel::O0; case 1: - return PassBuilder::OptimizationLevel::O1; + return OptimizationLevel::O1; case 2: switch (Opts.OptimizeSize) { @@ -1086,17 +1126,17 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { llvm_unreachable("Invalid optimization level for size!"); case 0: - return PassBuilder::OptimizationLevel::O2; + return OptimizationLevel::O2; case 1: - return PassBuilder::OptimizationLevel::Os; + return OptimizationLevel::Os; case 2: - return PassBuilder::OptimizationLevel::Oz; + return OptimizationLevel::Oz; } case 3: - return PassBuilder::OptimizationLevel::O3; + return OptimizationLevel::O3; } } @@ -1104,7 +1144,7 @@ static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM, - PassBuilder::OptimizationLevel Level) { + OptimizationLevel Level) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); MPM.addPass(ModuleSanitizerCoveragePass( @@ -1118,11 +1158,11 @@ static void addSanitizers(const Triple &TargetTriple, bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); MPM.addPass( - MemorySanitizerPass({TrackOrigins, Recover, CompileKernel})); + ModuleMemorySanitizerPass({TrackOrigins, Recover, CompileKernel})); FunctionPassManager FPM; FPM.addPass( MemorySanitizerPass({TrackOrigins, Recover, CompileKernel})); - if (Level != PassBuilder::OptimizationLevel::O0) { + if (Level != OptimizationLevel::O0) { // MemorySanitizer inserts complex instrumentation that mostly // follows the logic of the original code, but operates on // "shadow" values. 
It can benefit from re-running some @@ -1141,26 +1181,24 @@ static void addSanitizers(const Triple &TargetTriple, MSanPass(SanitizerKind::KernelMemory, true); if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { - MPM.addPass(ThreadSanitizerPass()); + MPM.addPass(ModuleThreadSanitizerPass()); MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); } auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) { if (LangOpts.Sanitize.has(Mask)) { - bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); - bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; - bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts); bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; llvm::AsanDtorKind DestructorKind = CodeGenOpts.getSanitizeAddressDtor(); - llvm::AsanDetectStackUseAfterReturnMode UseAfterReturn = - CodeGenOpts.getSanitizeAddressUseAfterReturn(); + AddressSanitizerOptions Opts; + Opts.CompileKernel = CompileKernel; + Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); + Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); MPM.addPass(ModuleAddressSanitizerPass( - CompileKernel, Recover, ModuleUseAfterScope, UseOdrIndicator, - DestructorKind)); - MPM.addPass(createModuleToFunctionPassAdaptor(AddressSanitizerPass( - CompileKernel, Recover, UseAfterScope, UseAfterReturn))); + Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); } }; ASanPass(SanitizerKind::Address, false); @@ -1170,8 +1208,8 @@ static void addSanitizers(const Triple &TargetTriple, if (LangOpts.Sanitize.has(Mask)) { bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); MPM.addPass(HWAddressSanitizerPass( - CompileKernel, Recover, - /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0)); + {CompileKernel, Recover, + /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0})); } }; HWASanPass(SanitizerKind::HWAddress, false); @@ -1183,29 +1221,9 @@ static void addSanitizers(const Triple &TargetTriple, }); } -/// A clean version of `EmitAssembly` that uses the new pass manager. -/// -/// Not all features are currently supported in this system, but where -/// necessary it falls back to the legacy pass manager to at least provide -/// basic functionality. -/// -/// This API is planned to have its functionality finished and then to replace -/// `EmitAssembly` at some point in the future when the default switches. -void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( - BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { - TimeRegion Region(CodeGenOpts.TimePasses ? 
&CodeGenerationTime : nullptr); - setCommandLineOpts(CodeGenOpts); - - bool RequiresCodeGen = (Action != Backend_EmitNothing && - Action != Backend_EmitBC && - Action != Backend_EmitLL); - CreateTargetMachine(RequiresCodeGen); - - if (RequiresCodeGen && !TM) - return; - if (TM) - TheModule->setDataLayout(TM->createDataLayout()); - +void EmitAssemblyHelper::RunOptimizationPipeline( + BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS, + std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS) { Optional<PGOOptions> PGOOpt; if (CodeGenOpts.hasProfileIRInstr()) @@ -1260,6 +1278,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( "", PGOOptions::NoAction, PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling); } + if (TM) + TM->setPGOOption(PGOOpt); PipelineTuningOptions PTO; PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; @@ -1303,9 +1323,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); #include "llvm/Support/Extension.def" - // Register the AA manager first so that our version is the one used. - FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); }); - // Register the target library analysis directly and give it a customized // preset TLI. Triple TargetTriple(TheModule->getTargetTriple()); @@ -1325,26 +1342,26 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (!CodeGenOpts.DisableLLVMPasses) { // Map our optimization levels into one of the distinct levels used to // configure the pipeline. - PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + OptimizationLevel Level = mapToLevel(CodeGenOpts); bool IsThinLTO = CodeGenOpts.PrepareForThinLTO; bool IsLTO = CodeGenOpts.PrepareForLTO; if (LangOpts.ObjCAutoRefCount) { PB.registerPipelineStartEPCallback( - [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { - if (Level != PassBuilder::OptimizationLevel::O0) + [](ModulePassManager &MPM, OptimizationLevel Level) { + if (Level != OptimizationLevel::O0) MPM.addPass( createModuleToFunctionPassAdaptor(ObjCARCExpandPass())); }); PB.registerPipelineEarlySimplificationEPCallback( - [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { - if (Level != PassBuilder::OptimizationLevel::O0) + [](ModulePassManager &MPM, OptimizationLevel Level) { + if (Level != OptimizationLevel::O0) MPM.addPass(ObjCARCAPElimPass()); }); PB.registerScalarOptimizerLateEPCallback( - [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { - if (Level != PassBuilder::OptimizationLevel::O0) + [](FunctionPassManager &FPM, OptimizationLevel Level) { + if (Level != OptimizationLevel::O0) FPM.addPass(ObjCARCOptPass()); }); } @@ -1357,7 +1374,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // vtables so that codegen doesn't complain. 
if (IsThinLTOPostLink) PB.registerPipelineStartEPCallback( - [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + [](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, /*ImportSummary=*/nullptr, /*DropTypeTests=*/true)); @@ -1368,12 +1385,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( CodeGenOpts.InstrumentFunctionsAfterInlining || CodeGenOpts.InstrumentForProfiling) { PB.registerPipelineStartEPCallback( - [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + [](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(createModuleToFunctionPassAdaptor( EntryExitInstrumenterPass(/*PostInlining=*/false))); }); PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + [](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(createModuleToFunctionPassAdaptor( EntryExitInstrumenterPass(/*PostInlining=*/true))); }); @@ -1383,7 +1400,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // of the pipeline. if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) PB.registerScalarOptimizerLateEPCallback( - [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + [](FunctionPassManager &FPM, OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); @@ -1394,15 +1411,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( - [Options](ModulePassManager &MPM, - PassBuilder::OptimizationLevel Level) { + [Options](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(GCOVProfilerPass(*Options)); }); if (Optional<InstrProfOptions> Options = getInstrProfOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( - [Options](ModulePassManager &MPM, - PassBuilder::OptimizationLevel Level) { + [Options](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(InstrProfiling(*Options, false)); }); @@ -1422,17 +1437,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } - // FIXME: We still use the legacy pass manager to do code generation. We - // create that pass manager here and use it as needed below. - legacy::PassManager CodeGenPasses; - bool NeedCodeGen = false; - std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS; + // Add a verifier pass if requested. We don't have to do this if the action + // requires code generation because there will already be a verifier pass in + // the code-generation pipeline. + if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) + MPM.addPass(VerifierPass()); - // Append any output we need to the pass manager. 
switch (Action) { - case Backend_EmitNothing: - break; - case Backend_EmitBC: if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { @@ -1448,8 +1459,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // Emit a module summary by default for Regular LTO except for ld64 // targets bool EmitLTOSummary = - (CodeGenOpts.PrepareForLTO && - !CodeGenOpts.DisableLLVMPasses && + (CodeGenOpts.PrepareForLTO && !CodeGenOpts.DisableLLVMPasses && llvm::Triple(TheModule->getTargetTriple()).getVendor() != llvm::Triple::Apple); if (EmitLTOSummary) { @@ -1467,10 +1477,28 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists)); break; + default: + break; + } + + // Now that we have all of the passes ready, run them. + PrettyStackTraceString CrashInfo("Optimizer"); + MPM.run(*TheModule, MAM); +} + +void EmitAssemblyHelper::RunCodegenPipeline( + BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS, + std::unique_ptr<llvm::ToolOutputFile> &DwoOS) { + // We still use the legacy PM to run the codegen pipeline since the new PM + // does not work with the codegen pipeline. + // FIXME: make the new PM work with the codegen pipeline. + legacy::PassManager CodeGenPasses; + + // Append any output we need to the pass manager. + switch (Action) { case Backend_EmitAssembly: case Backend_EmitMCNull: case Backend_EmitObj: - NeedCodeGen = true; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); if (!CodeGenOpts.SplitDwarfOutput.empty()) { @@ -1483,22 +1511,41 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // FIXME: Should we handle this error differently? return; break; + default: + return; } + PrettyStackTraceString CrashInfo("Code generation"); + CodeGenPasses.run(*TheModule); +} + +/// A clean version of `EmitAssembly` that uses the new pass manager. +/// +/// Not all features are currently supported in this system, but where +/// necessary it falls back to the legacy pass manager to at least provide +/// basic functionality. +/// +/// This API is planned to have its functionality finished and then to replace +/// `EmitAssembly` at some point in the future when the default switches. +void EmitAssemblyHelper::EmitAssembly(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> OS) { + TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr); + setCommandLineOpts(CodeGenOpts); + + bool RequiresCodeGen = actionRequiresCodeGen(Action); + CreateTargetMachine(RequiresCodeGen); + + if (RequiresCodeGen && !TM) + return; + if (TM) + TheModule->setDataLayout(TM->createDataLayout()); + // Before executing passes, print the final values of the LLVM options. cl::PrintOptionValues(); - // Now that we have all of the passes ready, run them. - { - PrettyStackTraceString CrashInfo("Optimizer"); - MPM.run(*TheModule, MAM); - } - - // Now if needed, run the legacy PM for codegen. 
- if (NeedCodeGen) { - PrettyStackTraceString CrashInfo("Code generation"); - CodeGenPasses.run(*TheModule); - } + std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS; + RunOptimizationPipeline(Action, OS, ThinLinkOS); + RunCodegenPipeline(Action, OS, DwoOS); if (ThinLinkOS) ThinLinkOS->keep(); @@ -1526,7 +1573,7 @@ static void runThinLTOBackend( return; auto AddStream = [&](size_t Task) { - return std::make_unique<lto::NativeObjectStream>(std::move(OS)); + return std::make_unique<CachedFileStream>(std::move(OS)); }; lto::Config Conf; if (CGOpts.SaveTempsFilePrefix != "") { @@ -1622,16 +1669,17 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // If we are performing a ThinLTO importing compile, load the function index // into memory and pass it into runThinLTOBackend, which will run the // function importer and invoke LTO passes. - Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr = - llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile, - /*IgnoreEmptyThinLTOIndexFile*/true); - if (!IndexOrErr) { - logAllUnhandledErrors(IndexOrErr.takeError(), errs(), + std::unique_ptr<ModuleSummaryIndex> CombinedIndex; + if (Error E = llvm::getModuleSummaryIndexForFile( + CGOpts.ThinLTOIndexFile, + /*IgnoreEmptyThinLTOIndexFile*/ true) + .moveInto(CombinedIndex)) { + logAllUnhandledErrors(std::move(E), errs(), "Error loading index file '" + CGOpts.ThinLTOIndexFile + "': "); return; } - std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr); + // A null CombinedIndex means we should skip ThinLTO compilation // (LLVM will optionally ignore empty index files, returning null instead // of an error). @@ -1656,8 +1704,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M); - if (!CGOpts.LegacyPassManager) - AsmHelper.EmitAssemblyWithNewPassManager(Action, std::move(OS)); + if (CGOpts.LegacyPassManager) + AsmHelper.EmitAssemblyWithLegacyPassManager(Action, std::move(OS)); else AsmHelper.EmitAssembly(Action, std::move(OS)); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp index b6722ad4e4f1..326ca8d50533 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp @@ -664,6 +664,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_nand_fetch: PostOp = llvm::Instruction::And; // the NOT is special cased below LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: Op = llvm::AtomicRMWInst::Nand; break; @@ -906,6 +907,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_and: @@ -972,6 +974,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: @@ -1211,6 +1214,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case 
AtomicExpr::AO__atomic_nand_fetch: PostOp = llvm::Instruction::And; // the NOT is special cased below LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: LibCallName = "__atomic_fetch_nand"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp index f39a56f81d41..2da2014345d8 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp @@ -2695,8 +2695,8 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { size = varOffset; // Conversely, we might have to prevent LLVM from inserting padding. - } else if (CGM.getDataLayout().getABITypeAlignment(varTy) - > varAlign.getQuantity()) { + } else if (CGM.getDataLayout().getABITypeAlignment(varTy) > + uint64_t(varAlign.getQuantity())) { packed = true; } types.push_back(varTy); @@ -2910,8 +2910,8 @@ llvm::Constant *CodeGenModule::getNSConcreteGlobalBlock() { if (NSConcreteGlobalBlock) return NSConcreteGlobalBlock; - NSConcreteGlobalBlock = - GetOrCreateLLVMGlobal("_NSConcreteGlobalBlock", Int8PtrTy, 0, nullptr); + NSConcreteGlobalBlock = GetOrCreateLLVMGlobal( + "_NSConcreteGlobalBlock", Int8PtrTy, LangAS::Default, nullptr); configureBlocksRuntimeObject(*this, NSConcreteGlobalBlock); return NSConcreteGlobalBlock; } @@ -2920,8 +2920,8 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() { if (NSConcreteStackBlock) return NSConcreteStackBlock; - NSConcreteStackBlock = - GetOrCreateLLVMGlobal("_NSConcreteStackBlock", Int8PtrTy, 0, nullptr); + NSConcreteStackBlock = GetOrCreateLLVMGlobal( + "_NSConcreteStackBlock", Int8PtrTy, LangAS::Default, nullptr); configureBlocksRuntimeObject(*this, NSConcreteStackBlock); return NSConcreteStackBlock; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp index 1a02965b223e..849423c8b9ba 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp @@ -3101,6 +3101,88 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(V); } + case Builtin::BI__builtin_elementwise_abs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result; + if (Op0->getType()->isIntOrIntVectorTy()) + Result = Builder.CreateBinaryIntrinsic( + llvm::Intrinsic::abs, Op0, Builder.getFalse(), nullptr, "elt.abs"); + else + Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::fabs, Op0, nullptr, + "elt.abs"); + return RValue::get(Result); + } + case Builtin::BI__builtin_elementwise_max: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result; + if (Op0->getType()->isIntOrIntVectorTy()) { + QualType Ty = E->getArg(0)->getType(); + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType() + ? 
llvm::Intrinsic::smax + : llvm::Intrinsic::umax, + Op0, Op1, nullptr, "elt.max"); + } else + Result = Builder.CreateMaxNum(Op0, Op1, "elt.max"); + return RValue::get(Result); + } + case Builtin::BI__builtin_elementwise_min: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result; + if (Op0->getType()->isIntOrIntVectorTy()) { + QualType Ty = E->getArg(0)->getType(); + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType() + ? llvm::Intrinsic::smin + : llvm::Intrinsic::umin, + Op0, Op1, nullptr, "elt.min"); + } else + Result = Builder.CreateMinNum(Op0, Op1, "elt.min"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_reduce_max: { + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs<VectorType>()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smax; + else + return llvm::Intrinsic::vector_reduce_umax; + } + return llvm::Intrinsic::vector_reduce_fmax; + }; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.min"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_reduce_min: { + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs<VectorType>()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smin; + else + return llvm::Intrinsic::vector_reduce_umin; + } + return llvm::Intrinsic::vector_reduce_fmin; + }; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.min"); + return RValue::get(Result); + } + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>(); Value *MatValue = EmitScalarExpr(E->getArg(0)); @@ -5024,11 +5106,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } case Builtin::BIprintf: - if (getTarget().getTriple().isNVPTX()) - return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); - if (getTarget().getTriple().getArch() == Triple::amdgcn && - getLangOpts().HIP) - return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue); + if (getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()) { + if (getLangOpts().OpenMPIsDevice) + return EmitOpenMPDevicePrintfCallExpr(E); + if (getTarget().getTriple().isNVPTX()) + return EmitNVPTXDevicePrintfCallExpr(E); + if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP) + return EmitAMDGPUDevicePrintfCallExpr(E); + } + break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef: @@ -8399,7 +8486,7 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { /// SVEBuiltinMemEltTy - Returns the memory element type for this memory /// access builtin. Only required if it can't be inferred from the base pointer /// operand. 
-llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) { +llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getMemEltType()) { case SVETypeFlags::MemEltTyDefault: return getEltType(TypeFlags); @@ -8415,7 +8502,7 @@ llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) { llvm_unreachable("Unknown MemEltType"); } -llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { +llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Invalid SVETypeFlag!"); @@ -8450,7 +8537,7 @@ llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { // Return the llvm predicate vector type corresponding to the specified element // TypeFlags. llvm::ScalableVectorType * -CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { +CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Unhandled SVETypeFlag!"); @@ -8519,7 +8606,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } -llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { +llvm::Value * +CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { Function *Ptrue = CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); @@ -8563,7 +8651,7 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, return C; } -Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { auto *ResultTy = getSVEType(TypeFlags); @@ -8615,7 +8703,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags, : Builder.CreateSExt(Call, ResultTy); } -Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { auto *SrcDataTy = getSVEType(TypeFlags); @@ -8670,7 +8758,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } -Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { // The gather prefetches are overloaded on the vector input - this can either @@ -8703,7 +8791,7 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } -Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value*> &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); @@ -8737,7 +8825,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags, return Builder.CreateCall(F, { Predicate, BasePtr }); } -Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value*> &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); @@ -8784,7 +8872,7 @@ Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags, // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and // svpmullt_pair intrinsics, with the exception that their results are bitcast // 
to a wider type. -Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned BuiltinID) { // Splat scalar operand to vector (intrinsics with _n infix) @@ -8802,14 +8890,14 @@ Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, return EmitSVEReinterpret(Call, Ty); } -Value *CodeGenFunction::EmitSVEMovl(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags, ArrayRef<Value *> Ops, unsigned BuiltinID) { llvm::Type *OverloadedTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); } -Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned BuiltinID) { auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); @@ -8918,8 +9006,10 @@ static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, Ops.insert(Ops.begin(), SplatUndef); } -SmallVector<llvm::Type *, 2> CodeGenFunction::getSVEOverloadTypes( - SVETypeFlags TypeFlags, llvm::Type *ResultType, ArrayRef<Value *> Ops) { +SmallVector<llvm::Type *, 2> +CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, + llvm::Type *ResultType, + ArrayRef<Value *> Ops) { if (TypeFlags.isOverloadNone()) return {}; @@ -12067,6 +12157,22 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, Intrinsic::ID IID = Intrinsic::not_intrinsic; switch (BuiltinID) { default: break; + case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddph512_mask: + case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: + IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512; + break; + case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: + IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512; + break; case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: Subtract = true; LLVM_FALLTHROUGH; @@ -12130,22 +12236,30 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, // Handle any required masking. 
Value *MaskFalseVal = nullptr; switch (BuiltinID) { + case clang::X86::BI__builtin_ia32_vfmaddph512_mask: case clang::X86::BI__builtin_ia32_vfmaddps512_mask: case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: MaskFalseVal = Ops[0]; break; + case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); break; + case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: @@ -12176,9 +12290,21 @@ static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0); Value *Res; if (Rnd != 4) { - Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ? - Intrinsic::x86_avx512_vfmadd_f32 : - Intrinsic::x86_avx512_vfmadd_f64; + Intrinsic::ID IID; + + switch (Ops[0]->getType()->getPrimitiveSizeInBits()) { + case 16: + IID = Intrinsic::x86_avx512fp16_vfmadd_f16; + break; + case 32: + IID = Intrinsic::x86_avx512_vfmadd_f32; + break; + case 64: + IID = Intrinsic::x86_avx512_vfmadd_f64; + break; + default: + llvm_unreachable("Unexpected size"); + } Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2], Ops[4]}); } else if (CGF.Builder.getIsFPConstrained()) { @@ -12385,23 +12511,8 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { return EmitX86CpuSupports(FeatureStr); } -uint64_t -CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { - // Processor features and mapping to processor feature value. - uint64_t FeaturesMask = 0; - for (const StringRef &FeatureStr : FeatureStrs) { - unsigned Feature = - StringSwitch<unsigned>(FeatureStr) -#define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, llvm::X86::FEATURE_##ENUM) -#include "llvm/Support/X86TargetParser.def" - ; - FeaturesMask |= (1ULL << Feature); - } - return FeaturesMask; -} - Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { - return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs)); + return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs)); } llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { @@ -12484,6 +12595,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, SmallVector<Value*, 4> Ops; bool IsMaskFCmp = false; + bool IsConjFMA = false; // Find out if any arguments are required to be integer constant expressions. 
unsigned ICEArguments = 0; @@ -12714,6 +12826,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storeups512_mask: return EmitX86MaskedStore(*this, Ops, Align(1)); + case X86::BI__builtin_ia32_storesh128_mask: case X86::BI__builtin_ia32_storess128_mask: case X86::BI__builtin_ia32_storesd128_mask: return EmitX86MaskedStore(*this, Ops, Align(1)); @@ -12765,14 +12878,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtdq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtw2ph512_mask: + case X86::BI__builtin_ia32_vcvtdq2ph512_mask: + case X86::BI__builtin_ia32_vcvtqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true); case X86::BI__builtin_ia32_cvtudq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtuw2ph512_mask: + case X86::BI__builtin_ia32_vcvtudq2ph512_mask: + case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false); case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: + case X86::BI__builtin_ia32_vfmaddsh3_mask: case X86::BI__builtin_ia32_vfmaddss3_mask: case X86::BI__builtin_ia32_vfmaddsd3_mask: return EmitScalarFMAExpr(*this, E, Ops, Ops[0]); @@ -12780,20 +12900,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmaddsd: return EmitScalarFMAExpr(*this, E, Ops, Constant::getNullValue(Ops[0]->getType())); + case X86::BI__builtin_ia32_vfmaddsh3_maskz: case X86::BI__builtin_ia32_vfmaddss3_maskz: case X86::BI__builtin_ia32_vfmaddsd3_maskz: return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true); + case X86::BI__builtin_ia32_vfmaddsh3_mask3: case X86::BI__builtin_ia32_vfmaddss3_mask3: case X86::BI__builtin_ia32_vfmaddsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2); + case X86::BI__builtin_ia32_vfmsubsh3_mask3: case X86::BI__builtin_ia32_vfmsubss3_mask3: case X86::BI__builtin_ia32_vfmsubsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, /*NegAcc*/ true); + case X86::BI__builtin_ia32_vfmaddph: case X86::BI__builtin_ia32_vfmaddps: case X86::BI__builtin_ia32_vfmaddpd: + case X86::BI__builtin_ia32_vfmaddph256: case X86::BI__builtin_ia32_vfmaddps256: case X86::BI__builtin_ia32_vfmaddpd256: + case X86::BI__builtin_ia32_vfmaddph512_mask: + case X86::BI__builtin_ia32_vfmaddph512_maskz: + case X86::BI__builtin_ia32_vfmaddph512_mask3: case X86::BI__builtin_ia32_vfmaddps512_mask: case X86::BI__builtin_ia32_vfmaddps512_maskz: case X86::BI__builtin_ia32_vfmaddps512_mask3: @@ -12802,7 +12930,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmaddpd512_maskz: case X86::BI__builtin_ia32_vfmaddpd512_mask3: case X86::BI__builtin_ia32_vfmsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubph512_mask3: return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false); + case X86::BI__builtin_ia32_vfmaddsubph512_mask: + case X86::BI__builtin_ia32_vfmaddsubph512_maskz: + case X86::BI__builtin_ia32_vfmaddsubph512_mask3: + case X86::BI__builtin_ia32_vfmsubaddph512_mask3: case X86::BI__builtin_ia32_vfmaddsubps512_mask: case X86::BI__builtin_ia32_vfmaddsubps512_maskz: case X86::BI__builtin_ia32_vfmaddsubps512_mask3: @@ -12849,6 +12982,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned 
BuiltinID, case X86::BI__builtin_ia32_loaddqudi512_mask: return EmitX86MaskedLoad(*this, Ops, Align(1)); + case X86::BI__builtin_ia32_loadsh128_mask: case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: return EmitX86MaskedLoad(*this, Ops, Align(1)); @@ -13728,6 +13862,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_selectq_128: case X86::BI__builtin_ia32_selectq_256: case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectph_128: + case X86::BI__builtin_ia32_selectph_256: + case X86::BI__builtin_ia32_selectph_512: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: @@ -13735,6 +13872,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_selectpd_256: case X86::BI__builtin_ia32_selectpd_512: return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); + case X86::BI__builtin_ia32_selectsh_128: case X86::BI__builtin_ia32_selectss_128: case X86::BI__builtin_ia32_selectsd_128: { Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); @@ -13967,15 +14105,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } + case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: { unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. if (CC != 4) { - Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ? - Intrinsic::x86_avx512_mask_sqrt_sd : - Intrinsic::x86_avx512_mask_sqrt_ss; + Intrinsic::ID IID; + + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_sqrtsh_round_mask: + IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh; + break; + case X86::BI__builtin_ia32_sqrtsd_round_mask: + IID = Intrinsic::x86_avx512_mask_sqrt_sd; + break; + case X86::BI__builtin_ia32_sqrtss_round_mask: + IID = Intrinsic::x86_avx512_mask_sqrt_ss; + break; + } return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); @@ -13997,6 +14148,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps256: case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtph256: + case X86::BI__builtin_ia32_sqrtph: + case X86::BI__builtin_ia32_sqrtph512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { if (Ops.size() == 2) { @@ -14004,9 +14158,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. if (CC != 4) { - Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ? 
- Intrinsic::x86_avx512_sqrt_ps_512 : - Intrinsic::x86_avx512_sqrt_pd_512; + Intrinsic::ID IID; + + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_sqrtph512: + IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; + break; + case X86::BI__builtin_ia32_sqrtps512: + IID = Intrinsic::x86_avx512_sqrt_ps_512; + break; + case X86::BI__builtin_ia32_sqrtpd512: + IID = Intrinsic::x86_avx512_sqrt_pd_512; + break; + } return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } } @@ -14174,28 +14340,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0]}); } case X86::BI__builtin_ia32_reduce_fadd_pd512: - case X86::BI__builtin_ia32_reduce_fadd_ps512: { + case X86::BI__builtin_ia32_reduce_fadd_ps512: + case X86::BI__builtin_ia32_reduce_fadd_ph512: + case X86::BI__builtin_ia32_reduce_fadd_ph256: + case X86::BI__builtin_ia32_reduce_fadd_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } case X86::BI__builtin_ia32_reduce_fmul_pd512: - case X86::BI__builtin_ia32_reduce_fmul_ps512: { + case X86::BI__builtin_ia32_reduce_fmul_ps512: + case X86::BI__builtin_ia32_reduce_fmul_ph512: + case X86::BI__builtin_ia32_reduce_fmul_ph256: + case X86::BI__builtin_ia32_reduce_fmul_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } case X86::BI__builtin_ia32_reduce_fmax_pd512: - case X86::BI__builtin_ia32_reduce_fmax_ps512: { + case X86::BI__builtin_ia32_reduce_fmax_ps512: + case X86::BI__builtin_ia32_reduce_fmax_ph512: + case X86::BI__builtin_ia32_reduce_fmax_ph256: + case X86::BI__builtin_ia32_reduce_fmax_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType()); Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } case X86::BI__builtin_ia32_reduce_fmin_pd512: - case X86::BI__builtin_ia32_reduce_fmin_ps512: { + case X86::BI__builtin_ia32_reduce_fmin_ps512: + case X86::BI__builtin_ia32_reduce_fmin_ph512: + case X86::BI__builtin_ia32_reduce_fmin_ph256: + case X86::BI__builtin_ia32_reduce_fmin_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType()); Builder.getFastMathFlags().setNoNaNs(); @@ -14311,6 +14489,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: case X86::BI__builtin_ia32_fpclassps512_mask: + case X86::BI__builtin_ia32_fpclassph128_mask: + case X86::BI__builtin_ia32_fpclassph256_mask: + case X86::BI__builtin_ia32_fpclassph512_mask: case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: { @@ -14322,6 +14503,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_fpclassph128_mask: + ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; + break; + case X86::BI__builtin_ia32_fpclassph256_mask: + ID = Intrinsic::x86_avx512fp16_fpclass_ph_256; + break; + case X86::BI__builtin_ia32_fpclassph512_mask: + ID = Intrinsic::x86_avx512fp16_fpclass_ph_512; + break; case X86::BI__builtin_ia32_fpclassps128_mask: ID = Intrinsic::x86_avx512_fpclass_ps_128; 
break; @@ -14459,6 +14649,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordps: case X86::BI__builtin_ia32_cmpordpd: return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); + case X86::BI__builtin_ia32_cmpph128_mask: + case X86::BI__builtin_ia32_cmpph256_mask: + case X86::BI__builtin_ia32_cmpph512_mask: case X86::BI__builtin_ia32_cmpps128_mask: case X86::BI__builtin_ia32_cmpps256_mask: case X86::BI__builtin_ia32_cmpps512_mask: @@ -14800,7 +14993,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < 3; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16); Ptr = Builder.CreateBitCast( @@ -14816,7 +15009,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); - for (int i = 0; i < 7; ++i) { + for (int i = 0; i < 4; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16); Ptr = Builder.CreateBitCast( @@ -14941,6 +15134,36 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Builder.SetInsertPoint(End); return Builder.CreateExtractValue(Call, 0); } + case X86::BI__builtin_ia32_vfcmaddcph512_mask: + IsConjFMA = true; + LLVM_FALLTHROUGH; + case X86::BI__builtin_ia32_vfmaddcph512_mask: { + Intrinsic::ID IID = IsConjFMA + ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512 + : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512; + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + return EmitX86Select(*this, Ops[3], Call, Ops[0]); + } + case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: + IsConjFMA = true; + LLVM_FALLTHROUGH; + case X86::BI__builtin_ia32_vfmaddcsh_round_mask: { + Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh + : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1)); + return EmitX86Select(*this, And, Call, Ops[0]); + } + case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: + IsConjFMA = true; + LLVM_FALLTHROUGH; + case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: { + Intrinsic::ID IID = IsConjFMA ? 
Intrinsic::x86_avx512fp16_mask_vfcmadd_csh + : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + static constexpr int Mask[] = {0, 5, 6, 7}; + return Builder.CreateShuffleVector(Call, Ops[2], Mask); + } } } @@ -14948,8 +15171,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { SmallVector<Value*, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + if (E->getArg(i)->getType()->isArrayType()) + Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); + else + Ops.push_back(EmitScalarExpr(E->getArg(i))); + } Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15519,6 +15746,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt}); return Builder.CreateAnd(Rotate, Ops[2]); } + case PPC::BI__builtin_ppc_load2r: { + Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); + Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); + Value *LoadIntrinsic = Builder.CreateCall(F, Ops); + return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); + } // FMA variations case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: @@ -15774,6 +16007,17 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } return Call; } + if (BuiltinID == PPC::BI__builtin_vsx_build_pair || + BuiltinID == PPC::BI__builtin_mma_build_acc) { + // Reverse the order of the operands for LE, so the + // same builtin call can be used on both LE and BE + // without the need for the programmer to swap operands. + // The operands are reversed starting from the second argument, + // the first operand is the pointer to the pair/accumulator + // that is being built. + if (getTarget().isLittleEndian()) + std::reverse(Ops.begin() + 1, Ops.end()); + } bool Accumulate; switch (BuiltinID) { #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \ @@ -15831,7 +16075,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // store. 
Value *LoadedVal = Pair.first.getScalarVal(); Builder.CreateStore(LoadedVal, OldValAddr); - return Pair.second; + return Builder.CreateZExt(Pair.second, Builder.getInt32Ty()); } case PPC::BI__builtin_ppc_fetch_and_add: case PPC::BI__builtin_ppc_fetch_and_addlp: { @@ -15930,6 +16174,21 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt)) .getScalarVal(); + case PPC::BI__builtin_ppc_test_data_class: { + llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType(); + unsigned IntrinsicID; + if (ArgType->isDoubleTy()) + IntrinsicID = Intrinsic::ppc_test_data_class_d; + else if (ArgType->isFloatTy()) + IntrinsicID = Intrinsic::ppc_test_data_class_f; + else + llvm_unreachable("Invalid Argument Type"); + return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops, + "test_data_class"); + } + case PPC::BI__builtin_ppc_swdiv: + case PPC::BI__builtin_ppc_swdivs: + return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv"); } } @@ -15940,11 +16199,9 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, const CallExpr *E = nullptr) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); auto *Call = CGF.Builder.CreateCall(F); - Call->addAttribute( - AttributeList::ReturnIndex, + Call->addRetAttr( Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); - Call->addAttribute(AttributeList::ReturnIndex, - Attribute::getWithAlignment(Call->getContext(), Align(4))); + Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4))); if (!E) return Call; QualType BuiltinRetType = E->getType(); @@ -16220,6 +16477,74 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy); return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 }); } + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { + Intrinsic::ID IID; + llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + ArgTy = llvm::Type::getFloatTy(getLLVMContext()); + IID = Intrinsic::amdgcn_global_atomic_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + ArgTy = llvm::FixedVectorType::get( + llvm::Type::getHalfTy(getLLVMContext()), 2); + IID = Intrinsic::amdgcn_global_atomic_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + IID = Intrinsic::amdgcn_global_atomic_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + IID = Intrinsic::amdgcn_global_atomic_fmin; + break; + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + IID = Intrinsic::amdgcn_global_atomic_fmax; + break; + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + IID = Intrinsic::amdgcn_flat_atomic_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + IID = Intrinsic::amdgcn_flat_atomic_fmin; + break; + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: + IID = Intrinsic::amdgcn_flat_atomic_fmax; + break; + } + llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Function 
*F = + CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); + return Builder.CreateCall(F, {Addr, Val}); + } + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: { + Intrinsic::ID IID; + llvm::Type *ArgTy; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: + ArgTy = llvm::Type::getFloatTy(getLLVMContext()); + IID = Intrinsic::amdgcn_ds_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: + ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); + IID = Intrinsic::amdgcn_ds_fadd; + break; + } + llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue( + llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true)); + llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue( + llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0)); + llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy}); + return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec")); @@ -17749,6 +18074,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmin_f32x4: + case WebAssembly::BI__builtin_wasm_pmin_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_pmax_f32x4: + case WebAssembly::BI__builtin_wasm_pmax_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f32x4: @@ -18035,6 +18376,93 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); return Builder.CreateCall(Callee, Ops); } + case WebAssembly::BI__builtin_wasm_fma_f32x4: + case WebAssembly::BI__builtin_wasm_fms_f32x4: + case WebAssembly::BI__builtin_wasm_fma_f64x2: + case WebAssembly::BI__builtin_wasm_fms_f64x2: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_fma_f32x4: + case WebAssembly::BI__builtin_wasm_fma_f64x2: + IntNo = Intrinsic::wasm_fma; + break; + case WebAssembly::BI__builtin_wasm_fms_f32x4: + case WebAssembly::BI__builtin_wasm_fms_f64x2: + IntNo = Intrinsic::wasm_fms; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); + return Builder.CreateCall(Callee, {A, B, C}); + } + case WebAssembly::BI__builtin_wasm_laneselect_i8x16: + case WebAssembly::BI__builtin_wasm_laneselect_i16x8: + case WebAssembly::BI__builtin_wasm_laneselect_i32x4: + case WebAssembly::BI__builtin_wasm_laneselect_i64x2: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = 
EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType()); + return Builder.CreateCall(Callee, {A, B, C}); + } + case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: { + Value *Src = EmitScalarExpr(E->getArg(0)); + Value *Indices = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle); + return Builder.CreateCall(Callee, {Src, Indices}); + } + case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: + case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: + IntNo = Intrinsic::wasm_relaxed_min; + break; + case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: + IntNo = Intrinsic::wasm_relaxed_max; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType()); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: + case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2: + case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: + IntNo = Intrinsic::wasm_relaxed_trunc_signed; + break; + case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: + IntNo = Intrinsic::wasm_relaxed_trunc_unsigned; + break; + case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2: + IntNo = Intrinsic::wasm_relaxed_trunc_zero_signed; + break; + case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: + IntNo = Intrinsic::wasm_relaxed_trunc_zero_unsigned; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = CGM.getIntrinsic(IntNo); + return Builder.CreateCall(Callee, {Vec}); + } default: return nullptr; } @@ -18295,6 +18723,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID = Intrinsic::not_intrinsic; unsigned NF = 1; + constexpr unsigned TAIL_UNDISTURBED = 0; // Required for overloaded intrinsics. 
llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp index 88030fee501b..a1b4431ca8c4 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp @@ -177,7 +177,7 @@ public: llvm::Function *finalizeModule() override; }; -} +} // end anonymous namespace std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const { if (CGM.getLangOpts().HIP) @@ -237,11 +237,10 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const { // hipError_t hipLaunchByPtr(char *); return CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr"); - } else { - // cudaError_t cudaLaunch(char *); - return CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch"); } + // cudaError_t cudaLaunch(char *); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch"); } llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const { @@ -253,8 +252,8 @@ llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const { } llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { - auto CallbackFnTy = getCallbackFnTy(); - auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy(); + auto *CallbackFnTy = getCallbackFnTy(); + auto *RegisterGlobalsFnTy = getRegisterGlobalsFnTy(); llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy, VoidPtrTy, CallbackFnTy->getPointerTo()}; return llvm::FunctionType::get(VoidTy, Params, false); @@ -397,7 +396,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, QualType QT = cudaLaunchKernelFD->getType(); QualType CQT = QT.getCanonicalType(); llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); - llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); + llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); @@ -473,7 +472,7 @@ static void replaceManagedVar(llvm::GlobalVariable *Var, // variable with instructions. 
for (auto &&Op : WorkItem) { auto *CE = cast<llvm::ConstantExpr>(Op); - auto *NewInst = llvm::createReplacementInstr(CE, I); + auto *NewInst = CE->getAsInstruction(I); NewInst->replaceUsesOfWith(OldV, NewV); OldV = CE; NewV = NewInst; @@ -590,7 +589,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { uint64_t VarSize = CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); if (Info.Flags.isManaged()) { - auto ManagedVar = new llvm::GlobalVariable( + auto *ManagedVar = new llvm::GlobalVariable( CGM.getModule(), Var->getType(), /*isConstant=*/false, Var->getLinkage(), /*Init=*/Var->isDeclaration() @@ -823,7 +822,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { GpuBinaryHandle, CharUnits::fromQuantity(GpuBinaryHandle->getAlignment())); { - auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); + auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType()); llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero); @@ -842,7 +841,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { CtorBuilder.SetInsertPoint(ExitBlock); // Call __hip_register_globals(GpuBinaryHandle); if (RegisterGlobalsFunc) { - auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); + auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue); } } @@ -958,7 +957,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity( GpuBinaryHandle->getAlignment())); - auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr); + auto *HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr); // There is only one HIP fat binary per linked module, however there are // multiple destructor functions. Make sure the fat binary is unregistered // only once. @@ -1071,7 +1070,7 @@ void CGNVCUDARuntime::transformManagedVars() { llvm::GlobalVariable *Var = Info.Var; if (Info.Flags.getKind() == DeviceVarFlags::Variable && Info.Flags.isManaged()) { - auto ManagedVar = new llvm::GlobalVariable( + auto *ManagedVar = new llvm::GlobalVariable( CGM.getModule(), Var->getType(), /*isConstant=*/false, Var->getLinkage(), /*Init=*/Var->isDeclaration() @@ -1148,6 +1147,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, Var->setAlignment(CGM.getPointerAlign().getAsAlign()); Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); + CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); KernelHandles[F] = Var; KernelStubs[Var] = F; return Var; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp index 47a4ed35be85..d830a7e01709 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp @@ -1271,12 +1271,26 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // perform the conversion. if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) { if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) { + // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate + // vector, use a vector insert and bitcast the result. 
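// Illustrative sketch (assumption, not part of this change): assuming
// AArch64 SVE with -msve-vector-bits=512, a fixed-length predicate type
//   typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(512)));
// is laid out as an 8-byte vector of i8. Converting such a value back to the
// sizeless svbool_t (for example when passing it to a callee declared with
// svbool_t parameters) takes this path: the fixed <8 x i8> value is loaded,
// inserted into <vscale x 2 x i8>, and the result is bitcast to
// <vscale x 16 x i1>.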
+ bool NeedsBitcast = false; + auto PredType = + llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16); + llvm::Type *OrigType = Ty; + if (ScalableDst == PredType && + FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) { + ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2); + NeedsBitcast = true; + } if (ScalableDst->getElementType() == FixedSrc->getElementType()) { auto *Load = CGF.Builder.CreateLoad(Src); auto *UndefVec = llvm::UndefValue::get(ScalableDst); auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); - return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero, - "castScalableSve"); + llvm::Value *Result = CGF.Builder.CreateInsertVector( + ScalableDst, UndefVec, Load, Zero, "castScalableSve"); + if (NeedsBitcast) + Result = CGF.Builder.CreateBitCast(Result, OrigType); + return Result; } } } @@ -1550,11 +1564,11 @@ bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) { default: return false; case BuiltinType::Float: - return getTarget().useObjCFPRetForRealType(TargetInfo::Float); + return getTarget().useObjCFPRetForRealType(FloatModeKind::Float); case BuiltinType::Double: - return getTarget().useObjCFPRetForRealType(TargetInfo::Double); + return getTarget().useObjCFPRetForRealType(FloatModeKind::Double); case BuiltinType::LongDouble: - return getTarget().useObjCFPRetForRealType(TargetInfo::LongDouble); + return getTarget().useObjCFPRetForRealType(FloatModeKind::LongDouble); } } @@ -1733,6 +1747,21 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } +static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs, + const Decl *Callee) { + if (!Callee) + return; + + SmallVector<StringRef, 4> Attrs; + + for (const AssumptionAttr *AA : Callee->specific_attrs<AssumptionAttr>()) + AA->getAssumption().split(Attrs, ","); + + if (!Attrs.empty()) + FuncAttrs.addAttribute(llvm::AssumptionAttrKey, + llvm::join(Attrs.begin(), Attrs.end(), ",")); +} + bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context, QualType ReturnType) { // We can't just discard the return value for a record type with a @@ -1814,6 +1843,8 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, FuncAttrs.addAttribute("no-infs-fp-math", "true"); if (LangOpts.NoHonorNaNs) FuncAttrs.addAttribute("no-nans-fp-math", "true"); + if (LangOpts.ApproxFunc) + FuncAttrs.addAttribute("approx-func-fp-math", "true"); if (LangOpts.UnsafeFPMath) FuncAttrs.addAttribute("unsafe-fp-math", "true"); if (CodeGenOpts.SoftFloat) @@ -1871,7 +1902,7 @@ void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), /* AttrOnCallSite = */ false, FuncAttrs); // TODO: call GetCPUAndFeaturesAttributes? - F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); + F.addFnAttrs(FuncAttrs); } void CodeGenModule::addDefaultFunctionDefinitionAttributes( @@ -2006,6 +2037,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); + // Attach assumption attributes to the declaration. If this is a call + // site, attach assumptions from the caller to the call as well. + AddAttributesFromAssumes(FuncAttrs, TargetDecl); + bool HasOptnone = false; // The NoBuiltinAttr attached to the target FunctionDecl. 
const NoBuiltinAttr *NBA = nullptr; @@ -2052,24 +2087,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // allows it to work on indirect virtual function calls. if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoMerge); - - // Add known guaranteed alignment for allocation functions. - if (unsigned BuiltinID = Fn->getBuiltinID()) { - switch (BuiltinID) { - case Builtin::BIaligned_alloc: - case Builtin::BIcalloc: - case Builtin::BImalloc: - case Builtin::BImemalign: - case Builtin::BIrealloc: - case Builtin::BIstrdup: - case Builtin::BIstrndup: - RetAttrs.addAlignmentAttr(Context.getTargetInfo().getNewAlign() / - Context.getTargetInfo().getCharWidth()); - break; - default: - break; - } - } } // 'const', 'pure' and 'noalias' attributed functions are also nounwind. @@ -2123,18 +2140,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, llvm::toStringRef(CodeGenOpts.UniformWGSize)); } } - - std::string AssumptionValueStr; - for (AssumptionAttr *AssumptionA : - TargetDecl->specific_attrs<AssumptionAttr>()) { - std::string AS = AssumptionA->getAssumption().str(); - if (!AS.empty() && !AssumptionValueStr.empty()) - AssumptionValueStr += ","; - AssumptionValueStr += AS; - } - - if (!AssumptionValueStr.empty()) - FuncAttrs.addAttribute(llvm::AssumptionAttrKey, AssumptionValueStr); } // Attach "no-builtins" attributes to: @@ -2227,7 +2232,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // C++ explicitly makes returning undefined values UB. C's rule only applies // to used values, so we never mark them noundef for now. bool HasStrictReturn = getLangOpts().CPlusPlus; - if (TargetDecl) { + if (TargetDecl && HasStrictReturn) { if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl)) HasStrictReturn &= !FDecl->isExternC(); else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl)) @@ -2790,7 +2795,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // so the UBSAN check could function. llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(EmitScalarExpr(AVAttr->getAlignment())); - unsigned AlignmentInt = + uint64_t AlignmentInt = AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment); if (AI->getParamAlign().valueOrOne() < AlignmentInt) { AI->removeAttr(llvm::Attribute::AttrKind::Alignment); @@ -2857,9 +2862,18 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // llvm.experimental.vector.extract to convert back to the original // VLST. if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) { - auto *Coerced = Fn->getArg(FirstIRArg); + llvm::Value *Coerced = Fn->getArg(FirstIRArg); if (auto *VecTyFrom = dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) { + // If we are casting a scalable 16 x i1 predicate vector to a fixed i8 + // vector, bitcast the source and use a vector extract. 
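// Illustrative sketch (assumption, not part of this change): the mirror of
// the coerced-load case above. A function defined with a fixed-length
// predicate parameter,
//   void g(fixed_bool_t x);   // fixed_bool_t as in the earlier sketch
// receives the argument as <vscale x 16 x i1> per the AAPCS; it is bitcast
// to <vscale x 2 x i8> here and the fixed <8 x i8> value is extracted from
// it (again assuming -msve-vector-bits=512).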
+ auto PredType = + llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + if (VecTyFrom == PredType && + VecTyTo->getElementType() == Builder.getInt8Ty()) { + VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2); + Coerced = Builder.CreateBitCast(Coerced, VecTyFrom); + } if (VecTyFrom->getElementType() == VecTyTo->getElementType()) { llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty); @@ -4503,10 +4517,8 @@ maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx, if (CurAlign >= NewAlign) return Attrs; llvm::Attribute AlignAttr = llvm::Attribute::getWithAlignment(Ctx, NewAlign); - return Attrs - .removeAttribute(Ctx, llvm::AttributeList::ReturnIndex, - llvm::Attribute::AttrKind::Alignment) - .addAttribute(Ctx, llvm::AttributeList::ReturnIndex, AlignAttr); + return Attrs.removeRetAttribute(Ctx, llvm::Attribute::AttrKind::Alignment) + .addRetAttribute(Ctx, AlignAttr); } template <typename AlignedAttrTy> class AbstractAssumeAlignedAttrEmitter { @@ -5005,12 +5017,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); // Materialize to a temporary. - addr = CreateTempAlloca( - RV.getScalarVal()->getType(), - CharUnits::fromQuantity(std::max( - (unsigned)layout->getAlignment().value(), scalarAlign)), - "tmp", - /*ArraySize=*/nullptr, &AllocaAddr); + addr = + CreateTempAlloca(RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max( + layout->getAlignment().value(), scalarAlign)), + "tmp", + /*ArraySize=*/nullptr, &AllocaAddr); tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); @@ -5167,15 +5179,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) if (FD->hasAttr<StrictFPAttr>()) // All calls within a strictfp function are marked strictfp - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::StrictFP); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP); // Add call-site nomerge attribute if exists. if (InNoMergeAttributedStmt) - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoMerge); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -5185,15 +5193,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) { Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::AlwaysInline); + Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoInline); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); } // Decide whether to use a call or an invoke. @@ -5209,7 +5214,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = true; } else { // Otherwise, nounwind call sites will never throw. 
- CannotThrow = Attrs.hasFnAttribute(llvm::Attribute::NoUnwind); + CannotThrow = Attrs.hasFnAttr(llvm::Attribute::NoUnwind); if (auto *FPtr = dyn_cast<llvm::Function>(CalleePtr)) if (FPtr->hasFnAttribute(llvm::Attribute::NoUnwind)) @@ -5232,9 +5237,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) if (FD->hasAttr<StrictFPAttr>()) // All calls within a strictfp function are marked strictfp - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::StrictFP); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP); AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl); Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs); @@ -5261,8 +5264,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) { if (const auto *A = FD->getAttr<CFGuardAttr>()) { if (A->getGuard() == CFGuardAttr::GuardArg::nocf && !CI->getCalledFunction()) - Attrs = Attrs.addAttribute( - getLLVMContext(), llvm::AttributeList::FunctionIndex, "guard_nocf"); + Attrs = Attrs.addFnAttribute(getLLVMContext(), "guard_nocf"); } } @@ -5306,6 +5308,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, TargetDecl->hasAttr<MSAllocatorAttr>()) getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy->getPointeeType(), Loc); + // Add metadata if calling an __attribute__((error(""))) or warning fn. + if (TargetDecl && TargetDecl->hasAttr<ErrorAttr>()) { + llvm::ConstantInt *Line = + llvm::ConstantInt::get(Int32Ty, Loc.getRawEncoding()); + llvm::ConstantAsMetadata *MD = llvm::ConstantAsMetadata::get(Line); + llvm::MDTuple *MDT = llvm::MDNode::get(getLLVMContext(), {MD}); + CI->setMetadata("srcloc", MDT); + } + // 4. Finish the call. // If the call doesn't return, finish the basic block and clear the @@ -5321,8 +5332,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // attributes of the called function. if (auto *F = CI->getCalledFunction()) F->removeFnAttr(llvm::Attribute::NoReturn); - CI->removeAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoReturn); + CI->removeFnAttr(llvm::Attribute::NoReturn); // Avoid incompatibility with ASan which relies on the `noreturn` // attribute to insert handler calls. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp index 9895a23b7093..0df64d4d5d26 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp @@ -1424,6 +1424,11 @@ static bool CanSkipVTablePointerInitialization(CodeGenFunction &CGF, if (!ClassDecl->isDynamicClass()) return true; + // For a final class, the vtable pointer is known to already point to the + // class's vtable. + if (ClassDecl->isEffectivelyFinal()) + return true; + if (!Dtor->hasTrivialBody()) return false; @@ -2502,6 +2507,8 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { // Apply the offsets. 
Address VTableField = LoadCXXThisAddress(); + unsigned ThisAddrSpace = + VTableField.getPointer()->getType()->getPointerAddressSpace(); if (!NonVirtualOffset.isZero() || VirtualOffset) VTableField = ApplyNonVirtualAndVirtualOffset( @@ -2516,12 +2523,11 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true) ->getPointerTo(ProgAS) ->getPointerTo(GlobalsAS); - // vtable field is is derived from `this` pointer, therefore it should be in - // default address space. - VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast( - VTableField, VTablePtrTy->getPointerTo()); - VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast( - VTableAddressPoint, VTablePtrTy); + // vtable field is is derived from `this` pointer, therefore they should be in + // the same addr space. Note that this might not be LLVM address space 0. + VTableField = Builder.CreateBitCast(VTableField, + VTablePtrTy->getPointerTo(ThisAddrSpace)); + VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTablePtrTy); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp index 81c910f40bf8..af651e6f44b7 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp @@ -25,6 +25,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" @@ -52,7 +53,7 @@ using namespace clang::CodeGen; static uint32_t getTypeAlignIfRequired(const Type *Ty, const ASTContext &Ctx) { auto TI = Ctx.getTypeInfo(Ty); - return TI.AlignIsRequired ? TI.Align : 0; + return TI.isAlignRequired() ? TI.Align : 0; } static uint32_t getTypeAlignIfRequired(QualType Ty, const ASTContext &Ctx) { @@ -243,6 +244,11 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { PP.SplitTemplateClosers = true; } + PP.SuppressInlineNamespace = false; + PP.PrintCanonicalTypes = true; + PP.UsePreferredNames = false; + PP.AlwaysIncludeTypeForTemplateArgument = true; + // Apply -fdebug-prefix-map. PP.Callbacks = &PrintCB; return PP; @@ -385,7 +391,7 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { } else { PresumedLoc PLoc = SM.getPresumedLoc(Loc); FileName = PLoc.getFilename(); - + if (FileName.empty()) { FileName = TheCU->getFile()->getFilename(); } else { @@ -830,11 +836,12 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::BFloat16: case BuiltinType::Float128: case BuiltinType::Double: - // FIXME: For targets where long double and __float128 have the same size, - // they are currently indistinguishable in the debugger without some - // special treatment. However, there is currently no consensus on encoding - // and this should be updated once a DWARF encoding exists for distinct - // floating point types of the same size. + case BuiltinType::Ibm128: + // FIXME: For targets where long double, __ibm128 and __float128 have the + // same size, they are currently indistinguishable in the debugger without + // some special treatment. 
However, there is currently no consensus on + // encoding and this should be updated once a DWARF encoding exists for + // distinct floating point types of the same size. Encoding = llvm::dwarf::DW_ATE_float; break; case BuiltinType::ShortAccum: @@ -867,23 +874,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { break; } - switch (BT->getKind()) { - case BuiltinType::Long: - BTName = "long int"; - break; - case BuiltinType::LongLong: - BTName = "long long int"; - break; - case BuiltinType::ULong: - BTName = "long unsigned int"; - break; - case BuiltinType::ULongLong: - BTName = "long long unsigned int"; - break; - default: - BTName = BT->getName(CGM.getLangOpts()); - break; - } + BTName = BT->getName(CGM.getLangOpts()); // Bit size and offset of the type. uint64_t Size = CGM.getContext().getTypeSize(BT); return DBuilder.createBasicType(BTName, Size, Encoding); @@ -914,34 +905,98 @@ llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { return DBuilder.createBasicType("complex", Size, Encoding); } -llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty, - llvm::DIFile *Unit) { +static void stripUnusedQualifiers(Qualifiers &Q) { + // Ignore these qualifiers for now. + Q.removeObjCGCAttr(); + Q.removeAddressSpace(); + Q.removeObjCLifetime(); + Q.removeUnaligned(); +} + +static llvm::dwarf::Tag getNextQualifier(Qualifiers &Q) { + if (Q.hasConst()) { + Q.removeConst(); + return llvm::dwarf::DW_TAG_const_type; + } + if (Q.hasVolatile()) { + Q.removeVolatile(); + return llvm::dwarf::DW_TAG_volatile_type; + } + if (Q.hasRestrict()) { + Q.removeRestrict(); + return llvm::dwarf::DW_TAG_restrict_type; + } + return (llvm::dwarf::Tag)0; +} + +// Strip MacroQualifiedTypeLoc and AttributedTypeLoc +// as their corresponding types will be ignored +// during code generation. Stripping them allows +// to maintain proper TypeLoc for a given type +// during code generation. +static TypeLoc StripMacroAttributed(TypeLoc TL) { + if (!TL) + return TL; + + while (true) { + if (auto MTL = TL.getAs<MacroQualifiedTypeLoc>()) + TL = MTL.getInnerLoc(); + else if (auto ATL = TL.getAs<AttributedTypeLoc>()) + TL = ATL.getModifiedLoc(); + else + break; + } + return TL; +} + +llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty, llvm::DIFile *Unit, + TypeLoc TL) { QualifierCollector Qc; const Type *T = Qc.strip(Ty); - // Ignore these qualifiers for now. - Qc.removeObjCGCAttr(); - Qc.removeAddressSpace(); - Qc.removeObjCLifetime(); + stripUnusedQualifiers(Qc); // We will create one Derived type for one qualifier and recurse to handle any // additional ones. - llvm::dwarf::Tag Tag; - if (Qc.hasConst()) { - Tag = llvm::dwarf::DW_TAG_const_type; - Qc.removeConst(); - } else if (Qc.hasVolatile()) { - Tag = llvm::dwarf::DW_TAG_volatile_type; - Qc.removeVolatile(); - } else if (Qc.hasRestrict()) { - Tag = llvm::dwarf::DW_TAG_restrict_type; - Qc.removeRestrict(); - } else { + llvm::dwarf::Tag Tag = getNextQualifier(Qc); + if (!Tag) { assert(Qc.empty() && "Unknown type qualifier for debug info"); return getOrCreateType(QualType(T, 0), Unit); } - auto *FromTy = getOrCreateType(Qc.apply(CGM.getContext(), T), Unit); + QualType NextTy = Qc.apply(CGM.getContext(), T); + TypeLoc NextTL; + if (NextTy.hasQualifiers()) + NextTL = TL; + else if (TL) { + if (auto QTL = TL.getAs<QualifiedTypeLoc>()) + NextTL = StripMacroAttributed(QTL.getNextTypeLoc()); + } + auto *FromTy = getOrCreateType(NextTy, Unit, NextTL); + + // No need to fill in the Name, Line, Size, Alignment, Offset in case of + // CVR derived types. 
+ return DBuilder.createQualifiedType(Tag, FromTy); +} + +llvm::DIType *CGDebugInfo::CreateQualifiedType(const FunctionProtoType *F, + llvm::DIFile *Unit) { + FunctionProtoType::ExtProtoInfo EPI = F->getExtProtoInfo(); + Qualifiers &Q = EPI.TypeQuals; + stripUnusedQualifiers(Q); + + // We will create one Derived type for one qualifier and recurse to handle any + // additional ones. + llvm::dwarf::Tag Tag = getNextQualifier(Q); + if (!Tag) { + assert(Q.empty() && "Unknown type qualifier for debug info"); + return nullptr; + } + + auto *FromTy = + getOrCreateType(CGM.getContext().getFunctionType(F->getReturnType(), + F->getParamTypes(), EPI), + Unit); // No need to fill in the Name, Line, Size, Alignment, Offset in case of // CVR derived types. @@ -961,10 +1016,10 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty, Ty->getPointeeType(), Unit); } -llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, - llvm::DIFile *Unit) { +llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, llvm::DIFile *Unit, + TypeLoc TL) { return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty, - Ty->getPointeeType(), Unit); + Ty->getPointeeType(), Unit, TL); } /// \return whether a C++ mangling exists for the type defined by TD. @@ -1105,7 +1160,8 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, QualType PointeeTy, - llvm::DIFile *Unit) { + llvm::DIFile *Unit, + TypeLoc TL) { // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here // because that does not return the correct value for references. @@ -1115,13 +1171,52 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, Optional<unsigned> DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(AddressSpace); + llvm::DINodeArray Annotations = nullptr; + TypeLoc NextTL; + if (TL) { + SmallVector<llvm::Metadata *, 4> Annots; + NextTL = TL.getNextTypeLoc(); + if (NextTL) { + // Traverse all MacroQualifiedTypeLoc, QualifiedTypeLoc and + // AttributedTypeLoc type locations so we can collect + // BTFTypeTag attributes for this pointer. 
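// Illustrative sketch (assumption, not part of this change): for a BPF-style
// declaration such as
//   #define __user __attribute__((btf_type_tag("user")))
//   int read_user(int __user *p);
// the tag on the pointee type is discovered while walking these type
// locations and is attached to the pointer's debug type as an annotation
// node of the form !{!"btf_type_tag", !"user"}.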
+ while (true) { + if (auto MTL = NextTL.getAs<MacroQualifiedTypeLoc>()) { + NextTL = MTL.getInnerLoc(); + } else if (auto QTL = NextTL.getAs<QualifiedTypeLoc>()) { + NextTL = QTL.getNextTypeLoc(); + } else if (auto ATL = NextTL.getAs<AttributedTypeLoc>()) { + if (const auto *A = ATL.getAttrAs<BTFTypeTagAttr>()) { + StringRef BTFTypeTag = A->getBTFTypeTag(); + if (!BTFTypeTag.empty()) { + llvm::Metadata *Ops[2] = { + llvm::MDString::get(CGM.getLLVMContext(), + StringRef("btf_type_tag")), + llvm::MDString::get(CGM.getLLVMContext(), BTFTypeTag)}; + Annots.insert(Annots.begin(), + llvm::MDNode::get(CGM.getLLVMContext(), Ops)); + } + } + NextTL = ATL.getModifiedLoc(); + } else { + break; + } + } + } + + NextTL = StripMacroAttributed(TL.getNextTypeLoc()); + if (Annots.size() > 0) + Annotations = DBuilder.getOrCreateArray(Annots); + } + if (Tag == llvm::dwarf::DW_TAG_reference_type || Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit), Size, Align, DWARFAddressSpace); else - return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size, - Align, DWARFAddressSpace); + return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit, NextTL), + Size, Align, DWARFAddressSpace, + StringRef(), Annotations); } llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, @@ -1226,7 +1321,8 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, SmallString<128> NS; llvm::raw_svector_ostream OS(NS); - Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false); + Ty->getTemplateName().print(OS, getPrintingPolicy(), + TemplateName::Qualified::None); printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy()); SourceLocation Loc = AliasDecl->getLocation(); @@ -1237,8 +1333,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile *Unit) { + TypeLoc TL; + if (const TypeSourceInfo *TSI = Ty->getDecl()->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); llvm::DIType *Underlying = - getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit); + getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit, TL); if (Ty->getDecl()->hasAttr<NoDebugAttr>()) return Underlying; @@ -1249,9 +1348,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, uint32_t Align = getDeclAlignIfRequired(Ty->getDecl(), CGM.getContext()); // Typedefs are derived from some other type. 
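// Illustrative sketch (assumption, not part of this change): a declaration
// tag spelled on the typedef itself, e.g.
//   typedef int counter_t __attribute__((btf_decl_tag("percpu")));
// is collected below and attached to the resulting DW_TAG_typedef as a
// !{!"btf_decl_tag", !"percpu"} annotation.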
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(Ty->getDecl()); return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc), - getDeclContextDescriptor(Ty->getDecl()), Align); + getDeclContextDescriptor(Ty->getDecl()), Align, + Annotations); } static unsigned getDwarfCC(CallingConv CC) { @@ -1300,27 +1401,74 @@ static unsigned getDwarfCC(CallingConv CC) { return 0; } +static llvm::DINode::DIFlags getRefFlags(const FunctionProtoType *Func) { + llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + if (Func->getExtProtoInfo().RefQualifier == RQ_LValue) + Flags |= llvm::DINode::FlagLValueReference; + if (Func->getExtProtoInfo().RefQualifier == RQ_RValue) + Flags |= llvm::DINode::FlagRValueReference; + return Flags; +} + llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, - llvm::DIFile *Unit) { + llvm::DIFile *Unit, TypeLoc TL) { + const auto *FPT = dyn_cast<FunctionProtoType>(Ty); + if (FPT) { + if (llvm::DIType *QTy = CreateQualifiedType(FPT, Unit)) + return QTy; + } + + // Create the type without any qualifiers + SmallVector<llvm::Metadata *, 16> EltTys; // Add the result type at least. - EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit)); + TypeLoc RetTL; + if (TL) { + if (auto FTL = TL.getAs<FunctionTypeLoc>()) + RetTL = FTL.getReturnLoc(); + } + EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit, RetTL)); + llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; // Set up remainder of arguments if there is a prototype. // otherwise emit it as a variadic function. - if (isa<FunctionNoProtoType>(Ty)) + if (!FPT) { EltTys.push_back(DBuilder.createUnspecifiedParameter()); - else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) { - for (const QualType &ParamType : FPT->param_types()) - EltTys.push_back(getOrCreateType(ParamType, Unit)); + } else { + Flags = getRefFlags(FPT); + bool DoneWithTL = false; + if (TL) { + if (auto FTL = TL.getAs<FunctionTypeLoc>()) { + DoneWithTL = true; + unsigned Idx = 0; + unsigned FTL_NumParams = FTL.getNumParams(); + for (const QualType &ParamType : FPT->param_types()) { + TypeLoc ParamTL; + if (Idx < FTL_NumParams) { + if (ParmVarDecl *Param = FTL.getParam(Idx)) { + if (const TypeSourceInfo *TSI = Param->getTypeSourceInfo()) + ParamTL = TSI->getTypeLoc(); + } + } + EltTys.push_back(getOrCreateType(ParamType, Unit, ParamTL)); + Idx++; + } + } + } + + if (!DoneWithTL) { + for (const QualType &ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, Unit)); + } if (FPT->isVariadic()) EltTys.push_back(DBuilder.createUnspecifiedParameter()); } llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray, llvm::DINode::FlagZero, - getDwarfCC(Ty->getCallConv())); + llvm::DIType *F = DBuilder.createSubroutineType( + EltTypeArray, Flags, getDwarfCC(Ty->getCallConv())); + return F; } /// Convert an AccessSpecifier into the corresponding DINode flag. 
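// Illustrative note (assumption, not part of this change): with the
// CreateQualifiedType(const FunctionProtoType*) overload above, a
// cv-qualified prototype such as
//   using qualified_fn = void() const;
// is emitted as a DW_TAG_const_type wrapping the subroutine type, while a
// ref-qualified prototype like void() && now carries FlagRValueReference on
// the DISubroutineType via getRefFlags instead of dropping the qualifier.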
@@ -1377,17 +1525,19 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset; uint64_t OffsetInBits = StorageOffsetInBits + Offset; llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(BitFieldDecl); return DBuilder.createBitFieldMemberType( RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits, - Flags, DebugType); + Flags, DebugType, Annotations); } llvm::DIType * CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, - llvm::DIScope *scope, const RecordDecl *RD) { - llvm::DIType *debugType = getOrCreateType(type, tunit); + llvm::DIScope *scope, const RecordDecl *RD, + llvm::DINodeArray Annotations, TypeLoc TL) { + llvm::DIType *debugType = getOrCreateType(type, tunit, TL); // Get the location for the field. llvm::DIFile *file = getOrCreateFile(loc); @@ -1404,7 +1554,7 @@ CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, llvm::DINode::DIFlags flags = getAccessFlag(AS, RD); return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align, - offsetInBits, flags, debugType); + offsetInBits, flags, debugType, Annotations); } void CGDebugInfo::CollectRecordLambdaFields( @@ -1494,9 +1644,13 @@ void CGDebugInfo::CollectRecordNormalField( FieldType = createBitFieldType(field, RecordTy, RD); } else { auto Align = getDeclAlignIfRequired(field, CGM.getContext()); - FieldType = - createFieldType(name, type, field->getLocation(), field->getAccess(), - OffsetInBits, Align, tunit, RecordTy, RD); + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(field); + TypeLoc TL; + if (const TypeSourceInfo *TSI = field->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + FieldType = createFieldType(name, type, field->getLocation(), + field->getAccess(), OffsetInBits, Align, tunit, + RecordTy, RD, Annotations, TL); } elements.push_back(FieldType); @@ -1584,10 +1738,25 @@ llvm::DISubroutineType * CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit, bool decl) { + FunctionProtoType::ExtProtoInfo EPI = Func->getExtProtoInfo(); + Qualifiers &Qc = EPI.TypeQuals; + Qc.removeConst(); + Qc.removeVolatile(); + Qc.removeRestrict(); + Qc.removeUnaligned(); + // Keep the removed qualifiers in sync with + // CreateQualifiedType(const FunctionPrototype*, DIFile *Unit) + // On a 'real' member function type, these qualifiers are carried on the type + // of the first parameter, not as separate DW_TAG_const_type (etc) decorator + // tags around them. (But, in the raw function types with qualifiers, they have + // to use wrapper types.) + // Add "this" pointer. 
- llvm::DITypeRefArray Args( - cast<llvm::DISubroutineType>(getOrCreateType(QualType(Func, 0), Unit)) - ->getTypeArray()); + const auto *OriginalFunc = cast<llvm::DISubroutineType>( + getOrCreateType(CGM.getContext().getFunctionType( + Func->getReturnType(), Func->getParamTypes(), EPI), + Unit)); + llvm::DITypeRefArray Args = OriginalFunc->getTypeArray(); assert(Args.size() && "Invalid number of arguments!"); SmallVector<llvm::Metadata *, 16> Elts; @@ -1629,13 +1798,7 @@ CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr, llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); - llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; - if (Func->getExtProtoInfo().RefQualifier == RQ_LValue) - Flags |= llvm::DINode::FlagLValueReference; - if (Func->getExtProtoInfo().RefQualifier == RQ_RValue) - Flags |= llvm::DINode::FlagRValueReference; - - return DBuilder.createSubroutineType(EltTypeArray, Flags, + return DBuilder.createSubroutineType(EltTypeArray, OriginalFunc->getFlags(), getDwarfCC(Func->getCallConv())); } @@ -1887,23 +2050,25 @@ void CGDebugInfo::CollectCXXBasesAux( } llvm::DINodeArray -CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, - ArrayRef<TemplateArgument> TAList, +CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs, llvm::DIFile *Unit) { + if (!OArgs) + return llvm::DINodeArray(); + TemplateArgs &Args = *OArgs; SmallVector<llvm::Metadata *, 16> TemplateParams; - for (unsigned i = 0, e = TAList.size(); i != e; ++i) { - const TemplateArgument &TA = TAList[i]; + for (unsigned i = 0, e = Args.Args.size(); i != e; ++i) { + const TemplateArgument &TA = Args.Args[i]; StringRef Name; bool defaultParameter = false; - if (TPList) - Name = TPList->getParam(i)->getName(); + if (Args.TList) + Name = Args.TList->getParam(i)->getName(); switch (TA.getKind()) { case TemplateArgument::Type: { llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit); - if (TPList) + if (Args.TList) if (auto *templateType = - dyn_cast_or_null<TemplateTypeParmDecl>(TPList->getParam(i))) + dyn_cast_or_null<TemplateTypeParmDecl>(Args.TList->getParam(i))) if (templateType->hasDefaultArgument()) defaultParameter = templateType->getDefaultArgument() == TA.getAsType(); @@ -1914,9 +2079,9 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, } break; case TemplateArgument::Integral: { llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit); - if (TPList && CGM.getCodeGenOpts().DwarfVersion >= 5) - if (auto *templateType = - dyn_cast_or_null<NonTypeTemplateParmDecl>(TPList->getParam(i))) + if (Args.TList && CGM.getCodeGenOpts().DwarfVersion >= 5) + if (auto *templateType = dyn_cast_or_null<NonTypeTemplateParmDecl>( + Args.TList->getParam(i))) if (templateType->hasDefaultArgument() && !templateType->getDefaultArgument()->isValueDependent()) defaultParameter = llvm::APSInt::isSameValue( @@ -1993,15 +2158,19 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, TemplateParams.push_back(DBuilder.createTemplateValueParameter( TheCU, Name, TTy, defaultParameter, V)); } break; - case TemplateArgument::Template: + case TemplateArgument::Template: { + std::string QualName; + llvm::raw_string_ostream OS(QualName); + TA.getAsTemplate().getAsTemplateDecl()->printQualifiedName( + OS, getPrintingPolicy()); TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( - TheCU, Name, nullptr, - TA.getAsTemplate().getAsTemplateDecl()->getQualifiedNameAsString())); + TheCU, Name, nullptr, OS.str())); break; + } case 
TemplateArgument::Pack: TemplateParams.push_back(DBuilder.createTemplateParameterPack( TheCU, Name, nullptr, - CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit))); + CollectTemplateParams({{nullptr, TA.getPackAsArray()}}, Unit))); break; case TemplateArgument::Expression: { const Expr *E = TA.getAsExpr(); @@ -2024,43 +2193,72 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, return DBuilder.getOrCreateArray(TemplateParams); } -llvm::DINodeArray -CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, - llvm::DIFile *Unit) { +Optional<CGDebugInfo::TemplateArgs> +CGDebugInfo::GetTemplateArgs(const FunctionDecl *FD) const { if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplateSpecialization) { const TemplateParameterList *TList = FD->getTemplateSpecializationInfo() ->getTemplate() ->getTemplateParameters(); - return CollectTemplateParams( - TList, FD->getTemplateSpecializationArgs()->asArray(), Unit); + return {{TList, FD->getTemplateSpecializationArgs()->asArray()}}; } - return llvm::DINodeArray(); + return None; } - -llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, - llvm::DIFile *Unit) { +Optional<CGDebugInfo::TemplateArgs> +CGDebugInfo::GetTemplateArgs(const VarDecl *VD) const { // Always get the full list of parameters, not just the ones from the // specialization. A partial specialization may have fewer parameters than // there are arguments. - auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL); + auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VD); if (!TS) - return llvm::DINodeArray(); + return None; VarTemplateDecl *T = TS->getSpecializedTemplate(); const TemplateParameterList *TList = T->getTemplateParameters(); auto TA = TS->getTemplateArgs().asArray(); - return CollectTemplateParams(TList, TA, Unit); + return {{TList, TA}}; +} +Optional<CGDebugInfo::TemplateArgs> +CGDebugInfo::GetTemplateArgs(const RecordDecl *RD) const { + if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) { + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. + TemplateParameterList *TPList = + TSpecial->getSpecializedTemplate()->getTemplateParameters(); + const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); + return {{TPList, TAList.asArray()}}; + } + return None; } -llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( - const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) { - // Always get the full list of parameters, not just the ones from the - // specialization. A partial specialization may have fewer parameters than - // there are arguments. 
- TemplateParameterList *TPList = - TSpecial->getSpecializedTemplate()->getTemplateParameters(); - const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); - return CollectTemplateParams(TPList, TAList.asArray(), Unit); +llvm::DINodeArray +CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, + llvm::DIFile *Unit) { + return CollectTemplateParams(GetTemplateArgs(FD), Unit); +} + +llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, + llvm::DIFile *Unit) { + return CollectTemplateParams(GetTemplateArgs(VL), Unit); +} + +llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(const RecordDecl *RD, + llvm::DIFile *Unit) { + return CollectTemplateParams(GetTemplateArgs(RD), Unit); +} + +llvm::DINodeArray CGDebugInfo::CollectBTFDeclTagAnnotations(const Decl *D) { + if (!D->hasAttr<BTFDeclTagAttr>()) + return nullptr; + + SmallVector<llvm::Metadata *, 4> Annotations; + for (const auto *I : D->specific_attrs<BTFDeclTagAttr>()) { + llvm::Metadata *Ops[2] = { + llvm::MDString::get(CGM.getLLVMContext(), StringRef("btf_decl_tag")), + llvm::MDString::get(CGM.getLLVMContext(), I->getBTFDeclTag())}; + Annotations.push_back(llvm::MDNode::get(CGM.getLLVMContext(), Ops)); + } + return DBuilder.getOrCreateArray(Annotations); } llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { @@ -3210,7 +3408,8 @@ void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr()); } -llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { +llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit, + TypeLoc TL) { if (Ty.isNull()) return nullptr; @@ -3227,7 +3426,7 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { if (auto *T = getTypeOrNull(Ty)) return T; - llvm::DIType *Res = CreateTypeNode(Ty, Unit); + llvm::DIType *Res = CreateTypeNode(Ty, Unit, TL); void *TyPtr = Ty.getAsOpaquePtr(); // And update the type cache. @@ -3271,10 +3470,11 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { return nullptr; } -llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { +llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit, + TypeLoc TL) { // Handle qualifiers, which recursively handles what they refer to. if (Ty.hasLocalQualifiers()) - return CreateQualifiedType(Ty, Unit); + return CreateQualifiedType(Ty, Unit, TL); // Work out details of type. 
switch (Ty->getTypeClass()) { @@ -3303,7 +3503,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Complex: return CreateType(cast<ComplexType>(Ty)); case Type::Pointer: - return CreateType(cast<PointerType>(Ty), Unit); + return CreateType(cast<PointerType>(Ty), Unit, TL); case Type::BlockPointer: return CreateType(cast<BlockPointerType>(Ty), Unit); case Type::Typedef: @@ -3314,7 +3514,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { return CreateEnumType(cast<EnumType>(Ty)); case Type::FunctionProto: case Type::FunctionNoProto: - return CreateType(cast<FunctionType>(Ty), Unit); + return CreateType(cast<FunctionType>(Ty), Unit, TL); case Type::ConstantArray: case Type::VariableArray: case Type::IncompleteArray: @@ -3435,9 +3635,10 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { Flags |= llvm::DINode::FlagExportSymbols; } + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D); llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, - Flags, Identifier); + Flags, Identifier, Annotations); // Elements of composite types usually have back to the type, creating // uniquing cycles. Distinct nodes are more efficient. @@ -3858,7 +4059,26 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, getDwarfCC(CC)); } - return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F)); + TypeLoc TL; + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { + if (const TypeSourceInfo *TSI = FD->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + } + return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F, TL)); +} + +QualType +CGDebugInfo::getFunctionType(const FunctionDecl *FD, QualType RetTy, + const SmallVectorImpl<const VarDecl *> &Args) { + CallingConv CC = CallingConv::CC_C; + if (FD) + if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>()) + CC = SrcFnTy->getCallConv(); + SmallVector<QualType, 16> ArgTypes; + for (const VarDecl *VD : Args) + ArgTypes.push_back(VD->getType()); + return CGM.getContext().getFunctionType(RetTy, ArgTypes, + FunctionProtoType::ExtProtoInfo(CC)); } void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc, @@ -3935,10 +4155,13 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc, unsigned ScopeLine = getLineNumber(ScopeLoc); llvm::DISubroutineType *DIFnType = getOrCreateFunctionType(D, FnType, Unit); llvm::DISubprogram *Decl = nullptr; - if (D) + llvm::DINodeArray Annotations = nullptr; + if (D) { Decl = isa<ObjCMethodDecl>(D) ? getObjCMethodDeclaration(D, DIFnType, LineNo, Flags, SPFlags) : getFunctionDeclaration(D); + Annotations = CollectBTFDeclTagAnnotations(D); + } // FIXME: The function declaration we're constructing here is mostly reusing // declarations from CXXMethodDecl and not constructing new ones for arbitrary @@ -3947,7 +4170,8 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc, // are emitted as CU level entities by the backend. llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, DIFnType, ScopeLine, - FlagsForDef, SPFlagsForDef, TParamsArray.get(), Decl); + FlagsForDef, SPFlagsForDef, TParamsArray.get(), Decl, nullptr, + Annotations); Fn->setSubprogram(SP); // We might get here with a VarDecl in the case we're generating // code for the initialization of globals. 
Do not record these decls @@ -4006,10 +4230,11 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, if (CGM.getLangOpts().Optimize) SPFlags |= llvm::DISubprogram::SPFlagOptimized; + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D); llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, - TParamsArray.get(), getFunctionDeclaration(D)); + TParamsArray.get(), getFunctionDeclaration(D), nullptr, Annotations); if (IsDeclForCallSite) Fn->setSubprogram(SP); @@ -4241,8 +4466,12 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, uint64_t XOffset = 0; if (VD->hasAttr<BlocksAttr>()) Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType; - else - Ty = getOrCreateType(VD->getType(), Unit); + else { + TypeLoc TL; + if (const TypeSourceInfo *TSI = VD->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + Ty = getOrCreateType(VD->getType(), Unit, TL); + } // If there is no debug info for this type then do not emit debug info // for this variable. @@ -4337,8 +4566,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, // Use DW_OP_deref to tell the debugger to load the pointer and treat it as // the address of the variable. if (UsePointerValue) { - assert(std::find(Expr.begin(), Expr.end(), llvm::dwarf::DW_OP_deref) == - Expr.end() && + assert(!llvm::is_contained(Expr, llvm::dwarf::DW_OP_deref) && "Debug info already contains DW_OP_deref."); Expr.push_back(llvm::dwarf::DW_OP_deref); } @@ -4346,8 +4574,10 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, // Create the descriptor for the variable. llvm::DILocalVariable *D = nullptr; if (ArgNo) { + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(VD); D = DBuilder.createParameterVariable(Scope, Name, *ArgNo, Unit, Line, Ty, - CGM.getLangOpts().Optimize, Flags); + CGM.getLangOpts().Optimize, Flags, + Annotations); } else { // For normal local variable, we will try to find out whether 'VD' is the // copy parameter of coroutine. @@ -4653,7 +4883,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, llvm::DIType *fieldType; if (capture->isByRef()) { TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy); - auto Align = PtrInfo.AlignIsRequired ? PtrInfo.Align : 0; + auto Align = PtrInfo.isAlignRequired() ? PtrInfo.Align : 0; // FIXME: This recomputes the layout of the BlockByRefWrapper. uint64_t xoffset; fieldType = @@ -4740,14 +4970,172 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls( return GVE; } +namespace { +struct ReconstitutableType : public RecursiveASTVisitor<ReconstitutableType> { + bool Reconstitutable = true; + bool VisitVectorType(VectorType *FT) { + Reconstitutable = false; + return false; + } + bool VisitAtomicType(AtomicType *FT) { + Reconstitutable = false; + return false; + } + bool TraverseEnumType(EnumType *ET) { + // Unnamed enums can't be reconstituted due to a lack of column info we + // produce in the DWARF, so we can't get Clang's full name back. 
+ if (const auto *ED = dyn_cast<EnumDecl>(ET->getDecl())) { + if (!ED->getIdentifier()) { + Reconstitutable = false; + return false; + } + } + return true; + } + bool VisitFunctionProtoType(FunctionProtoType *FT) { + // noexcept is not encoded in DWARF, so the reversi + Reconstitutable &= !isNoexceptExceptionSpec(FT->getExceptionSpecType()); + return Reconstitutable; + } + bool TraverseRecordType(RecordType *RT) { + // Unnamed classes/lambdas can't be reconstituted due to a lack of column + // info we produce in the DWARF, so we can't get Clang's full name back. + // But so long as it's not one of those, it doesn't matter if some sub-type + // of the record (a template parameter) can't be reconstituted - because the + // un-reconstitutable type itself will carry its own name. + const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); + if (!RD) + return true; + if (RD->isLambda() || !RD->getIdentifier()) { + Reconstitutable = false; + return false; + } + return true; + } +}; +} // anonymous namespace + +// Test whether a type name could be rebuilt from emitted debug info. +static bool IsReconstitutableType(QualType QT) { + ReconstitutableType T; + T.TraverseType(QT); + return T.Reconstitutable; +} + std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const { std::string Name; llvm::raw_string_ostream OS(Name); - if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) { - PrintingPolicy PP = getPrintingPolicy(); - PP.PrintCanonicalTypes = true; - PP.SuppressInlineNamespace = false; + const NamedDecl *ND = dyn_cast<NamedDecl>(D); + if (!ND) + return Name; + codegenoptions::DebugTemplateNamesKind TemplateNamesKind = + CGM.getCodeGenOpts().getDebugSimpleTemplateNames(); + Optional<TemplateArgs> Args; + + bool IsOperatorOverload = false; // isa<CXXConversionDecl>(ND); + if (auto *RD = dyn_cast<CXXRecordDecl>(ND)) { + Args = GetTemplateArgs(RD); + } else if (auto *FD = dyn_cast<FunctionDecl>(ND)) { + Args = GetTemplateArgs(FD); + auto NameKind = ND->getDeclName().getNameKind(); + IsOperatorOverload |= + NameKind == DeclarationName::CXXOperatorName || + NameKind == DeclarationName::CXXConversionFunctionName; + } else if (auto *VD = dyn_cast<VarDecl>(ND)) { + Args = GetTemplateArgs(VD); + } + std::function<bool(ArrayRef<TemplateArgument>)> HasReconstitutableArgs = + [&](ArrayRef<TemplateArgument> Args) { + return llvm::all_of(Args, [&](const TemplateArgument &TA) { + switch (TA.getKind()) { + case TemplateArgument::Template: + // Easy to reconstitute - the value of the parameter in the debug + // info is the string name of the template. (so the template name + // itself won't benefit from any name rebuilding, but that's a + // representational limitation - maybe DWARF could be + // changed/improved to use some more structural representation) + return true; + case TemplateArgument::Declaration: + // Reference and pointer non-type template parameters point to + // variables, functions, etc and their value is, at best (for + // variables) represented as an address - not a reference to the + // DWARF describing the variable/function/etc. This makes it hard, + // possibly impossible to rebuild the original name - looking up the + // address in the executable file's symbol table would be needed. + return false; + case TemplateArgument::NullPtr: + // These could be rebuilt, but figured they're close enough to the + // declaration case, and not worth rebuilding. + return false; + case TemplateArgument::Pack: + // A pack is invalid if any of the elements of the pack are invalid. 
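// Illustrative note (assumption, not part of this change): a pack whose
// elements are all ordinary named types, e.g. the arguments of f<int, long>(),
// keeps the simplified template name, whereas a pack containing a lambda or
// an unnamed enum falls back to emitting the full name.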
+ return HasReconstitutableArgs(TA.getPackAsArray()); + case TemplateArgument::Integral: + // Larger integers get encoded as DWARF blocks which are a bit + // harder to parse back into a large integer, etc - so punting on + // this for now. Re-parsing the integers back into APInt is probably + // feasible some day. + return TA.getAsIntegral().getBitWidth() <= 64; + case TemplateArgument::Type: + return IsReconstitutableType(TA.getAsType()); + default: + llvm_unreachable("Other, unresolved, template arguments should " + "not be seen here"); + } + }); + }; + // A conversion operator presents complications/ambiguity if there's a + // conversion to class template that is itself a template, eg: + // template<typename T> + // operator ns::t1<T, int>(); + // This should be named, eg: "operator ns::t1<float, int><float>" + // (ignoring clang bug that means this is currently "operator t1<float>") + // but if the arguments were stripped, the consumer couldn't differentiate + // whether the template argument list for the conversion type was the + // function's argument list (& no reconstitution was needed) or not. + // This could be handled if reconstitutable names had a separate attribute + // annotating them as such - this would remove the ambiguity. + // + // Alternatively the template argument list could be parsed enough to check + // whether there's one list or two, then compare that with the DWARF + // description of the return type and the template argument lists to determine + // how many lists there should be and if one is missing it could be assumed(?) + // to be the function's template argument list & then be rebuilt. + // + // Other operator overloads that aren't conversion operators could be + // reconstituted but would require a bit more nuance about detecting the + // difference between these different operators during that rebuilding. + bool Reconstitutable = + Args && HasReconstitutableArgs(Args->Args) && !IsOperatorOverload; + + PrintingPolicy PP = getPrintingPolicy(); + + if (TemplateNamesKind == codegenoptions::DebugTemplateNamesKind::Full || + !Reconstitutable) { ND->getNameForDiagnostic(OS, PP, Qualified); + } else { + bool Mangled = + TemplateNamesKind == codegenoptions::DebugTemplateNamesKind::Mangled; + // check if it's a template + if (Mangled) + OS << "_STN"; + + OS << ND->getDeclName(); + std::string EncodedOriginalName; + llvm::raw_string_ostream EncodedOriginalNameOS(EncodedOriginalName); + EncodedOriginalNameOS << ND->getDeclName(); + + if (Mangled) { + OS << "|"; + printTemplateArgumentList(OS, Args->Args, PP); + printTemplateArgumentList(EncodedOriginalNameOS, Args->Args, PP); +#ifndef NDEBUG + std::string CanonicalOriginalName; + llvm::raw_string_ostream OriginalOS(CanonicalOriginalName); + ND->getNameForDiagnostic(OriginalOS, PP, Qualified); + assert(EncodedOriginalNameOS.str() == OriginalOS.str()); +#endif + } } return Name; } @@ -4807,12 +5195,17 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } AppendAddressSpaceXDeref(AddressSpace, Expr); + TypeLoc TL; + if (const TypeSourceInfo *TSI = D->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D); GVE = DBuilder.createGlobalVariableExpression( - DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasLocalLinkage(), true, + DContext, DeclName, LinkageName, Unit, LineNo, + getOrCreateType(T, Unit, TL), Var->hasLocalLinkage(), true, Expr.empty() ? 
nullptr : DBuilder.createExpression(Expr), getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, - Align); + Align, Annotations); Var->addDebugInfo(GVE); } DeclCache[D->getCanonicalDecl()].reset(GVE); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h index b01165f85a6c..a7b72fa5f5a6 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h @@ -178,14 +178,19 @@ class CGDebugInfo { llvm::DIType *CreateType(const ComplexType *Ty); llvm::DIType *CreateType(const AutoType *Ty); llvm::DIType *CreateType(const ExtIntType *Ty); - llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); + llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg, + TypeLoc TL = TypeLoc()); + llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty, + llvm::DIFile *Fg); llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TemplateSpecializationType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const ObjCObjectPointerType *Ty, llvm::DIFile *F); - llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F, + TypeLoc TL = TypeLoc()); llvm::DIType *CreateType(const BlockPointerType *Ty, llvm::DIFile *F); - llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F, + TypeLoc TL = TypeLoc()); /// Get structure or union type. llvm::DIType *CreateType(const RecordType *Tyg); llvm::DIType *CreateTypeDefinition(const RecordType *Ty); @@ -240,7 +245,8 @@ class CGDebugInfo { /// \return namespace descriptor for the given namespace decl. llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N); llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, - QualType PointeeTy, llvm::DIFile *F); + QualType PointeeTy, llvm::DIFile *F, + TypeLoc TL = TypeLoc()); llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache); /// A helper function to create a subprogram for a single member @@ -272,9 +278,12 @@ class CGDebugInfo { llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes, llvm::DINode::DIFlags StartingFlags); + struct TemplateArgs { + const TemplateParameterList *TList; + llvm::ArrayRef<TemplateArgument> Args; + }; /// A helper function to collect template parameters. - llvm::DINodeArray CollectTemplateParams(const TemplateParameterList *TPList, - ArrayRef<TemplateArgument> TAList, + llvm::DINodeArray CollectTemplateParams(Optional<TemplateArgs> Args, llvm::DIFile *Unit); /// A helper function to collect debug info for function template /// parameters. @@ -286,17 +295,25 @@ class CGDebugInfo { llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD, llvm::DIFile *Unit); + Optional<TemplateArgs> GetTemplateArgs(const VarDecl *) const; + Optional<TemplateArgs> GetTemplateArgs(const RecordDecl *) const; + Optional<TemplateArgs> GetTemplateArgs(const FunctionDecl *) const; + /// A helper function to collect debug info for template /// parameters. - llvm::DINodeArray - CollectCXXTemplateParams(const ClassTemplateSpecializationDecl *TS, - llvm::DIFile *F); + llvm::DINodeArray CollectCXXTemplateParams(const RecordDecl *TS, + llvm::DIFile *F); + + /// A helper function to collect debug info for btf_decl_tag annotations. 
+ llvm::DINodeArray CollectBTFDeclTagAnnotations(const Decl *D); llvm::DIType *createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, llvm::DIScope *scope, - const RecordDecl *RD = nullptr); + const RecordDecl *RD = nullptr, + llvm::DINodeArray Annotations = nullptr, + TypeLoc TL = TypeLoc()); llvm::DIType *createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, @@ -417,6 +434,9 @@ public: /// location will be reused. void EmitLocation(CGBuilderTy &Builder, SourceLocation Loc); + QualType getFunctionType(const FunctionDecl *FD, QualType RetTy, + const SmallVectorImpl<const VarDecl *> &Args); + /// Emit a call to llvm.dbg.function.start to indicate /// start of a new function. /// \param Loc The location of the function header. @@ -613,7 +633,8 @@ private: Optional<StringRef> Source); /// Get the type from the cache or create a new type if necessary. - llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg); + llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg, + TypeLoc TL = TypeLoc()); /// Get a reference to a clang module. If \p CreateSkeletonCU is true, /// this also creates a split dwarf skeleton compile unit. @@ -628,7 +649,8 @@ private: llvm::DICompositeType *getOrCreateLimitedType(const RecordType *Ty); /// Create type metadata for a source language type. - llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg); + llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg, + TypeLoc TL = TypeLoc()); /// Create new member and increase Offset by FType's size. llvm::DIType *CreateMemberType(llvm::DIFile *Unit, QualType FType, diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp index 5b3d39f20b41..941671c61482 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp @@ -1142,7 +1142,7 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D, GV->setAlignment(Align.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CacheEntry = GV; - } else if (CacheEntry->getAlignment() < Align.getQuantity()) { + } else if (CacheEntry->getAlignment() < uint64_t(Align.getQuantity())) { CacheEntry->setAlignment(Align.getAsAlign()); } @@ -1447,6 +1447,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { address = OpenMPLocalAddr; + AllocaAddr = OpenMPLocalAddr; } else if (Ty->isConstantSizeType()) { // If this value is an array or struct with a statically determinable // constant initializer, there are optimizations we can do. @@ -1492,6 +1493,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). address = ReturnValue; + AllocaAddr = ReturnValue; if (const RecordType *RecordTy = Ty->getAs<RecordType>()) { const auto *RD = RecordTy->getDecl(); @@ -1503,7 +1505,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // applied. 
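For context, a hedged sketch (names are illustrative, not from the patch) of the kind of function this code handles: the named return value may be constructed directly in the return slot, and a one-byte "nrvo" flag records whether it was actually returned so its destructor can be skipped on scope exit.

    struct Widget { Widget(); ~Widget(); };
    Widget make() {
      Widget W;   // NRVO candidate: constructed directly in the return slot
      return W;   // sets the "nrvo" flag, so W's destructor is not run again
                  // when the enclosing scope is unwound
    }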
llvm::Value *Zero = Builder.getFalse(); Address NRVOFlag = - CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo"); + CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo", + /*ArraySize=*/nullptr, &AllocaAddr); EnsureInsertPoint(); Builder.CreateStore(Zero, NRVOFlag); @@ -1605,10 +1608,11 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { DI->setLocation(D.getLocation()); // If NRVO, use a pointer to the return address. - if (UsePointerValue) + if (UsePointerValue) { DebugAddr = ReturnValuePointer; - - (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder, + AllocaAddr = ReturnValuePointer; + } + (void)DI->EmitDeclareOfAutoVariable(&D, AllocaAddr.getPointer(), Builder, UsePointerValue); } @@ -2450,6 +2454,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, } Address DeclPtr = Address::invalid(); + Address AllocaPtr = Address::invalid(); bool DoStore = false; bool IsScalar = hasScalarEvaluationKind(Ty); // If we already have a pointer to the argument, reuse the input pointer. @@ -2464,6 +2469,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // from the default address space. auto AllocaAS = CGM.getASTAllocaAddressSpace(); auto *V = DeclPtr.getPointer(); + AllocaPtr = DeclPtr; auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS; auto DestLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default; @@ -2500,10 +2506,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, : Address::invalid(); if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { DeclPtr = OpenMPLocalAddr; + AllocaPtr = DeclPtr; } else { // Otherwise, create a temporary to hold the value. DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D), - D.getName() + ".addr"); + D.getName() + ".addr", &AllocaPtr); } DoStore = true; } @@ -2579,7 +2586,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().hasReducedDebugInfo() && !CurFuncIsThunk) { llvm::DILocalVariable *DILocalVar = DI->EmitDeclareOfArgVariable( - &D, DeclPtr.getPointer(), ArgNo, Builder); + &D, AllocaPtr.getPointer(), ArgNo, Builder); if (const auto *Var = dyn_cast_or_null<ParmVarDecl>(&D)) DI->getParamDbgMappings().insert({Var, DILocalVar}); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp index 553fedebfe56..d22f9dc3b68c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp @@ -581,6 +581,16 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // llvm.used to prevent linker GC. addUsedGlobal(COMDATKey); } + + // If we used a COMDAT key for the global ctor, the init function can be + // discarded if the global ctor entry is discarded. + // FIXME: Do we need to restrict this to ELF and Wasm? + llvm::Comdat *C = Addr->getComdat(); + if (COMDATKey && C && + (getTarget().getTriple().isOSBinFormatELF() || + getTarget().getTriple().isOSBinFormatWasm())) { + Fn->setComdat(C); + } } else { I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash. 
if (I == DelayedCXXInitPosition.end()) { diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp index 9f65e9eb120c..aff9c77d53c7 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp @@ -477,11 +477,11 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) { return; ExceptionSpecificationType EST = Proto->getExceptionSpecType(); - if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) { - // noexcept functions are simple terminate scopes. - if (!getLangOpts().EHAsynch) // -EHa: HW exception still can occur - EHStack.pushTerminate(); - } else if (EST == EST_Dynamic || EST == EST_DynamicNone) { + // In C++17 and later, 'throw()' aka EST_DynamicNone is treated the same way + // as noexcept. In earlier standards, it is handled in this block, along with + // 'throw(X...)'. + if (EST == EST_Dynamic || + (EST == EST_DynamicNone && !getLangOpts().CPlusPlus17)) { // TODO: Revisit exception specifications for the MS ABI. There is a way to // encode these in an object file but MSVC doesn't do anything with it. if (getTarget().getCXXABI().isMicrosoft()) @@ -521,6 +521,10 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) { /*ForEH=*/true); Filter->setFilter(I, EHType); } + } else if (Proto->canThrow() == CT_Cannot) { + // noexcept functions are simple terminate scopes. + if (!getLangOpts().EHAsynch) // -EHa: HW exception still can occur + EHStack.pushTerminate(); } } @@ -580,10 +584,8 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { return; ExceptionSpecificationType EST = Proto->getExceptionSpecType(); - if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot && - !EHStack.empty() /* possible empty when under async exceptions */) { - EHStack.popTerminate(); - } else if (EST == EST_Dynamic || EST == EST_DynamicNone) { + if (EST == EST_Dynamic || + (EST == EST_DynamicNone && !getLangOpts().CPlusPlus17)) { // TODO: Revisit exception specifications for the MS ABI. There is a way to // encode these in an object file but MSVC doesn't do anything with it. if (getTarget().getCXXABI().isMicrosoft()) @@ -599,6 +601,10 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { EHFilterScope &filterScope = cast<EHFilterScope>(*EHStack.begin()); emitFilterDispatchBlock(*this, filterScope); EHStack.popFilter(); + } else if (Proto->canThrow() == CT_Cannot && + /* possible empty when under async exceptions */ + !EHStack.empty()) { + EHStack.popTerminate(); } } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp index bf514aab8851..4332e74dbb24 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/MatrixBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" @@ -94,7 +95,7 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // otherwise alloca is inserted at the current insertion point of the // builder. 
if (!ArraySize) - Builder.SetInsertPoint(AllocaInsertPt); + Builder.SetInsertPoint(getPostAllocaInsertPoint()); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); @@ -122,23 +123,10 @@ llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name) { CharUnits Align = - CharUnits::fromQuantity(CGM.getDataLayout().getABITypeAlignment(Ty)); + CharUnits::fromQuantity(CGM.getDataLayout().getPrefTypeAlignment(Ty)); return CreateTempAlloca(Ty, Align, Name); } -void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) { - auto *Alloca = Var.getPointer(); - assert(isa<llvm::AllocaInst>(Alloca) || - (isa<llvm::AddrSpaceCastInst>(Alloca) && - isa<llvm::AllocaInst>( - cast<llvm::AddrSpaceCastInst>(Alloca)->getPointerOperand()))); - - auto *Store = new llvm::StoreInst(Init, Alloca, /*volatile*/ false, - Var.getAlignment().getAsAlign()); - llvm::BasicBlock *Block = AllocaInsertPt->getParent(); - Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); -} - Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) { CharUnits Align = getContext().getTypeAlignInChars(Ty); return CreateTempAlloca(ConvertType(Ty), Align, Name); @@ -580,8 +568,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Perform derived-to-base casts and/or field accesses, to get from the // temporary object we created (and, potentially, for which we extended // the lifetime) to the subobject we're binding the reference to. - for (unsigned I = Adjustments.size(); I != 0; --I) { - SubobjectAdjustment &Adjustment = Adjustments[I-1]; + for (SubobjectAdjustment &Adjustment : llvm::reverse(Adjustments)) { switch (Adjustment.Kind) { case SubobjectAdjustment::DerivedToBaseAdjustment: Object = @@ -667,9 +654,9 @@ bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { } bool CodeGenFunction::sanitizePerformTypeCheck() const { - return SanOpts.has(SanitizerKind::Null) | - SanOpts.has(SanitizerKind::Alignment) | - SanOpts.has(SanitizerKind::ObjectSize) | + return SanOpts.has(SanitizerKind::Null) || + SanOpts.has(SanitizerKind::Alignment) || + SanOpts.has(SanitizerKind::ObjectSize) || SanOpts.has(SanitizerKind::Vptr); } @@ -1642,7 +1629,7 @@ static bool getRangeForType(CodeGenFunction &CGF, QualType Ty, } else { assert(NumPositiveBits <= Bitwidth); End = llvm::APInt(Bitwidth, 1) << NumPositiveBits; - Min = llvm::APInt(Bitwidth, 0); + Min = llvm::APInt::getZero(Bitwidth); } } return true; @@ -1939,10 +1926,15 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { return EmitLoadOfGlobalRegLValue(LV); if (LV.isMatrixElt()) { + llvm::Value *Idx = LV.getMatrixIdx(); + if (CGM.getCodeGenOpts().OptimizationLevel > 0) { + const auto *const MatTy = LV.getType()->getAs<ConstantMatrixType>(); + llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened()); + } llvm::LoadInst *Load = Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified()); - return RValue::get( - Builder.CreateExtractElement(Load, LV.getMatrixIdx(), "matrixext")); + return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext")); } assert(LV.isBitField() && "Unknown LValue type!"); @@ -2080,9 +2072,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, return EmitStoreThroughGlobalRegLValue(Src, Dst); if 
(Dst.isMatrixElt()) { - llvm::Value *Vec = Builder.CreateLoad(Dst.getMatrixAddress()); - Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(), - Dst.getMatrixIdx(), "matins"); + llvm::Value *Idx = Dst.getMatrixIdx(); + if (CGM.getCodeGenOpts().OptimizationLevel > 0) { + const auto *const MatTy = Dst.getType()->getAs<ConstantMatrixType>(); + llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened()); + } + llvm::Instruction *Load = Builder.CreateLoad(Dst.getMatrixAddress()); + llvm::Value *Vec = + Builder.CreateInsertElement(Load, Src.getScalarVal(), Idx, "matins"); Builder.CreateStore(Vec, Dst.getMatrixAddress(), Dst.isVolatileQualified()); return; @@ -3498,7 +3496,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); + TrapCall->addFnAttr(A); } TrapCall->setDoesNotReturn(); TrapCall->setDoesNotThrow(); @@ -3522,7 +3520,7 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) { if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); + TrapCall->addFnAttr(A); } return TrapCall; @@ -4684,10 +4682,28 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_UserDefinedConversion: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: - case CK_NoOp: case CK_LValueToRValue: return EmitLValue(E->getSubExpr()); + case CK_NoOp: { + // CK_NoOp can model a qualification conversion, which can remove an array + // bound and change the IR type. + // FIXME: Once pointee types are removed from IR, remove this. + LValue LV = EmitLValue(E->getSubExpr()); + if (LV.isSimple()) { + Address V = LV.getAddress(*this); + if (V.isValid()) { + llvm::Type *T = + ConvertTypeForMem(E->getType()) + ->getPointerTo( + cast<llvm::PointerType>(V.getType())->getAddressSpace()); + if (V.getType() != T) + LV.setAddress(Builder.CreateBitCast(V, T)); + } + } + return LV; + } + case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { const auto *DerivedClassTy = @@ -4879,12 +4895,28 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); if (auto builtinID = FD->getBuiltinID()) { - // Replaceable builtin provide their own implementation of a builtin. Unless - // we are in the builtin implementation itself, don't call the actual - // builtin. If we are in the builtin implementation, avoid trivial infinite + std::string FDInlineName = (FD->getName() + ".inline").str(); + // When directly calling an inline builtin, call it through its mangled + // name to make it clear it's not the actual builtin.
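A hedged sketch of the C source pattern that takes this path (the function chosen is only an example): a gnu_inline, always_inline redefinition of a library builtin, whose callers are redirected to an internal "<name>.inline" clone instead of the builtin itself.

    extern inline __attribute__((always_inline, gnu_inline))
    void *memcpy(void *dst, const void *src, unsigned long n) {
      /* user-supplied "inline builtin" body; other functions calling memcpy
         are emitted as calls to an internal clone named "memcpy.inline" */
      return __builtin_memcpy(dst, src, n);
    }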
+ if (FD->isInlineBuiltinDeclaration() && + CGF.CurFn->getName() != FDInlineName) { + llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD); + llvm::Function *Fn = llvm::cast<llvm::Function>(CalleePtr); + llvm::Module *M = Fn->getParent(); + llvm::Function *Clone = M->getFunction(FDInlineName); + if (!Clone) { + Clone = llvm::Function::Create(Fn->getFunctionType(), + llvm::GlobalValue::InternalLinkage, + Fn->getAddressSpace(), FDInlineName, M); + Clone->addFnAttr(llvm::Attribute::AlwaysInline); + } + return CGCallee::forDirect(Clone, GD); + } + + // Replaceable builtins provide their own implementation of a builtin. If we + // are in an inline builtin implementation, avoid trivial infinite // recursion. - if (!FD->isInlineBuiltinDeclaration() || - CGF.CurFn->getName() == FD->getName()) + else return CGCallee::forBuiltin(builtinID, FD); } @@ -4893,6 +4925,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { FD->hasAttr<CUDAGlobalAttr>()) CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub( cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts())); + return CGCallee::forDirect(CalleePtr, GD); } @@ -5306,9 +5339,13 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee // Generate function declaration DISuprogram in order to be used // in debug info about call sites. if (CGDebugInfo *DI = getDebugInfo()) { - if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) - DI->EmitFuncDeclForCallSite(CallOrInvoke, QualType(FnType, 0), + if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { + FunctionArgList Args; + QualType ResTy = BuildFunctionArgList(CalleeDecl, Args); + DI->EmitFuncDeclForCallSite(CallOrInvoke, + DI->getFunctionType(CalleeDecl, ResTy, Args), CalleeDecl); + } } return Call; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp index 1e81ad9f2dc7..5b56a587fa5f 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp @@ -127,6 +127,8 @@ public: } void VisitConstantExpr(ConstantExpr *E) { + EnsureDest(E->getType()); + if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) { CGF.EmitAggregateStore(Result, Dest.getAddress(), E->getType().isVolatileQualified()); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp index f42759e9db50..cc838bf38c6c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp @@ -1326,8 +1326,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, llvm::Function *Fn = dyn_cast<llvm::Function>(CalleePtr); if (CalleeDecl->isReplaceableGlobalAllocationFunction() && Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { - CallOrInvoke->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Builtin); + CallOrInvoke->addFnAttr(llvm::Attribute::Builtin); } return RV; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp index 47e41261e095..2c3d01153cf9 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp @@ -1369,7 +1369,7 @@ llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) { const Expr *Inner = CE->getSubExpr()->IgnoreImplicit(); QualType RetType; if (auto *Call = dyn_cast<CallExpr>(Inner)) - 
RetType = Call->getCallReturnType(CGF->getContext()); + RetType = Call->getCallReturnType(CGM.getContext()); else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Inner)) RetType = Ctor->getType(); llvm::Constant *Res = @@ -1714,6 +1714,8 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, QualType destType) { + assert(!destType->isVoidType() && "can't emit a void constant"); + Expr::EvalResult Result; bool Success = false; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp index 418f23bd1a97..ae9434f96529 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp @@ -419,6 +419,11 @@ public: Value *VisitExpr(Expr *S); Value *VisitConstantExpr(ConstantExpr *E) { + // A constant expression of type 'void' generates no code and produces no + // value. + if (E->getType()->isVoidType()) + return nullptr; + if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) { if (E->isGLValue()) return CGF.Builder.CreateLoad(Address( @@ -1647,7 +1652,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { for (unsigned i = 2; i < E->getNumSubExprs(); ++i) { llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2); // Check for -1 and output it as undef in the IR. - if (Idx.isSigned() && Idx.isAllOnesValue()) + if (Idx.isSigned() && Idx.isAllOnes()) Indices.push_back(-1); else Indices.push_back(Idx.getZExtValue()); @@ -1775,13 +1780,18 @@ Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { // integer value. Value *RowIdx = Visit(E->getRowIdx()); Value *ColumnIdx = Visit(E->getColumnIdx()); + + const auto *MatrixTy = E->getBase()->getType()->castAs<ConstantMatrixType>(); + unsigned NumRows = MatrixTy->getNumRows(); + llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + Value *Idx = MB.CreateIndex(RowIdx, ColumnIdx, NumRows); + if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0) + MB.CreateIndexAssumption(Idx, MatrixTy->getNumElementsFlattened()); + Value *Matrix = Visit(E->getBase()); // TODO: Should we emit bounds checks with SanitizerKind::ArrayBounds? - llvm::MatrixBuilder<CGBuilderTy> MB(Builder); - return MB.CreateExtractElement( - Matrix, RowIdx, ColumnIdx, - E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumRows()); + return Builder.CreateExtractElement(Matrix, Idx, "matrixext"); } static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx, @@ -2063,11 +2073,25 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // perform the bitcast. if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) { if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) { + // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate + // vector, use a vector insert and bitcast the result. 
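A sketch of the source-level cast this covers, assuming AArch64 SVE compiled with -msve-vector-bits=128 (the specific vector length is only an example): converting a fixed-length svbool_t back to the sizeless type is now emitted as an insert into a scalable i8 vector followed by a bitcast to the <vscale x 16 x i1> predicate type.

    #include <arm_sve.h>
    typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(128)));
    svbool_t to_sizeless(fixed_bool_t p) {
      return p;   // fixed i8 vector -> insert into scalable i8 vector -> bitcast
    }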
+ bool NeedsBitCast = false; + auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + llvm::Type *OrigType = DstTy; + if (ScalableDst == PredType && + FixedSrc->getElementType() == Builder.getInt8Ty()) { + DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2); + ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy); + NeedsBitCast = true; + } if (FixedSrc->getElementType() == ScalableDst->getElementType()) { llvm::Value *UndefVec = llvm::UndefValue::get(DstTy); llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); - return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero, - "castScalableSve"); + llvm::Value *Result = Builder.CreateInsertVector( + DstTy, UndefVec, Src, Zero, "castScalableSve"); + if (NeedsBitCast) + Result = Builder.CreateBitCast(Result, OrigType); + return Result; } } } @@ -2077,6 +2101,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // perform the bitcast. if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) { if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) { + // If we are casting a scalable 16 x i1 predicate vector to a fixed i8 + // vector, bitcast the source and use a vector extract. + auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + if (ScalableSrc == PredType && + FixedDst->getElementType() == Builder.getInt8Ty()) { + SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2); + ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy); + Src = Builder.CreateBitCast(Src, SrcTy); + } if (ScalableSrc->getElementType() == FixedDst->getElementType()) { llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve"); @@ -2087,10 +2120,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // Perform VLAT <-> VLST bitcast through memory. // TODO: since the llvm.experimental.vector.{insert,extract} intrinsics // require the element types of the vectors to be the same, we - // need to keep this around for casting between predicates, or more - // generally for bitcasts between VLAT <-> VLST where the element - // types of the vectors are not the same, until we figure out a better - // way of doing these casts. + // need to keep this around for bitcasts between VLAT <-> VLST where + // the element types of the vectors are not the same, until we figure + // out a better way of doing these casts. if ((isa<llvm::FixedVectorType>(SrcTy) && isa<llvm::ScalableVectorType>(DstTy)) || (isa<llvm::ScalableVectorType>(SrcTy) && @@ -2127,10 +2159,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: - case CK_NoOp: case CK_UserDefinedConversion: return Visit(const_cast<Expr*>(E)); + case CK_NoOp: { + llvm::Value *V = Visit(const_cast<Expr *>(E)); + if (V) { + // CK_NoOp can model a pointer qualification conversion, which can remove + // an array bound and change the IR type. + // FIXME: Once pointee types are removed from IR, remove this. 
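An illustrative C++20 case (not taken from the patch) where CK_NoOp changes the IR pointer type: a qualification conversion that drops the array bound of the pointee.

    int Arr[4];
    const int (*P)[] = &Arr;   // CK_NoOp: 'int (*)[4]' converts to 'const int (*)[]';
                               // the pointee IR type changes, so a bitcast is emitted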
+ llvm::Type *T = ConvertType(DestTy); + if (T != V->getType()) + V = Builder.CreateBitCast(V, T); + } + return V; + } + case CK_BaseToDerived: { const CXXRecordDecl *DerivedClassDecl = DestTy->getPointeeCXXRecordDecl(); assert(DerivedClassDecl && "BaseToDerived arg isn't a C++ object pointer!"); @@ -2658,7 +2702,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, amt = llvm::ConstantFP::get(VMContext, llvm::APFloat(static_cast<double>(amount))); else { - // Remaining types are Half, LongDouble or __float128. Convert from float. + // Remaining types are Half, LongDouble, __ibm128 or __float128. Convert + // from float. llvm::APFloat F(static_cast<float>(amount)); bool ignored; const llvm::fltSemantics *FS; @@ -2668,6 +2713,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, FS = &CGF.getTarget().getFloat128Format(); else if (value->getType()->isHalfTy()) FS = &CGF.getTarget().getHalfFormat(); + else if (value->getType()->isPPC_FP128Ty()) + FS = &CGF.getTarget().getIbm128Format(); else FS = &CGF.getTarget().getLongDoubleFormat(); F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored); @@ -4763,11 +4810,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // vector to get a vec4, then a bitcast if the target type is different. if (NumElementsSrc == 3 && NumElementsDst != 3) { Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); - - if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - DstTy); - } + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + DstTy); Src->setName("astype"); return Src; @@ -4777,12 +4821,10 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // to vec4 if the original type is not vec4, then a shuffle vector to // get a vec3. 
if (NumElementsSrc != 3 && NumElementsDst == 3) { - if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { - auto *Vec4Ty = llvm::FixedVectorType::get( - cast<llvm::VectorType>(DstTy)->getElementType(), 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - Vec4Ty); - } + auto *Vec4Ty = llvm::FixedVectorType::get( + cast<llvm::VectorType>(DstTy)->getElementType(), 4); + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + Vec4Ty); Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); Src->setName("astype"); @@ -4942,7 +4984,7 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, auto *GEP = cast<llvm::GEPOperator>(GEPVal); assert(GEP->getPointerOperand() == BasePtr && - "BasePtr must be the the base of the GEP."); + "BasePtr must be the base of the GEP."); assert(GEP->isInBounds() && "Expected inbounds GEP"); auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp index f860623e2bc3..fdd2fa18bb4a 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -21,13 +21,14 @@ using namespace clang; using namespace CodeGen; -static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { +namespace { +llvm::Function *GetVprintfDeclaration(llvm::Module &M) { llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), llvm::Type::getInt8PtrTy(M.getContext())}; llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); - if (auto* F = M.getFunction("vprintf")) { + if (auto *F = M.getFunction("vprintf")) { // Our CUDA system header declares vprintf with the right signature, so // nobody else should have been able to declare vprintf with a bogus // signature. @@ -41,6 +42,28 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); } +llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) { + const char *Name = "__llvm_omp_vprintf"; + llvm::Module &M = CGM.getModule(); + llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), + llvm::Type::getInt8PtrTy(M.getContext()), + llvm::Type::getInt32Ty(M.getContext())}; + llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( + llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); + + if (auto *F = M.getFunction(Name)) { + if (F->getFunctionType() != VprintfFuncType) { + CGM.Error(SourceLocation(), + "Invalid type declaration for __llvm_omp_vprintf"); + return nullptr; + } + return F; + } + + return llvm::Function::Create( + VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M); +} + // Transforms a call to printf into a call to the NVPTX vprintf syscall (which // isn't particularly special; it's invoked just like a regular function). // vprintf takes two args: A format string, and a pointer to a buffer containing @@ -66,39 +89,22 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { // // Note that by the time this function runs, E's args have already undergone the // standard C vararg promotion (short -> int, float -> double, etc.). 
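A rough C-level paraphrase (illustrative only; the helper actually emits an alloca, per-field GEPs and stores) of how a device-side printf("%d %f\n", i, d) is lowered:

    struct printf_args { int a0; double a1; };     /* one field per promoted vararg */
    struct printf_args buf = { i, d };              /* stored into a stack buffer    */
    int ret = vprintf(fmt, (char *)&buf);           /* format string + packed buffer */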
-RValue -CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue) { - assert(getTarget().getTriple().isNVPTX()); - assert(E->getBuiltinCallee() == Builtin::BIprintf); - assert(E->getNumArgs() >= 1); // printf always has at least one arg. - const llvm::DataLayout &DL = CGM.getDataLayout(); - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); - - CallArgList Args; - EmitCallArgs(Args, - E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), - E->arguments(), E->getDirectCallee(), - /* ParamsToSkip = */ 0); - - // We don't know how to emit non-scalar varargs. - if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) { - return !A.getRValue(*this).isScalar(); - })) { - CGM.ErrorUnsupported(E, "non-scalar arg to printf"); - return RValue::get(llvm::ConstantInt::get(IntTy, 0)); - } +std::pair<llvm::Value *, llvm::TypeSize> +packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) { + const llvm::DataLayout &DL = CGF->CGM.getDataLayout(); + llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext(); + CGBuilderTy &Builder = CGF->Builder; // Construct and fill the args buffer that we'll pass to vprintf. - llvm::Value *BufferPtr; if (Args.size() <= 1) { - // If there are no args, pass a null pointer to vprintf. - BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + // If there are no args, pass a null pointer and size 0 + llvm::Value * BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + return {BufferPtr, llvm::TypeSize::Fixed(0)}; } else { llvm::SmallVector<llvm::Type *, 8> ArgTypes; for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) - ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType()); + ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType()); // Using llvm::StructType is correct only because printf doesn't accept // aggregates. If we had to handle aggregates here, we'd have to manually @@ -106,25 +112,71 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, // that the alignment of the llvm type was the same as the alignment of the // clang type. llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); - llvm::Value *Alloca = CreateTempAlloca(AllocaTy); + llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy); for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); - llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal(); + llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal(); Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); } - BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + llvm::Value *BufferPtr = + Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + return {BufferPtr, DL.getTypeAllocSize(AllocaTy)}; } +} - // Invoke vprintf and return. 
- llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); - return RValue::get(Builder.CreateCall( - VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr})); +bool containsNonScalarVarargs(CodeGenFunction *CGF, CallArgList Args) { + return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) { + return !A.getRValue(*CGF).isScalar(); + }); } -RValue -CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue) { +RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF, + llvm::Function *Decl, bool WithSizeArg) { + CodeGenModule &CGM = CGF->CGM; + CGBuilderTy &Builder = CGF->Builder; + assert(E->getBuiltinCallee() == Builtin::BIprintf); + assert(E->getNumArgs() >= 1); // printf always has at least one arg. + + // Uses the same format as nvptx for the argument packing, but also passes + // an i32 for the total size of the passed pointer + CallArgList Args; + CGF->EmitCallArgs(Args, + E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), + E->arguments(), E->getDirectCallee(), + /* ParamsToSkip = */ 0); + + // We don't know how to emit non-scalar varargs. + if (containsNonScalarVarargs(CGF, Args)) { + CGM.ErrorUnsupported(E, "non-scalar arg to printf"); + return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0)); + } + + auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args); + llvm::Value *BufferPtr = r.first; + + llvm::SmallVector<llvm::Value *, 3> Vec = { + Args[0].getRValue(*CGF).getScalarVal(), BufferPtr}; + if (WithSizeArg) { + // Passing > 32bit of data as a local alloca doesn't work for nvptx or + // amdgpu + llvm::Constant *Size = + llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()), + static_cast<uint32_t>(r.second.getFixedSize())); + + Vec.push_back(Size); + } + return RValue::get(Builder.CreateCall(Decl, Vec)); +} +} // namespace + +RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) { + assert(getTarget().getTriple().isNVPTX()); + return EmitDevicePrintfCallExpr( + E, this, GetVprintfDeclaration(CGM.getModule()), false); +} + +RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn); assert(E->getBuiltinCallee() == Builtin::BIprintf || E->getBuiltinCallee() == Builtin::BI__builtin_printf); @@ -154,3 +206,10 @@ CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); return RValue::get(Printf); } + +RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) { + assert(getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()); + return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM), + true); +} diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp index 937a0e8a3b69..ac26f0d4232c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp @@ -1555,6 +1555,12 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, argCK = CK_AnyPointerToBlockPointerCast; } else if (ivarRef.getType()->isPointerType()) { argCK = CK_BitCast; + } else if (argLoad.getType()->isAtomicType() && + !ivarRef.getType()->isAtomicType()) { + argCK = CK_AtomicToNonAtomic; + } else if (!argLoad.getType()->isAtomicType() && + ivarRef.getType()->isAtomicType()) { + argCK = CK_NonAtomicToAtomic; } ImplicitCastExpr 
argCast(ImplicitCastExpr::OnStack, ivarRef.getType(), argCK, &argLoad, VK_PRValue, FPOptionsOverride()); @@ -2108,6 +2114,13 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, setARCRuntimeFunctionLinkage(CGM, RTF.getCallee()); } +static llvm::Function *getARCIntrinsic(llvm::Intrinsic::ID IntID, + CodeGenModule &CGM) { + llvm::Function *fn = CGM.getIntrinsic(IntID); + setARCRuntimeFunctionLinkage(CGM, fn); + return fn; +} + /// Perform an operation having the signature /// i8* (i8*) /// where a null input causes a no-op and returns null. @@ -2118,10 +2131,8 @@ static llvm::Value *emitARCValueOperation( if (isa<llvm::ConstantPointerNull>(value)) return value; - if (!fn) { - fn = CGF.CGM.getIntrinsic(IntID); - setARCRuntimeFunctionLinkage(CGF.CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(IntID, CGF.CGM); // Cast the argument to 'id'. llvm::Type *origType = returnType ? returnType : value->getType(); @@ -2140,10 +2151,8 @@ static llvm::Value *emitARCValueOperation( static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr, llvm::Function *&fn, llvm::Intrinsic::ID IntID) { - if (!fn) { - fn = CGF.CGM.getIntrinsic(IntID); - setARCRuntimeFunctionLinkage(CGF.CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(IntID, CGF.CGM); // Cast the argument to 'id*'. llvm::Type *origType = addr.getElementType(); @@ -2168,10 +2177,8 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, Address addr, bool ignored) { assert(addr.getElementType() == value->getType()); - if (!fn) { - fn = CGF.CGM.getIntrinsic(IntID); - setARCRuntimeFunctionLinkage(CGF.CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(IntID, CGF.CGM); llvm::Type *origType = value->getType(); @@ -2193,10 +2200,8 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, Address dst, Address src, llvm::Intrinsic::ID IntID) { assert(dst.getType() == src.getType()); - if (!fn) { - fn = CGF.CGM.getIntrinsic(IntID); - setARCRuntimeFunctionLinkage(CGF.CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(IntID, CGF.CGM); llvm::Value *args[] = { CGF.Builder.CreateBitCast(dst.getPointer(), CGF.Int8PtrPtrTy), @@ -2340,13 +2345,22 @@ static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value, // retainRV or claimRV calls in the IR. We currently do this only when the // optimization level isn't -O0 since global-isel, which is currently run at // -O0, doesn't know about the operand bundle. + ObjCEntrypoints &EPs = CGF.CGM.getObjCEntrypoints(); + llvm::Function *&EP = IsRetainRV + ? EPs.objc_retainAutoreleasedReturnValue + : EPs.objc_unsafeClaimAutoreleasedReturnValue; + llvm::Intrinsic::ID IID = + IsRetainRV ? llvm::Intrinsic::objc_retainAutoreleasedReturnValue + : llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue; + EP = getARCIntrinsic(IID, CGF.CGM); - // FIXME: Do this when the target isn't aarch64. + llvm::Triple::ArchType Arch = CGF.CGM.getTriple().getArch(); + + // FIXME: Do this on all targets and at -O0 too. This can be enabled only if + // the target backend knows how to handle the operand bundle. 
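With this change the marker carried on the call is the ARC runtime function itself, e.g. objc_retainAutoreleasedReturnValue or objc_unsafeClaimAutoreleasedReturnValue, passed as the "clang.arc.attachedcall" operand bundle argument instead of an integer enum value; the backend then expands the bundle into the appropriate retainRV/claimRV handshake.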
if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0 && - CGF.CGM.getTarget().getTriple().isAArch64()) { - llvm::Value *bundleArgs[] = {llvm::ConstantInt::get( - CGF.Int64Ty, - llvm::objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))}; + (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::x86_64)) { + llvm::Value *bundleArgs[] = {EP}; llvm::OperandBundleDef OB("clang.arc.attachedcall", bundleArgs); auto *oldCall = cast<llvm::CallBase>(value); llvm::CallBase *newCall = llvm::CallBase::addOperandBundle( @@ -2362,13 +2376,6 @@ static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value, CGF.CGM.getTargetCodeGenInfo().markARCOptimizedReturnCallsAsNoTail(); llvm::CallInst::TailCallKind tailKind = isNoTail ? llvm::CallInst::TCK_NoTail : llvm::CallInst::TCK_None; - ObjCEntrypoints &EPs = CGF.CGM.getObjCEntrypoints(); - llvm::Function *&EP = IsRetainRV - ? EPs.objc_retainAutoreleasedReturnValue - : EPs.objc_unsafeClaimAutoreleasedReturnValue; - llvm::Intrinsic::ID IID = - IsRetainRV ? llvm::Intrinsic::objc_retainAutoreleasedReturnValue - : llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue; return emitARCValueOperation(CGF, value, nullptr, EP, IID, tailKind); } @@ -2401,10 +2408,8 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, if (isa<llvm::ConstantPointerNull>(value)) return; llvm::Function *&fn = CGM.getObjCEntrypoints().objc_release; - if (!fn) { - fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release); - setARCRuntimeFunctionLinkage(CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(llvm::Intrinsic::objc_release, CGM); // Cast the argument to 'id'. value = Builder.CreateBitCast(value, Int8PtrTy); @@ -2447,10 +2452,8 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr, assert(addr.getElementType() == value->getType()); llvm::Function *&fn = CGM.getObjCEntrypoints().objc_storeStrong; - if (!fn) { - fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong); - setARCRuntimeFunctionLinkage(CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(llvm::Intrinsic::objc_storeStrong, CGM); llvm::Value *args[] = { Builder.CreateBitCast(addr.getPointer(), Int8PtrPtrTy), @@ -2603,10 +2606,8 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) { /// Essentially objc_storeWeak(addr, nil). void CodeGenFunction::EmitARCDestroyWeak(Address addr) { llvm::Function *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; - if (!fn) { - fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak); - setARCRuntimeFunctionLinkage(CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(llvm::Intrinsic::objc_destroyWeak, CGM); // Cast the argument to 'id*'. 
addr = Builder.CreateBitCast(addr, Int8PtrPtrTy); @@ -2651,10 +2652,8 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr, /// call i8* \@objc_autoreleasePoolPush(void) llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { llvm::Function *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; - if (!fn) { - fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush); - setARCRuntimeFunctionLinkage(CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush, CGM); return EmitNounwindRuntimeCall(fn); } @@ -2679,10 +2678,8 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { EmitRuntimeCallOrInvoke(fn, value); } else { llvm::FunctionCallee &fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; - if (!fn) { - fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop); - setARCRuntimeFunctionLinkage(CGM, fn); - } + if (!fn) + fn = getARCIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop, CGM); EmitRuntimeCall(fn, value); } @@ -3344,7 +3341,8 @@ struct ARCRetainExprEmitter : TryEmitResult result = visitExpr(e); // Avoid the block-retain if this is a block literal that doesn't need to be // copied to the heap. - if (e->getBlockDecl()->canAvoidCopyToHeap()) + if (CGF.CGM.getCodeGenOpts().ObjCAvoidHeapifyLocalBlocks && + e->getBlockDecl()->canAvoidCopyToHeap()) result.setInt(true); return result; } @@ -3697,7 +3695,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( FunctionDecl *FD = FunctionDecl::Create( C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - FunctionTy, nullptr, SC_Static, false, false); + FunctionTy, nullptr, SC_Static, false, false, false); FunctionArgList args; ParmVarDecl *Params[2]; @@ -3787,7 +3785,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( FunctionDecl *FD = FunctionDecl::Create( C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - FunctionTy, nullptr, SC_Static, false, false); + FunctionTy, nullptr, SC_Static, false, false, false); FunctionArgList args; ParmVarDecl *Params[2]; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp index 3f361f4e7931..e016644150b4 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp @@ -2651,35 +2651,6 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, } } - // If the return type is something that goes in an integer register, the - // runtime will handle 0 returns. For other cases, we fill in the 0 value - // ourselves. - // - // The language spec says the result of this kind of message send is - // undefined, but lots of people seem to have forgotten to read that - // paragraph and insist on sending messages to nil that have structure - // returns. With GCC, this generates a random return value (whatever happens - // to be on the stack / in those registers at the time) on most platforms, - // and generates an illegal instruction trap on SPARC. With LLVM it corrupts - // the stack. 
- bool isPointerSizedReturn = (ResultType->isAnyPointerType() || - ResultType->isIntegralOrEnumerationType() || ResultType->isVoidType()); - - llvm::BasicBlock *startBB = nullptr; - llvm::BasicBlock *messageBB = nullptr; - llvm::BasicBlock *continueBB = nullptr; - - if (!isPointerSizedReturn) { - startBB = Builder.GetInsertBlock(); - messageBB = CGF.createBasicBlock("msgSend"); - continueBB = CGF.createBasicBlock("continue"); - - llvm::Value *isNil = Builder.CreateICmpEQ(Receiver, - llvm::Constant::getNullValue(Receiver->getType())); - Builder.CreateCondBr(isNil, continueBB, messageBB); - CGF.EmitBlock(messageBB); - } - IdTy = cast<llvm::PointerType>(CGM.getTypes().ConvertType(ASTIdTy)); llvm::Value *cmd; if (Method) @@ -2703,6 +2674,96 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs); + // Message sends are expected to return a zero value when the + // receiver is nil. At one point, this was only guaranteed for + // simple integer and pointer types, but expectations have grown + // over time. + // + // Given a nil receiver, the GNU runtime's message lookup will + // return a stub function that simply sets various return-value + // registers to zero and then returns. That's good enough for us + // if and only if (1) the calling conventions of that stub are + // compatible with the signature we're using and (2) the registers + // it sets are sufficient to produce a zero value of the return type. + // Rather than doing a whole target-specific analysis, we assume it + // only works for void, integer, and pointer types, and in all + // other cases we emit an explicit nil check. In + // addition to ensuring we produce a zero value for other types, this + // sidesteps the few outright CC incompatibilities we know about that + // could otherwise lead to crashes, like when a method is expected to + // return on the x87 floating point stack or adjust the stack pointer + // because of an indirect return. + bool hasParamDestroyedInCallee = false; + bool requiresExplicitZeroResult = false; + bool requiresNilReceiverCheck = [&] { + // We never need a check if we statically know the receiver isn't nil. + if (!canMessageReceiverBeNull(CGF, Method, /*IsSuper*/ false, + Class, Receiver)) + return false; + + // If there's a consumed argument, we need a nil check. + if (Method && Method->hasParamDestroyedInCallee()) { + hasParamDestroyedInCallee = true; + } + + // If the return value isn't flagged as unused, and the result + // type isn't in our narrow set where we assume compatibility, + // we need a nil check to ensure a nil value. + if (!Return.isUnused()) { + if (ResultType->isVoidType()) { + // void results are definitely okay. + } else if (ResultType->hasPointerRepresentation() && + CGM.getTypes().isZeroInitializable(ResultType)) { + // Pointer types should be fine as long as they have + // bitwise-zero null pointers. But do we need to worry + // about unusual address spaces? + } else if (ResultType->isIntegralOrEnumerationType()) { + // Bitwise zero should always be zero for integral types. + // FIXME: we probably need a size limit here, but we've + // never imposed one before + } else { + // Otherwise, use an explicit check just to be sure.
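An Objective-C sketch (hypothetical class and method names) of a result that needs this explicit path: the runtime's nil-receiver stub can zero registers, but not an indirectly returned aggregate, so the result slot has to be zero-filled on the nil branch.

    struct Big { long a, b, c, d; };
    @interface Widget
    - (struct Big)bounds;
    @end
    struct Big getBounds(Widget *w) {
      return [w bounds];   // if w is nil, the result slot is explicitly zero-initialized
    }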
+ requiresExplicitZeroResult = true; + } + } + + return hasParamDestroyedInCallee || requiresExplicitZeroResult; + }(); + + // We will need to explicitly zero-initialize an aggregate result slot + // if we generally require explicit zeroing and we have an aggregate + // result. + bool requiresExplicitAggZeroing = + requiresExplicitZeroResult && CGF.hasAggregateEvaluationKind(ResultType); + + // The block we're going to end up in after any message send or nil path. + llvm::BasicBlock *continueBB = nullptr; + // The block that eventually branched to continueBB along the nil path. + llvm::BasicBlock *nilPathBB = nullptr; + // The block to do explicit work in along the nil path, if necessary. + llvm::BasicBlock *nilCleanupBB = nullptr; + + // Emit the nil-receiver check. + if (requiresNilReceiverCheck) { + llvm::BasicBlock *messageBB = CGF.createBasicBlock("msgSend"); + continueBB = CGF.createBasicBlock("continue"); + + // If we need to zero-initialize an aggregate result or destroy + // consumed arguments, we'll need a separate cleanup block. + // Otherwise we can just branch directly to the continuation block. + if (requiresExplicitAggZeroing || hasParamDestroyedInCallee) { + nilCleanupBB = CGF.createBasicBlock("nilReceiverCleanup"); + } else { + nilPathBB = Builder.GetInsertBlock(); + } + + llvm::Value *isNil = Builder.CreateICmpEQ(Receiver, + llvm::Constant::getNullValue(Receiver->getType())); + Builder.CreateCondBr(isNil, nilCleanupBB ? nilCleanupBB : continueBB, + messageBB); + CGF.EmitBlock(messageBB); + } + // Get the IMP to call llvm::Value *imp; @@ -2744,36 +2805,48 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call); call->setMetadata(msgSendMDKind, node); - - if (!isPointerSizedReturn) { - messageBB = CGF.Builder.GetInsertBlock(); + if (requiresNilReceiverCheck) { + llvm::BasicBlock *nonNilPathBB = CGF.Builder.GetInsertBlock(); CGF.Builder.CreateBr(continueBB); + + // Emit the nil path if we decided it was necessary above. + if (nilCleanupBB) { + CGF.EmitBlock(nilCleanupBB); + + if (hasParamDestroyedInCallee) { + destroyCalleeDestroyedArguments(CGF, Method, CallArgs); + } + + if (requiresExplicitAggZeroing) { + assert(msgRet.isAggregate()); + Address addr = msgRet.getAggregateAddress(); + CGF.EmitNullInitialization(addr, ResultType); + } + + nilPathBB = CGF.Builder.GetInsertBlock(); + CGF.Builder.CreateBr(continueBB); + } + + // Enter the continuation block and emit a phi if required. CGF.EmitBlock(continueBB); if (msgRet.isScalar()) { llvm::Value *v = msgRet.getScalarVal(); llvm::PHINode *phi = Builder.CreatePHI(v->getType(), 2); - phi->addIncoming(v, messageBB); - phi->addIncoming(llvm::Constant::getNullValue(v->getType()), startBB); + phi->addIncoming(v, nonNilPathBB); + phi->addIncoming(CGM.EmitNullConstant(ResultType), nilPathBB); msgRet = RValue::get(phi); } else if (msgRet.isAggregate()) { - Address v = msgRet.getAggregateAddress(); - llvm::PHINode *phi = Builder.CreatePHI(v.getType(), 2); - llvm::Type *RetTy = v.getElementType(); - Address NullVal = CGF.CreateTempAlloca(RetTy, v.getAlignment(), "null"); - CGF.InitTempAlloca(NullVal, llvm::Constant::getNullValue(RetTy)); - phi->addIncoming(v.getPointer(), messageBB); - phi->addIncoming(NullVal.getPointer(), startBB); - msgRet = RValue::getAggregate(Address(phi, v.getAlignment())); + // Aggregate zeroing is handled in nilCleanupBB when it's required. 
} else /* isComplex() */ { std::pair<llvm::Value*,llvm::Value*> v = msgRet.getComplexVal(); llvm::PHINode *phi = Builder.CreatePHI(v.first->getType(), 2); - phi->addIncoming(v.first, messageBB); + phi->addIncoming(v.first, nonNilPathBB); phi->addIncoming(llvm::Constant::getNullValue(v.first->getType()), - startBB); + nilPathBB); llvm::PHINode *phi2 = Builder.CreatePHI(v.second->getType(), 2); - phi2->addIncoming(v.second, messageBB); + phi2->addIncoming(v.second, nonNilPathBB); phi2->addIncoming(llvm::Constant::getNullValue(v.second->getType()), - startBB); + nilPathBB); msgRet = RValue::getComplex(phi, phi2); } } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp index 3de67bb4bbc5..5b925359ac25 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp @@ -1754,37 +1754,9 @@ struct NullReturnState { // Okay, start emitting the null-receiver block. CGF.EmitBlock(NullBB); - // Release any consumed arguments we've got. + // Destroy any consumed arguments we've got. if (Method) { - CallArgList::const_iterator I = CallArgs.begin(); - for (ObjCMethodDecl::param_const_iterator i = Method->param_begin(), - e = Method->param_end(); i != e; ++i, ++I) { - const ParmVarDecl *ParamDecl = (*i); - if (ParamDecl->hasAttr<NSConsumedAttr>()) { - RValue RV = I->getRValue(CGF); - assert(RV.isScalar() && - "NullReturnState::complete - arg not on object"); - CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime); - } else { - QualType QT = ParamDecl->getType(); - auto *RT = QT->getAs<RecordType>(); - if (RT && RT->getDecl()->isParamDestroyedInCallee()) { - RValue RV = I->getRValue(CGF); - QualType::DestructionKind DtorKind = QT.isDestructedType(); - switch (DtorKind) { - case QualType::DK_cxx_destructor: - CGF.destroyCXXObject(CGF, RV.getAggregateAddress(), QT); - break; - case QualType::DK_nontrivial_c_struct: - CGF.destroyNonTrivialCStruct(CGF, RV.getAggregateAddress(), QT); - break; - default: - llvm_unreachable("unexpected dtor kind"); - break; - } - } - } - } + CGObjCRuntime::destroyCalleeDestroyedArguments(CGF, Method, CallArgs); } // The phi code below assumes that we haven't needed any control flow yet. @@ -2151,15 +2123,6 @@ CodeGen::RValue CGObjCMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, Method, Class, ObjCTypes); } -static bool isWeakLinkedClass(const ObjCInterfaceDecl *ID) { - do { - if (ID->isWeakImported()) - return true; - } while ((ID = ID->getSuperClass())); - - return false; -} - CodeGen::RValue CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, ReturnValueSlot Return, @@ -2200,32 +2163,8 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, CGM.getContext().getCanonicalType(ResultType) && "Result type mismatch!"); - bool ReceiverCanBeNull = true; - - // Super dispatch assumes that self is non-null; even the messenger - // doesn't have a null check internally. - if (IsSuper) { - ReceiverCanBeNull = false; - - // If this is a direct dispatch of a class method, check whether the class, - // or anything in its hierarchy, was weak-linked. - } else if (ClassReceiver && Method && Method->isClassMethod()) { - ReceiverCanBeNull = isWeakLinkedClass(ClassReceiver); - - // If we're emitting a method, and self is const (meaning just ARC, for now), - // and the receiver is a load of self, then self is a valid object. 
- } else if (auto CurMethod = - dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl)) { - auto Self = CurMethod->getSelfDecl(); - if (Self->getType().isConstQualified()) { - if (auto LI = dyn_cast<llvm::LoadInst>(Arg0->stripPointerCasts())) { - llvm::Value *SelfAddr = CGF.GetAddrOfLocalVar(Self).getPointer(); - if (SelfAddr == LI->getPointerOperand()) { - ReceiverCanBeNull = false; - } - } - } - } + bool ReceiverCanBeNull = + canMessageReceiverBeNull(CGF, Method, IsSuper, ClassReceiver, Arg0); bool RequiresNullCheck = false; @@ -2261,14 +2200,8 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, RequiresNullCheck = false; // Emit a null-check if there's a consumed argument other than the receiver. - if (!RequiresNullCheck && CGM.getLangOpts().ObjCAutoRefCount && Method) { - for (const auto *ParamDecl : Method->parameters()) { - if (ParamDecl->isDestroyedInCallee()) { - RequiresNullCheck = true; - break; - } - } - } + if (!RequiresNullCheck && Method && Method->hasParamDestroyedInCallee()) + RequiresNullCheck = true; NullReturnState nullReturn; if (RequiresNullCheck) { @@ -4788,9 +4721,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // matched and avoid generating code for falling off the end if // so. bool AllMatched = false; - for (unsigned I = 0, N = AtTryStmt->getNumCatchStmts(); I != N; ++I) { - const ObjCAtCatchStmt *CatchStmt = AtTryStmt->getCatchStmt(I); - + for (const ObjCAtCatchStmt *CatchStmt : AtTryStmt->catch_stmts()) { const VarDecl *CatchParam = CatchStmt->getCatchParamDecl(); const ObjCObjectPointerType *OPT = nullptr; @@ -6741,33 +6672,53 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { } } - values.add(emitMethodList(listName, MethodListType::CategoryInstanceMethods, - instanceMethods)); - values.add(emitMethodList(listName, MethodListType::CategoryClassMethods, - classMethods)); + auto instanceMethodList = emitMethodList( + listName, MethodListType::CategoryInstanceMethods, instanceMethods); + auto classMethodList = emitMethodList( + listName, MethodListType::CategoryClassMethods, classMethods); + values.add(instanceMethodList); + values.add(classMethodList); + // Keep track of whether we have actual metadata to emit. 
+ bool isEmptyCategory = + instanceMethodList->isNullValue() && classMethodList->isNullValue(); const ObjCCategoryDecl *Category = - Interface->FindCategoryDeclaration(OCD->getIdentifier()); + Interface->FindCategoryDeclaration(OCD->getIdentifier()); if (Category) { SmallString<256> ExtName; - llvm::raw_svector_ostream(ExtName) << Interface->getObjCRuntimeNameAsString() << "_$_" - << OCD->getName(); - values.add(EmitProtocolList("_OBJC_CATEGORY_PROTOCOLS_$_" - + Interface->getObjCRuntimeNameAsString() + "_$_" - + Category->getName(), - Category->protocol_begin(), - Category->protocol_end())); - values.add(EmitPropertyList("_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes, false)); - values.add(EmitPropertyList("_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes, true)); + llvm::raw_svector_ostream(ExtName) + << Interface->getObjCRuntimeNameAsString() << "_$_" << OCD->getName(); + auto protocolList = + EmitProtocolList("_OBJC_CATEGORY_PROTOCOLS_$_" + + Interface->getObjCRuntimeNameAsString() + "_$_" + + Category->getName(), + Category->protocol_begin(), Category->protocol_end()); + auto propertyList = EmitPropertyList("_OBJC_$_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, false); + auto classPropertyList = + EmitPropertyList("_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), OCD, + Category, ObjCTypes, true); + values.add(protocolList); + values.add(propertyList); + values.add(classPropertyList); + isEmptyCategory &= protocolList->isNullValue() && + propertyList->isNullValue() && + classPropertyList->isNullValue(); } else { values.addNullPointer(ObjCTypes.ProtocolListnfABIPtrTy); values.addNullPointer(ObjCTypes.PropertyListPtrTy); values.addNullPointer(ObjCTypes.PropertyListPtrTy); } - unsigned Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy); + if (isEmptyCategory) { + // Empty category, don't emit any metadata. + values.abandon(); + MethodDefinitions.clear(); + return; + } + + unsigned Size = + CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy); values.addInt(ObjCTypes.IntTy, Size); llvm::GlobalVariable *GCATV = diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.cpp index 108f6fc7ba60..33ae3c7c2b28 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -163,8 +163,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, // Enter the catch, if there is one. if (S.getNumCatchStmts()) { - for (unsigned I = 0, N = S.getNumCatchStmts(); I != N; ++I) { - const ObjCAtCatchStmt *CatchStmt = S.getCatchStmt(I); + for (const ObjCAtCatchStmt *CatchStmt : S.catch_stmts()) { const VarDecl *CatchDecl = CatchStmt->getCatchParamDecl(); Handlers.push_back(CatchHandler()); @@ -385,6 +384,83 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, return MessageSendInfo(argsInfo, signatureType); } +bool CGObjCRuntime::canMessageReceiverBeNull(CodeGenFunction &CGF, + const ObjCMethodDecl *method, + bool isSuper, + const ObjCInterfaceDecl *classReceiver, + llvm::Value *receiver) { + // Super dispatch assumes that self is non-null; even the messenger + // doesn't have a null check internally. + if (isSuper) + return false; + + // If this is a direct dispatch of a class method, check whether the class, + // or anything in its hierarchy, was weak-linked. 
+ if (classReceiver && method && method->isClassMethod()) + return isWeakLinkedClass(classReceiver); + + // If we're emitting a method, and self is const (meaning just ARC, for now), + // and the receiver is a load of self, then self is a valid object. + if (auto curMethod = + dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl)) { + auto self = curMethod->getSelfDecl(); + if (self->getType().isConstQualified()) { + if (auto LI = dyn_cast<llvm::LoadInst>(receiver->stripPointerCasts())) { + llvm::Value *selfAddr = CGF.GetAddrOfLocalVar(self).getPointer(); + if (selfAddr == LI->getPointerOperand()) { + return false; + } + } + } + } + + // Otherwise, assume it can be null. + return true; +} + +bool CGObjCRuntime::isWeakLinkedClass(const ObjCInterfaceDecl *ID) { + do { + if (ID->isWeakImported()) + return true; + } while ((ID = ID->getSuperClass())); + + return false; +} + +void CGObjCRuntime::destroyCalleeDestroyedArguments(CodeGenFunction &CGF, + const ObjCMethodDecl *method, + const CallArgList &callArgs) { + CallArgList::const_iterator I = callArgs.begin(); + for (auto i = method->param_begin(), e = method->param_end(); + i != e; ++i, ++I) { + const ParmVarDecl *param = (*i); + if (param->hasAttr<NSConsumedAttr>()) { + RValue RV = I->getRValue(CGF); + assert(RV.isScalar() && + "NullReturnState::complete - arg not on object"); + CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime); + } else { + QualType QT = param->getType(); + auto *RT = QT->getAs<RecordType>(); + if (RT && RT->getDecl()->isParamDestroyedInCallee()) { + RValue RV = I->getRValue(CGF); + QualType::DestructionKind DtorKind = QT.isDestructedType(); + switch (DtorKind) { + case QualType::DK_cxx_destructor: + CGF.destroyCXXObject(CGF, RV.getAggregateAddress(), QT); + break; + case QualType::DK_nontrivial_c_struct: + CGF.destroyNonTrivialCStruct(CGF, RV.getAggregateAddress(), QT); + break; + default: + llvm_unreachable("unexpected dtor kind"); + break; + } + } + } + } +} + llvm::Constant * clang::CodeGen::emitObjCProtocolObject(CodeGenModule &CGM, const ObjCProtocolDecl *protocol) { diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h b/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h index f56101df77b6..bb27c38db204 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h @@ -337,6 +337,23 @@ public: MessageSendInfo getMessageSendInfo(const ObjCMethodDecl *method, QualType resultType, CallArgList &callArgs); + bool canMessageReceiverBeNull(CodeGenFunction &CGF, + const ObjCMethodDecl *method, + bool isSuper, + const ObjCInterfaceDecl *classReceiver, + llvm::Value *receiver); + static bool isWeakLinkedClass(const ObjCInterfaceDecl *cls); + + /// Destroy the callee-destroyed arguments of the given method, + /// if it has any. Used for nil-receiver paths in message sends. + /// Never does anything if the method does not satisfy + /// hasParamDestroyedInCallee(). + /// + /// \param callArgs - just the formal arguments, not including implicit + /// arguments such as self and cmd + static void destroyCalleeDestroyedArguments(CodeGenFunction &CGF, + const ObjCMethodDecl *method, + const CallArgList &callArgs); // FIXME: This probably shouldn't be here, but the code to compute // it is here. 
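To make the control flow added in the CGObjCGNU.cpp hunk above easier to follow, here is a minimal, self-contained C++ sketch of the branch-and-phi shape the nil-receiver path produces for a scalar result. It is not taken from the patch: it drives llvm::IRBuilder directly, and the names wrapper and imp_stub are placeholders standing in for the surrounding function and the IMP returned by the runtime lookup.

// Sketch only: the "test receiver, call on the non-nil path, merge with zero" shape.
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("nil-check-sketch", Ctx);
  IRBuilder<> B(Ctx);

  Type *I32 = Type::getInt32Ty(Ctx);
  PointerType *I8Ptr = PointerType::getUnqual(Type::getInt8Ty(Ctx));

  // Stand-in for the IMP obtained from the message lookup.
  FunctionCallee Imp = M.getOrInsertFunction("imp_stub", I32, I8Ptr);

  // i32 wrapper(i8* receiver): the message result, or 0 when receiver is nil.
  Function *F = Function::Create(FunctionType::get(I32, {I8Ptr}, false),
                                 Function::ExternalLinkage, "wrapper", &M);
  Value *Receiver = F->getArg(0);

  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *MsgSend = BasicBlock::Create(Ctx, "msgSend", F);
  BasicBlock *Continue = BasicBlock::Create(Ctx, "continue", F);

  // entry: branch straight to the continuation when the receiver is null.
  B.SetInsertPoint(Entry);
  Value *IsNil = B.CreateICmpEQ(Receiver, Constant::getNullValue(I8Ptr), "isNil");
  B.CreateCondBr(IsNil, Continue, MsgSend);

  // msgSend: the actual call; remember which block it ends in for the phi.
  B.SetInsertPoint(MsgSend);
  Value *Ret = B.CreateCall(Imp, {Receiver}, "msgRet");
  BasicBlock *NonNilPath = B.GetInsertBlock();
  B.CreateBr(Continue);

  // continue: merge the call result with a zero coming from the nil path.
  B.SetInsertPoint(Continue);
  PHINode *Phi = B.CreatePHI(I32, 2, "result");
  Phi->addIncoming(Ret, NonNilPath);
  Phi->addIncoming(Constant::getNullValue(I32), Entry);
  B.CreateRet(Phi);

  verifyFunction(*F, &errs());
  M.print(outs(), nullptr);
  return 0;
}

Aggregate results and callee-destroyed arguments do not fit this simple phi; as the patch's comments describe, those cases go through the separate nilCleanupBB block, which zero-initializes the result slot and runs destroyCalleeDestroyedArguments before branching to the continuation.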
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ca98c7a57446..75709b3c7e78 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1448,8 +1448,8 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, const char *FileName = PLoc.getFilename(); unsigned Line = PLoc.getLine(); unsigned Column = PLoc.getColumn(); - SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, - Line, Column); + SrcLocStr = + OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); } unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), @@ -1560,13 +1560,22 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { } llvm::FunctionCallee -CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { +CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, + bool IsGPUDistribute) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" - : "__kmpc_for_static_init_4u") - : (IVSigned ? "__kmpc_for_static_init_8" - : "__kmpc_for_static_init_8u"); + StringRef Name; + if (IsGPUDistribute) + Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" + : "__kmpc_distribute_static_init_4u") + : (IVSigned ? "__kmpc_distribute_static_init_8" + : "__kmpc_distribute_static_init_8u"); + else + Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" + : "__kmpc_for_static_init_4u") + : (IVSigned ? "__kmpc_for_static_init_8" + : "__kmpc_for_static_init_8u"); + llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { @@ -2112,7 +2121,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, Address ZeroAddrBound = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".bound.zero.addr"); - CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; // ThreadId for serialized parallels is 0. 
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); @@ -2826,7 +2835,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned); + createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -2841,8 +2850,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit( llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned); + llvm::FunctionCallee StaticInitFunction; + bool isGPUDistribute = + CGM.getLangOpts().OpenMPIsDevice && + (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); + StaticInitFunction = createForStaticInitFunction( + Values.IVSize, Values.IVSigned, isGPUDistribute); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, OMPC_SCHEDULE_MODIFIER_unknown, Values); @@ -2863,9 +2877,16 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_for_static_fini), - Args); + if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && + (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), + Args); + else + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_for_static_fini), + Args); } void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, @@ -3892,7 +3913,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, SharedRefLValue.getTBAAInfo()); } else if (CGF.LambdaCaptureFields.count( Pair.second.Original->getCanonicalDecl()) > 0 || - dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { + isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } else { // Processing for implicitly captured variables. @@ -4401,14 +4422,14 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (NumOfElements) { NumOfElements = CGF.Builder.CreateNUWAdd( llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); - OpaqueValueExpr OVE( + auto *OVE = new (C) OpaqueValueExpr( Loc, C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), VK_PRValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, RValue::get(NumOfElements)); KmpTaskAffinityInfoArrayTy = - C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, + C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); // Properly emit variable-sized array. 
auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, @@ -4759,8 +4780,8 @@ emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue NumLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), C.getUIntPtrType()); - CGF.InitTempAlloca(NumLVal.getAddress(CGF), - llvm::ConstantInt::get(CGF.IntPtrTy, 0)); + CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), + NumLVal.getAddress(CGF)); llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); CGF.EmitStoreOfScalar(Add, NumLVal); @@ -4861,7 +4882,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( bool HasRegularWithIterators = false; llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); llvm::Value *NumOfRegularWithIterators = - llvm::ConstantInt::get(CGF.IntPtrTy, 1); + llvm::ConstantInt::get(CGF.IntPtrTy, 0); // Calculate number of depobj dependecies and regular deps with the iterators. for (const OMPTaskDataTy::DependData &D : Dependencies) { if (D.DepKind == OMPC_DEPEND_depobj) { @@ -4875,12 +4896,15 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( continue; } // Include number of iterations, if any. + if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); + llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( + Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); NumOfRegularWithIterators = - CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); + CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); } HasRegularWithIterators = true; continue; @@ -4899,13 +4923,13 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( NumOfElements = CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); } - OpaqueValueExpr OVE(Loc, - C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), - VK_PRValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + auto *OVE = new (C) OpaqueValueExpr( + Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), + VK_PRValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, RValue::get(NumOfElements)); KmpDependInfoArrayTy = - C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, + C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); // Properly emit variable-sized array. @@ -6240,21 +6264,51 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, SharedLVal.getAlignment()); } -void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, - SourceLocation Loc) { +void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; - if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { + // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. OMPBuilder.createTaskwait(CGF.Builder); } else { - // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - // Ignore return result until untied tasks are supported. 
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_omp_taskwait), - Args); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + auto &M = CGM.getModule(); + Address DependenciesArray = Address::invalid(); + llvm::Value *NumOfElements; + std::tie(NumOfElements, DependenciesArray) = + emitDependClause(CGF, Data.Dependences, Loc); + llvm::Value *DepWaitTaskArgs[6]; + if (!Data.Dependences.empty()) { + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = NumOfElements; + DepWaitTaskArgs[3] = DependenciesArray.getPointer(); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + + // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, + // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info + // is specified. + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), + DepWaitTaskArgs); + + } else { + + // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + llvm::Value *Args[] = {UpLoc, ThreadID}; + // Ignore return result until untied tasks are supported. + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), + Args); + } } if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) @@ -6740,6 +6794,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: break; default: @@ -7214,6 +7269,7 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: break; default: @@ -7269,6 +7325,14 @@ public: /// 0x800 is reserved for compatibility with XLC. /// Produce a runtime error if the data is not already allocated. OMP_MAP_PRESENT = 0x1000, + // Increment and decrement a separate reference counter so that the data + // cannot be unmapped within the associated region. Thus, this flag is + // intended to be used on 'target' and 'target data' directives because they + // are inherently structured. It is not intended to be used on 'target + // enter data' and 'target exit data' directives because they are inherently + // dynamic. + // This is an OpenMP extension for the sake of OpenACC support. + OMP_MAP_OMPX_HOLD = 0x2000, /// Signal that the runtime library should use args as an array of /// descriptor_dim pointers and use args_size as dims. Used when we have /// non-contiguous list items in target update directive @@ -7447,6 +7511,9 @@ private: SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> DevPointersMap; + /// Map between lambda declarations and their map type. 
+ llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; + llvm::Value *getExprTypeSize(const Expr *E) const { QualType ExprTy = E->getType().getCanonicalType(); @@ -7559,17 +7626,15 @@ private: Bits |= OMP_MAP_PTR_AND_OBJ; if (AddIsTargetParamFlag) Bits |= OMP_MAP_TARGET_PARAM; - if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) - != MapModifiers.end()) + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) Bits |= OMP_MAP_ALWAYS; - if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) - != MapModifiers.end()) + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) Bits |= OMP_MAP_CLOSE; - if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != - MapModifiers.end() || - llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != - MotionModifiers.end()) + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || + llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) Bits |= OMP_MAP_PRESENT; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) + Bits |= OMP_MAP_OMPX_HOLD; if (IsNonContiguous) Bits |= OMP_MAP_NON_CONTIG; return Bits; @@ -8405,6 +8470,15 @@ private: return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; } + auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); + if (I != LambdasMap.end()) + // for map(to: lambda): using user specified map type. + return getMapTypeBits( + I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), + /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), + /*AddPtrFlag=*/false, + /*AddIsTargetParamFlag=*/false, + /*isNonContiguous=*/false); return MappableExprsHandler::OMP_MAP_TO | MappableExprsHandler::OMP_MAP_FROM; } @@ -8536,10 +8610,8 @@ private: if (!C) continue; MapKind Kind = Other; - if (!C->getMapTypeModifiers().empty() && - llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) { - return K == OMPC_MAP_MODIFIER_present; - })) + if (llvm::is_contained(C->getMapTypeModifiers(), + OMPC_MAP_MODIFIER_present)) Kind = Present; else if (C->getMapType() == OMPC_MAP_alloc) Kind = Allocs; @@ -8558,10 +8630,8 @@ private: if (!C) continue; MapKind Kind = Other; - if (!C->getMotionModifiers().empty() && - llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { - return K == OMPC_MOTION_MODIFIER_present; - })) + if (llvm::is_contained(C->getMotionModifiers(), + OMPC_MOTION_MODIFIER_present)) Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { @@ -8576,10 +8646,8 @@ private: if (!C) continue; MapKind Kind = Other; - if (!C->getMotionModifiers().empty() && - llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { - return K == OMPC_MOTION_MODIFIER_present; - })) + if (llvm::is_contained(C->getMotionModifiers(), + OMPC_MOTION_MODIFIER_present)) Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { @@ -8869,6 +8937,21 @@ public: for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); + // Extract map information. + for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { + if (C->getMapType() != OMPC_MAP_to) + continue; + for (auto L : C->component_lists()) { + const ValueDecl *VD = std::get<0>(L); + const auto *RD = VD ? 
VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl() + : nullptr; + if (RD && RD->isLambda()) + LambdasMap.try_emplace(std::get<0>(L), C); + } + } } /// Constructor for the declare mapper directive. @@ -8923,6 +9006,20 @@ public: CombinedInfo.Types.back() |= OMP_MAP_PRESENT; // Remove TARGET_PARAM flag from the first element (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; + // If any element has the ompx_hold modifier, then make sure the runtime + // uses the hold reference count for the struct as a whole so that it won't + // be unmapped by an extra dynamic reference count decrement. Add it to all + // elements as well so the runtime knows which reference count to check + // when determining whether it's time for device-to-host transfers of + // individual elements. + if (CurTypes.end() != + llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { + return Type & OMP_MAP_OMPX_HOLD; + })) { + CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; + for (auto &M : CurTypes) + M |= OMP_MAP_OMPX_HOLD; + } // All other current entries will be MEMBER_OF the combined entry // (except for PTR_AND_OBJ entries which do not have a placeholder value @@ -9067,6 +9164,11 @@ public: ? nullptr : Cap->getCapturedVar()->getCanonicalDecl(); + // for map(to: lambda): skip here, processing it in + // generateDefaultMapInfo + if (LambdasMap.count(VD)) + return; + // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just // pass its value. @@ -9113,18 +9215,13 @@ public: const MapData &RHS) { ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); OpenMPMapClauseKind MapType = std::get<1>(RHS); - bool HasPresent = !MapModifiers.empty() && - llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) { - return K == clang::OMPC_MAP_MODIFIER_present; - }); + bool HasPresent = + llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); bool HasAllocs = MapType == OMPC_MAP_alloc; MapModifiers = std::get<2>(RHS); MapType = std::get<1>(LHS); bool HasPresentR = - !MapModifiers.empty() && - llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) { - return K == clang::OMPC_MAP_MODIFIER_present; - }); + llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); bool HasAllocsR = MapType == OMPC_MAP_alloc; return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); }); @@ -9435,34 +9532,50 @@ static void emitNonContiguousDescriptor( } } +// Try to extract the base declaration from a `this->x` expression if possible. +static ValueDecl *getDeclFromThisExpr(const Expr *E) { + if (!E) + return nullptr; + + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) + if (const MemberExpr *ME = + dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) + return ME->getMemberDecl(); + return nullptr; +} + /// Emit a string constant containing the names of the values mapped to the /// offloading runtime library. 
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { - llvm::Constant *SrcLocStr; - if (!MapExprs.getMapDecl()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + + if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) + return OMPBuilder.getOrCreateDefaultSrcLocStr(); + + SourceLocation Loc; + if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { + if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) + Loc = VD->getLocation(); + else + Loc = MapExprs.getMapExpr()->getExprLoc(); } else { - std::string ExprName = ""; - if (MapExprs.getMapExpr()) { - PrintingPolicy P(CGF.getContext().getLangOpts()); - llvm::raw_string_ostream OS(ExprName); - MapExprs.getMapExpr()->printPretty(OS, nullptr, P); - OS.flush(); - } else { - ExprName = MapExprs.getMapDecl()->getNameAsString(); - } + Loc = MapExprs.getMapDecl()->getLocation(); + } - SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); - PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - const char *FileName = PLoc.getFilename(); - unsigned Line = PLoc.getLine(); - unsigned Column = PLoc.getColumn(); - SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), - Line, Column); + std::string ExprName = ""; + if (MapExprs.getMapExpr()) { + PrintingPolicy P(CGF.getContext().getLangOpts()); + llvm::raw_string_ostream OS(ExprName); + MapExprs.getMapExpr()->printPretty(OS, nullptr, P); + OS.flush(); + } else { + ExprName = MapExprs.getMapDecl()->getNameAsString(); } - return SrcLocStr; + + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), + PLoc.getLine(), PLoc.getColumn()); } /// Emit the arrays used to pass the captures and map information to the @@ -9810,6 +9923,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: default: llvm_unreachable("Unexpected directive."); @@ -10660,6 +10774,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: default: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -11341,6 +11456,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: default: llvm_unreachable("Unexpected standalone target data directive."); @@ -11627,11 +11743,11 @@ getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { assert(!Sizes.empty() && "Unable to determine NDS and WDS."); // The LS of a function parameter / return value can only be a power // of 2, starting from 8 bits, up to 128. 
- assert(std::all_of(Sizes.begin(), Sizes.end(), - [](unsigned Size) { - return Size == 8 || Size == 16 || Size == 32 || - Size == 64 || Size == 128; - }) && + assert(llvm::all_of(Sizes, + [](unsigned Size) { + return Size == 8 || Size == 16 || Size == 32 || + Size == 64 || Size == 128; + }) && "Invalid size"); return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), @@ -12287,7 +12403,7 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { return llvm::any_of( CGM.getOpenMPRuntime().NontemporalDeclsStack, - [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); + [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); } void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( @@ -12958,7 +13074,8 @@ Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, } void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc, + const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h index c24648aae7e1..527a23a8af6a 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -795,9 +795,11 @@ private: llvm::Type *getKmpc_MicroPointerTy(); /// Returns __kmpc_for_static_init_* runtime function for the specified - /// size \a IVSize and sign \a IVSigned. + /// size \a IVSize and sign \a IVSigned. Will create a distribute call + /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set. llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, - bool IVSigned); + bool IVSigned, + bool IsGPUDistribute); /// Returns __kmpc_dispatch_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. @@ -1545,7 +1547,8 @@ public: LValue SharedLVal); /// Emit code for 'taskwait' directive. - virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); + virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPTaskDataTy &Data); /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be @@ -2383,7 +2386,8 @@ public: LValue SharedLVal) override; /// Emit code for 'taskwait' directive. - void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override; + void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPTaskDataTy &Data) override; /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp deleted file mode 100644 index 33d4ab838af1..000000000000 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to -// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. 
-// -//===----------------------------------------------------------------------===// - -#include "CGOpenMPRuntimeAMDGCN.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/DeclOpenMP.h" -#include "clang/AST/StmtOpenMP.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/Basic/Cuda.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" - -using namespace clang; -using namespace CodeGen; -using namespace llvm::omp; - -CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM) - : CGOpenMPRuntimeGPU(CGM) { - if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP AMDGCN can only handle device code."); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - // return constant compile-time target-specific warp size - unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); - return Bld.getInt32(WarpSize); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F = - CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x); - return Bld.CreateCall(F, llvm::None, "nvptx_tid"); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; - llvm::Function *F = M->getFunction(LocSize); - if (!F) { - F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), - llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); - } - return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); -} diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h deleted file mode 100644 index c1421261bfc1..000000000000 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h +++ /dev/null @@ -1,43 +0,0 @@ -//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to -// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H -#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H - -#include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/StmtOpenMP.h" - -namespace clang { -namespace CodeGen { - -class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU { - -public: - explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM); - - /// Get the GPU warp size. - llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; - - /// Get the id of the current thread on the GPU. - llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; - - /// Get the maximum number of threads in a block of the GPU. 
- llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; -}; - -} // namespace CodeGen -} // namespace clang - -#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 63fecedc6fb7..dcb224f33156 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "CGOpenMPRuntimeGPU.h" -#include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" @@ -21,7 +20,7 @@ #include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" -#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/Support/MathExtras.h" using namespace clang; using namespace CodeGen; @@ -106,8 +105,7 @@ public: /// is the same for all known NVPTX architectures. enum MachineConfiguration : unsigned { /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target - /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2, - /// and GV_Warp_Size_Log2_Mask. + /// specific Grid Values like GV_Warp_Size, GV_Slot_Size /// Global memory alignment for performance. GlobalMemoryAlignment = 128, @@ -339,7 +337,7 @@ class CheckVarsEscapingDeclContext final assert(!GlobalizedRD && "Record for globalized variables is built already."); ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams; - unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; if (IsInTTDRegion) EscapedDeclsForTeams = EscapedDecls.getArrayRef(); else @@ -536,7 +534,7 @@ public: static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; unsigned LaneIDBits = - CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2); + llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } @@ -546,8 +544,9 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue( - llvm::omp::GV_Warp_Size_Log2_Mask); + unsigned LaneIDBits = + llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); + unsigned LaneIDMask = ~0u >> (32u - LaneIDBits); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); @@ -1111,11 +1110,12 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, // warps participate in parallel work. static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, bool Mode) { - auto *GVMode = - new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1), - Twine(Name, "_exec_mode")); + auto *GVMode = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 
OMP_TGT_EXEC_MODE_SPMD + : OMP_TGT_EXEC_MODE_GENERIC), + Twine(Name, "_exec_mode")); CGM.addCompilerUsedGlobal(GVMode); } @@ -1195,7 +1195,17 @@ unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const { CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP NVPTX can only handle device code."); + llvm_unreachable("OpenMP can only handle device code."); + + llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); + if (CGM.getLangOpts().OpenMPTargetNewRuntime) { + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug, + "__omp_rtl_debug_kind"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription, + "__omp_rtl_assume_teams_oversubscription"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription, + "__omp_rtl_assume_threads_oversubscription"); + } } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, @@ -1308,7 +1318,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( const RecordDecl *GlobalizedRD = nullptr; llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions; llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; - unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size; // Globalize team reductions variable unconditionally in all modes. if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); @@ -1488,7 +1498,7 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF, Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -2089,7 +2099,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, "__openmp_nvptx_data_transfer_temporary_storage"; llvm::GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); - unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; if (!TransferMedium) { auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize); unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); @@ -3476,7 +3486,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr); // Get the array of arguments. 
SmallVector<llvm::Value *, 8> Args; @@ -3935,3 +3945,31 @@ void CGOpenMPRuntimeGPU::clear() { } CGOpenMPRuntime::clear(); } + +llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Module *M = &CGF.CGM.getModule(); + const char *LocSize = "__kmpc_get_hardware_num_threads_in_block"; + llvm::Function *F = M->getFunction(LocSize); + if (!F) { + F = llvm::Function::Create( + llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), + llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); + } + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); +} + +llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) { + ArrayRef<llvm::Value *> Args{}; + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block), + Args); +} + +llvm::Value *CGOpenMPRuntimeGPU::getGPUWarpSize(CodeGenFunction &CGF) { + ArrayRef<llvm::Value *> Args{}; + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_warp_size), + Args); +} diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index b5f1b843c46b..ac51264d7685 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -17,7 +17,6 @@ #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "clang/AST/StmtOpenMP.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" namespace clang { namespace CodeGen { @@ -177,13 +176,13 @@ public: /// and NVPTX. /// Get the GPU warp size. - virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUWarpSize(CodeGenFunction &CGF); /// Get the id of the current thread on the GPU. - virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUThreadID(CodeGenFunction &CGF); /// Get the maximum number of threads in a block of the GPU. - virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUNumThreads(CodeGenFunction &CGF); /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp deleted file mode 100644 index 1688d07b90b6..000000000000 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to NVPTX -// targets from generalized CGOpenMPRuntimeGPU class. 
-// -//===----------------------------------------------------------------------===// - -#include "CGOpenMPRuntimeNVPTX.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/DeclOpenMP.h" -#include "clang/AST/StmtOpenMP.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/Basic/Cuda.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/IntrinsicsNVPTX.h" - -using namespace clang; -using namespace CodeGen; -using namespace llvm::omp; - -CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntimeGPU(CGM) { - if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP NVPTX can only handle device code."); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), - "nvptx_warp_size"); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F; - F = llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); - return Bld.CreateCall(F, llvm::None, "nvptx_tid"); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F; - F = llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); - return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); -} diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h deleted file mode 100644 index 5f1602959266..000000000000 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ /dev/null @@ -1,43 +0,0 @@ -//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to NVPTX -// targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H -#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H - -#include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/StmtOpenMP.h" - -namespace clang { -namespace CodeGen { - -class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU { - -public: - explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); - - /// Get the GPU warp size. - llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; - - /// Get the id of the current thread on the GPU. - llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; - - /// Get the maximum number of threads in a block of the GPU. - llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; -}; - -} // CodeGen namespace. -} // clang namespace. 
- -#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp index 0a3a722fa653..d399ff919cc3 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp @@ -26,6 +26,7 @@ #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Assumptions.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -196,6 +197,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::SEHTryStmtClass: EmitSEHTryStmt(cast<SEHTryStmt>(*S)); break; + case Stmt::OMPMetaDirectiveClass: + EmitOMPMetaDirective(cast<OMPMetaDirective>(*S)); + break; case Stmt::OMPCanonicalLoopClass: EmitOMPCanonicalLoop(cast<OMPCanonicalLoop>(S)); break; @@ -389,6 +393,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPMaskedDirectiveClass: EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S)); break; + case Stmt::OMPGenericLoopDirectiveClass: + EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S)); + break; } } @@ -709,6 +716,17 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { } void CodeGenFunction::EmitIfStmt(const IfStmt &S) { + // The else branch of a consteval if statement is always the only branch that + // can be runtime evaluated. + if (S.isConsteval()) { + const Stmt *Executed = S.isNegatedConsteval() ? S.getThen() : S.getElse(); + if (Executed) { + RunCleanupsScope ExecutedScope(*this); + EmitStmt(Executed); + } + return; + } + // C99 6.8.4.1: The first substatement is executed if the expression compares // unequal to 0. The condition must be a scalar type. LexicalScope ConditionScope(*this, S.getCond()->getSourceRange()); @@ -1518,6 +1536,12 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S, NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt()); } + // Generate a stop point for debug info if the case statement is + // followed by a default statement. A fallthrough case before a + // default case gets its own branch target. + if (CurCase->getSubStmt()->getStmtClass() == Stmt::DefaultStmtClass) + EmitStopPoint(CurCase); + // Normal default recursion for non-cases. EmitStmt(CurCase->getSubStmt()); } @@ -2188,20 +2212,16 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, CodeGenFunction &CGF, std::vector<llvm::Value *> &RegResults) { if (!HasUnwindClobber) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoUnwind); + Result.addFnAttr(llvm::Attribute::NoUnwind); if (NoMerge) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoMerge); + Result.addFnAttr(llvm::Attribute::NoMerge); // Attach readnone and readonly attributes. 
if (!HasSideEffect) { if (ReadNone) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReadNone); + Result.addFnAttr(llvm::Attribute::ReadNone); else if (ReadOnly) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReadOnly); + Result.addFnAttr(llvm::Attribute::ReadOnly); } // Slap the source location of the inline asm into a !srcloc metadata on the @@ -2223,8 +2243,7 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, // convergent (meaning, they may call an intrinsically convergent op, such // as bar.sync, and so can't have certain optimizations applied around // them). - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Convergent); + Result.addFnAttr(llvm::Attribute::Convergent); // Extract all of the register value results from the asm. if (ResultRegTypes.size() == 1) { RegResults.push_back(&Result); @@ -2610,8 +2629,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::FunctionType::get(ResultType, ArgTypes, false); bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0; + + llvm::InlineAsm::AsmDialect GnuAsmDialect = + CGM.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT + ? llvm::InlineAsm::AD_ATT + : llvm::InlineAsm::AD_Intel; llvm::InlineAsm::AsmDialect AsmDialect = isa<MSAsmStmt>(&S) ? - llvm::InlineAsm::AD_Intel : llvm::InlineAsm::AD_ATT; + llvm::InlineAsm::AD_Intel : GnuAsmDialect; + llvm::InlineAsm *IA = llvm::InlineAsm::get( FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect, HasUnwindClobber); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp index f6233b791182..4f14459e4d28 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -309,8 +309,8 @@ llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { VlaSizePair VlaSize = getVLASize(VAT); Ty = VlaSize.Type; - Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) - : VlaSize.NumElts; + Size = + Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts; } SizeInChars = C.getTypeSizeInChars(Ty); if (SizeInChars.isZero()) @@ -450,7 +450,8 @@ static llvm::Function *emitOutlinedFunctionPrologue( Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), SourceLocation(), DeclarationName(), FunctionTy, Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, - /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); + /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, + /*hasWrittenPrototype=*/false); } for (const FieldDecl *FD : RD->fields()) { QualType ArgType = FD->getType(); @@ -497,9 +498,8 @@ static llvm::Function *emitOutlinedFunctionPrologue( : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } - Args.append( - std::next(CD->param_begin(), CD->getContextParamPosition() + 1), - CD->param_end()); + Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); TargetArgs.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); @@ -518,8 +518,10 @@ static llvm::Function *emitOutlinedFunctionPrologue( F->setDoesNotRecurse(); // Always inline the outlined function if optimizations are enabled. 
- if (CGM.getCodeGenOpts().OptimizationLevel != 0) + if (CGM.getCodeGenOpts().OptimizationLevel != 0) { + F->removeFnAttr(llvm::Attribute::NoInline); F->addFnAttr(llvm::Attribute::AlwaysInline); + } // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, @@ -671,9 +673,9 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, if (EI != VLASizes.end()) { CallArg = EI->second.second; } else { - LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), - Arg->getType(), - AlignmentSource::Decl); + LValue LV = + WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), + Arg->getType(), AlignmentSource::Decl); CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); } } @@ -718,29 +720,29 @@ void CodeGenFunction::EmitOMPAggregateAssign( CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); llvm::PHINode *SrcElementPHI = - Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); + Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); SrcElementPHI->addIncoming(SrcBegin, EntryBB); Address SrcElementCurrent = Address(SrcElementPHI, SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - llvm::PHINode *DestElementPHI = - Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + llvm::PHINode *DestElementPHI = Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. CopyGen(DestElementCurrent, SrcElementCurrent); // Shift the address forward by one element. - llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( - DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, - "omp.arraycpy.dest.element"); - llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( - SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, - "omp.arraycpy.src.element"); + llvm::Value *DestElementNext = + Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI, + /*Idx0=*/1, "omp.arraycpy.dest.element"); + llvm::Value *SrcElementNext = + Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI, + /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. llvm::Value *Done = Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); @@ -1003,9 +1005,9 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { LocalDeclMap.erase(VD); } else { MasterAddr = - Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) - : CGM.GetAddrOfGlobal(VD), - getContext().getDeclAlign(VD)); + Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) + : CGM.GetAddrOfGlobal(VD), + getContext().getDeclAlign(VD)); } // Get the address of the threadprivate variable. 
Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); @@ -1076,7 +1078,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/ - CapturedStmtInfo->lookup(OrigVD) != nullptr, + CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); return EmitLValue(&DRE).getAddress(*this); }); @@ -1085,19 +1087,19 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // for 'firstprivate' clause. if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C, - OrigVD]() { - if (C->getKind() == OMPC_LASTPRIVATE_conditional) { - Address VDAddr = - CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this, - OrigVD); - setAddrOfLocalVar(VD, VDAddr); - return VDAddr; - } - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() { + if (C->getKind() == OMPC_LASTPRIVATE_conditional) { + Address VDAddr = + CGM.getOpenMPRuntime().emitLastprivateConditionalInit( + *this, OrigVD); + setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); assert(IsRegistered && "lastprivate var already registered as private"); (void)IsRegistered; @@ -1292,14 +1294,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit( OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); - PrivateScope.addPrivate( - RHSVD, [this, PrivateVD, RHSVD, IsArray]() { - return IsArray - ? Builder.CreateElementBitCast( + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() { + return IsArray ? Builder.CreateElementBitCast( GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), "rhs.begin") : GetAddrOfLocalVar(PrivateVD); - }); + }); } ++ILHS; ++IRHS; @@ -1786,6 +1786,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { + EmitStmt(S.getIfStmt()); +} + namespace { /// RAII to handle scopes for loop transformation directives. class OMPTransformDirectiveScopeRAII { @@ -1827,9 +1831,7 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, return; } if (SimplifiedS == NextLoop) { - if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) - SimplifiedS = Dir->getTransformedStmt(); - if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS)) + if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS)) SimplifiedS = Dir->getTransformedStmt(); if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) SimplifiedS = CanonLoop->getLoopStmt(); @@ -1953,11 +1955,27 @@ llvm::CanonicalLoopInfo * CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); + // The caller is processing the loop-associated directive processing the \p + // Depth loops nested in \p S. Put the previous pending loop-associated + // directive to the stack. 
If the current loop-associated directive is a loop + // transformation directive, it will push its generated loops onto the stack + // such that together with the loops left here they form the combined loop + // nest for the parent loop-associated directive. + int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; + ExpectedOMPLoopDepth = Depth; + EmitStmt(S); assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); // The last added loop is the outermost one. - return OMPLoopNestStack.back(); + llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); + + // Pop the \p Depth loops requested by the call from that stack and restore + // the previous context. + OMPLoopNestStack.set_size(OMPLoopNestStack.size() - Depth); + ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; + + return Result; } void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { @@ -2113,9 +2131,10 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { CapturedStmtInfo->lookup(OrigVD) != nullptr, VD->getInit()->getType(), VK_LValue, VD->getInit()->getExprLoc()); - EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), - VD->getType()), - /*capturedByInit=*/false); + EmitExprAsInit( + &DRE, VD, + MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), + /*capturedByInit=*/false); EmitAutoVarCleanups(Emission); } else { EmitVarDecl(*VD); @@ -2218,9 +2237,8 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); EmitAutoVarCleanups(VarEmission); LocalDeclMap.erase(PrivateVD); - (void)LoopScope.addPrivate(VD, [&VarEmission]() { - return VarEmission.getAllocatedAddress(); - }); + (void)LoopScope.addPrivate( + VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); }); if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || VD->hasGlobalStorage()) { (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { @@ -2272,7 +2290,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, // Create temp loop control variables with their init values to support // non-rectangular loops. CodeGenFunction::OMPMapVars PreCondVars; - for (const Expr * E: S.dependent_counters()) { + for (const Expr *E : S.dependent_counters()) { if (!E) continue; assert(!E->getType().getNonReferenceType()->isRecordType() && @@ -2587,6 +2605,46 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { } void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { + bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; + + if (UseOMPIRBuilder) { + auto DL = SourceLocToDebugLoc(S.getBeginLoc()); + const Stmt *Inner = S.getRawStmt(); + + // Consume nested loop. Clear the entire remaining loop stack because a + // fully unrolled loop is non-transformable. For partial unrolling the + // generated outer loop is pushed back to the stack. 
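// [Editor's sketch, not part of the patch] The OpenMPIRBuilder path being built up
// here lowers the OpenMP 5.1 'unroll' construct: 'full' maps to unrollLoopFull,
// 'partial(N)' to unrollLoopPartial (pushing the generated outer loop back when an
// enclosing loop-associated directive still expects one), and the clause-less form
// to unrollLoopHeuristic. Invented source example:
void saxpy(int n, float a, const float *x, float *y) {
  #pragma omp unroll partial(4)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}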
+ llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); + OMPLoopNestStack.clear(); + + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; + llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; + + if (S.hasClausesOfKind<OMPFullClause>()) { + assert(ExpectedOMPLoopDepth == 0); + OMPBuilder.unrollLoopFull(DL, CLI); + } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { + uint64_t Factor = 0; + if (Expr *FactorExpr = PartialClause->getFactor()) { + Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); + assert(Factor >= 1 && "Only positive factors are valid"); + } + OMPBuilder.unrollLoopPartial(DL, CLI, Factor, + NeedsUnrolledCLI ? &UnrolledCLI : nullptr); + } else { + OMPBuilder.unrollLoopHeuristic(DL, CLI); + } + + assert((!NeedsUnrolledCLI || UnrolledCLI) && + "NeedsUnrolledCLI implies UnrolledCLI to be set"); + if (UnrolledCLI) + OMPLoopNestStack.push_back(UnrolledCLI); + + return; + } + // This function is only called if the unrolled loop is not consumed by any // other loop-associated construct. Such a loop-associated construct will have // used the transformed AST. @@ -2732,12 +2790,10 @@ void CodeGenFunction::EmitOMPForOuterLoop( CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). - const bool DynamicOrOrdered = - Ordered || RT.isDynamic(ScheduleKind.Schedule); + const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); - assert((Ordered || - !RT.isStaticNonchunked(ScheduleKind.Schedule, - LoopArgs.Chunk != nullptr)) && + assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, + LoopArgs.Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. @@ -3057,15 +3113,15 @@ void CodeGenFunction::EmitOMPTargetSimdDirective( } namespace { - struct ScheduleKindModifiersTy { - OpenMPScheduleClauseKind Kind; - OpenMPScheduleClauseModifier M1; - OpenMPScheduleClauseModifier M2; - ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, - OpenMPScheduleClauseModifier M1, - OpenMPScheduleClauseModifier M2) - : Kind(Kind), M1(M1), M2(M2) {} - }; +struct ScheduleKindModifiersTy { + OpenMPScheduleClauseKind Kind; + OpenMPScheduleClauseModifier M1; + OpenMPScheduleClauseModifier M2; + ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, + OpenMPScheduleClauseModifier M1, + OpenMPScheduleClauseModifier M2) + : Kind(Kind), M1(M1), M2(M2) {} +}; } // namespace bool CodeGenFunction::EmitOMPWorksharingLoop( @@ -3185,8 +3241,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. 
- bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, - /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && + bool StaticChunkedOne = + RT.isStaticChunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) && + HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); bool IsMonotonic = Ordered || @@ -3620,7 +3678,8 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { CGM.getOpenMPRuntime().getOMPBuilder(); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier); + OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI, + AllocaIP, NeedsBarrier); return; } @@ -4440,7 +4499,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/ - CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); Scope.addPrivate(Pair.first, [&CGF, &DRE]() { @@ -4661,7 +4720,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( [&InputInfo]() { return InputInfo.SizesArray; }); // If there is no user-defined mapper, the mapper array will be nullptr. In // this case, we don't need to privatize it. - if (!dyn_cast_or_null<llvm::ConstantPointerNull>( + if (!isa_and_nonnull<llvm::ConstantPointerNull>( InputInfo.MappersArray.getPointer())) { MVD = createImplicitFirstprivateForType( getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); @@ -4786,7 +4845,14 @@ void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { } void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { - CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); + OMPTaskDataTy Data; + // Build list of dependences + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } + CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); } void CodeGenFunction::EmitOMPTaskgroupDirective( @@ -5168,8 +5234,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // iteration space is divided into chunks that are approximately equal // in size, and at most one chunk is distributed to each team of the // league. The size of the chunks is unspecified in this case. 
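// [Editor's sketch, not part of the patch] EmitOMPTaskwaitDirective earlier in this
// hunk now collects 'depend' clauses into OMPTaskDataTy and forwards them to
// emitTaskwaitCall, enabling the OpenMP 5.0 dependent taskwait. Invented example:
int produce_then_read(int *p) {
  #pragma omp task depend(out: p[0])
  p[0] = 42;
  #pragma omp taskwait depend(in: p[0])   // waits only for tasks that write p[0]
  return p[0];
}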
- bool StaticChunked = RT.isStaticChunked( - ScheduleKind, /* Chunked */ Chunk != nullptr) && + bool StaticChunked = + RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) || @@ -5307,12 +5373,78 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, CGF.CapturedStmtInfo = &CapStmtInfo; llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); Fn->setDoesNotRecurse(); - if (CGM.getCodeGenOpts().OptimizationLevel != 0) - Fn->addFnAttr(llvm::Attribute::AlwaysInline); return Fn; } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + if (S.hasClausesOfKind<OMPDependClause>()) { + // The ordered directive with depend clause. + assert(!S.hasAssociatedStmt() && + "No associated statement must be in ordered depend construct."); + InsertPointTy AllocaIP(AllocaInsertPt->getParent(), + AllocaInsertPt->getIterator()); + for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) { + unsigned NumLoops = DC->getNumLoops(); + QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/64, /*Signed=*/1); + llvm::SmallVector<llvm::Value *> StoreValues; + for (unsigned I = 0; I < NumLoops; I++) { + const Expr *CounterVal = DC->getLoopData(I); + assert(CounterVal); + llvm::Value *StoreValue = EmitScalarConversion( + EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + StoreValues.emplace_back(StoreValue); + } + bool IsDependSource = false; + if (DC->getDependencyKind() == OMPC_DEPEND_source) + IsDependSource = true; + Builder.restoreIP(OMPBuilder.createOrderedDepend( + Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr", + IsDependSource)); + } + } else { + // The ordered directive with threads or simd clause, or without clause. + // Without clause, it behaves as if the threads clause is specified. 
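// [Editor's sketch, not part of the patch] The createOrderedDepend calls just above
// lower the cross-iteration ('doacross') form of 'ordered': sink dependences pass
// IsDependSource=false, source dependences pass true. Invented example:
void doacross(int n, float *a) {
  #pragma omp for ordered(1)
  for (int i = 1; i < n; ++i) {
    #pragma omp ordered depend(sink: i - 1)
    a[i] += a[i - 1];
    #pragma omp ordered depend(source)
  }
}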
+ const auto *C = S.getSingleClause<OMPSIMDClause>(); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + if (C) { + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + GenerateOpenMPCapturedVars(*CS, CapturedVars); + llvm::Function *OutlinedFn = + emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); + assert(S.getBeginLoc().isValid() && + "Outlined function call location must be valid."); + ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc()); + OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB, + OutlinedFn, CapturedVars); + } else { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, + FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(), + CodeGenIP, FiniBB); + } + }; + + OMPLexicalScope Scope(*this, S, OMPD_unknown); + Builder.restoreIP( + OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C)); + } + return; + } + if (S.hasClausesOfKind<OMPDependClause>()) { assert(!S.hasAssociatedStmt() && "No associated statement must be in ordered depend construct."); @@ -5863,6 +5995,12 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_novariants: case OMPC_nocontext: case OMPC_filter: + case OMPC_when: + case OMPC_adjust_args: + case OMPC_append_args: + case OMPC_memory_order: + case OMPC_bind: + case OMPC_align: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -5940,8 +6078,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, return; } - auto LPCRegion = - CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); + auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -6471,7 +6608,8 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( auto OrigVarIt = C.varlist_begin(); auto InitIt = C.inits().begin(); for (const Expr *PvtVarIt : C.private_copies()) { - const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); + const auto *OrigVD = + cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); @@ -6494,31 +6632,30 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( if (InitAddrIt == CaptureDeviceAddrMap.end()) continue; - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD, - InitAddrIt, InitVD, - PvtVD]() { - // Initialize the temporary initialization variable with the address we - // get from the runtime library. We have to cast the source address - // because it is always a void *. References are materialized in the - // privatization scope, so the initialization here disregards the fact - // the original variable is a reference. - QualType AddrQTy = - getContext().getPointerType(OrigVD->getType().getNonReferenceType()); - llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); - Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); - setAddrOfLocalVar(InitVD, InitAddr); - - // Emit private declaration, it will be initialized by the value we - // declaration we just added to the local declarations map. 
- EmitDecl(*PvtVD); - - // The initialization variables reached its purpose in the emission - // of the previous declaration, so we don't need it anymore. - LocalDeclMap.erase(InitVD); - - // Return the address of the private variable. - return GetAddrOfLocalVar(PvtVD); - }); + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() { + // Initialize the temporary initialization variable with the address + // we get from the runtime library. We have to cast the source address + // because it is always a void *. References are materialized in the + // privatization scope, so the initialization here disregards the fact + // the original variable is a reference. + QualType AddrQTy = getContext().getPointerType( + OrigVD->getType().getNonReferenceType()); + llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); + Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); + setAddrOfLocalVar(InitVD, InitAddr); + + // Emit private declaration, it will be initialized by the value we + // declaration we just added to the local declarations map. + EmitDecl(*PvtVD); + + // The initialization variables reached its purpose in the emission + // of the previous declaration, so we don't need it anymore. + LocalDeclMap.erase(InitVD); + + // Return the address of the private variable. + return GetAddrOfLocalVar(PvtVD); + }); assert(IsRegistered && "firstprivate var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; @@ -6879,11 +7016,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { // TODO: Check if we should emit tied or untied task. Data.Tied = true; // Set scheduling for taskloop - if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { + if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) { // grainsize clause Data.Schedule.setInt(/*IntVal=*/false); Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); - } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { + } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) { // num_tasks clause Data.Schedule.setInt(/*IntVal=*/true); Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); @@ -7111,6 +7248,16 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } +void CodeGenFunction::EmitOMPGenericLoopDirective( + const OMPGenericLoopDirective &S) { + // Unimplemented, just inline the underlying statement for now. 
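// [Editor's sketch, not part of the patch] EmitOMPGenericLoopDirective, continued
// right below, handles the OpenMP 5.0 'loop' construct; for now it simply emits the
// captured loop body as an inlined region. Invented source example:
void scale(int n, float *a) {
  #pragma omp loop bind(thread)
  for (int i = 0; i < n; ++i)
    a[i] *= 2.0f;
}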
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); +} + void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp index 9eb650814238..482499da1b0f 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp @@ -201,7 +201,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, Address ThisPtr(&*AI, CGM.getClassPointerAlignment(MD->getParent())); llvm::BasicBlock *EntryBB = &Fn->front(); llvm::BasicBlock::iterator ThisStore = - std::find_if(EntryBB->begin(), EntryBB->end(), [&](llvm::Instruction &I) { + llvm::find_if(*EntryBB, [&](llvm::Instruction &I) { return isa<llvm::StoreInst>(I) && I.getOperand(0) == ThisPtr.getPointer(); }); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp index b30bd11edbad..52c54d3c7a72 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp @@ -25,8 +25,10 @@ #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/Hashing.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" @@ -50,6 +52,8 @@ using namespace clang; using namespace llvm; +#define DEBUG_TYPE "codegenaction" + namespace clang { class BackendConsumer; class ClangDiagnosticHandler final : public DiagnosticHandler { @@ -125,6 +129,17 @@ namespace clang { SmallVector<LinkModule, 4> LinkModules; + // A map from mangled names to their function's source location, used for + // backend diagnostics as the Clang AST may be unavailable. We actually use + // the mangled name's hash as the key because mangled names can be very + // long and take up lots of space. Using a hash can cause name collision, + // but that is rare and the consequences are pointing to a wrong source + // location which is not severe. This is a vector instead of an actual map + // because we optimize for time building this map rather than time + // retrieving an entry, as backend diagnostics are uncommon. + std::vector<std::pair<llvm::hash_code, FullSourceLoc>> + ManglingFullSourceLocs; + // This is here so that the diagnostic printer knows the module a diagnostic // refers to. 
llvm::Module *CurLinkModule = nullptr; @@ -160,7 +175,7 @@ namespace clang { const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, const TargetOptions &TargetOpts, - const LangOptions &LangOpts, + const LangOptions &LangOpts, llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules, LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), @@ -170,7 +185,7 @@ namespace clang { LLVMIRGenerationRefCount(0), Gen(CreateLLVMCodeGen(Diags, "", HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)) { + LinkModules(std::move(LinkModules)), CurLinkModule(Module) { TimerIsEnabled = CodeGenOpts.TimePasses; llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; @@ -329,6 +344,27 @@ namespace clang { if (LinkInModules()) return; + for (auto &F : getModule()->functions()) { + if (const Decl *FD = Gen->GetDeclForMangledName(F.getName())) { + auto Loc = FD->getASTContext().getFullLoc(FD->getLocation()); + // TODO: use a fast content hash when available. + auto NameHash = llvm::hash_value(F.getName()); + ManglingFullSourceLocs.push_back(std::make_pair(NameHash, Loc)); + } + } + + if (CodeGenOpts.ClearASTBeforeBackend) { + LLVM_DEBUG(llvm::dbgs() << "Clearing AST...\n"); + // Access to the AST is no longer available after this. + // Other things that the ASTContext manages are still available, e.g. + // the SourceManager. It'd be nice if we could separate out all the + // things in ASTContext used after this point and null out the + // ASTContext, but too many various parts of the ASTContext are still + // used in various parts. + C.cleanup(); + C.getAllocator().Reset(); + } + EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, @@ -375,6 +411,8 @@ namespace clang { bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const; + Optional<FullSourceLoc> getFunctionSourceLocation(const Function &F) const; + void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); /// Specialized handler for InlineAsm diagnostic. /// \return True if the diagnostic has been successfully reported, false @@ -401,6 +439,7 @@ namespace clang { const llvm::OptimizationRemarkAnalysisAliasing &D); void OptimizationFailureHandler( const llvm::DiagnosticInfoOptimizationFailure &D); + void DontCallDiagHandler(const DiagnosticInfoDontCall &D); }; void BackendConsumer::anchor() {} @@ -567,17 +606,16 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { // We do not know how to format other severities. 
return false; - if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) { - // FIXME: Shouldn't need to truncate to uint32_t - Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()), - diag::warn_fe_frame_larger_than) - << static_cast<uint32_t>(D.getStackSize()) - << static_cast<uint32_t>(D.getStackLimit()) - << Decl::castToDeclContext(ND); - return true; - } + auto Loc = getFunctionSourceLocation(D.getFunction()); + if (!Loc) + return false; - return false; + // FIXME: Shouldn't need to truncate to uint32_t + Diags.Report(*Loc, diag::warn_fe_frame_larger_than) + << static_cast<uint32_t>(D.getStackSize()) + << static_cast<uint32_t>(D.getStackLimit()) + << llvm::demangle(D.getFunction().getName().str()); + return true; } const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( @@ -606,9 +644,10 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( // function definition. We use the definition's right brace to differentiate // from diagnostics that genuinely relate to the function itself. FullSourceLoc Loc(DILoc, SourceMgr); - if (Loc.isInvalid()) - if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName())) - Loc = FD->getASTContext().getFullLoc(FD->getLocation()); + if (Loc.isInvalid()) { + if (auto MaybeLoc = getFunctionSourceLocation(D.getFunction())) + Loc = *MaybeLoc; + } if (DILoc.isInvalid() && D.isLocationAvailable()) // If we were not able to translate the file:line:col information @@ -621,6 +660,16 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( return Loc; } +Optional<FullSourceLoc> +BackendConsumer::getFunctionSourceLocation(const Function &F) const { + auto Hash = llvm::hash_value(F.getName()); + for (const auto &Pair : ManglingFullSourceLocs) { + if (Pair.first == Hash) + return Pair.second; + } + return Optional<FullSourceLoc>(); +} + void BackendConsumer::UnsupportedDiagHandler( const llvm::DiagnosticInfoUnsupported &D) { // We only support warnings or errors. @@ -758,6 +807,21 @@ void BackendConsumer::OptimizationFailureHandler( EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure); } +void BackendConsumer::DontCallDiagHandler(const DiagnosticInfoDontCall &D) { + SourceLocation LocCookie = + SourceLocation::getFromRawEncoding(D.getLocCookie()); + + // FIXME: we can't yet diagnose indirect calls. When/if we can, we + // should instead assert that LocCookie.isValid(). + if (!LocCookie.isValid()) + return; + + Diags.Report(LocCookie, D.getSeverity() == DiagnosticSeverity::DS_Error + ? diag::err_fe_backend_error_attr + : diag::warn_fe_backend_warning_attr) + << llvm::demangle(D.getFunctionName().str()) << D.getNote(); +} + /// This function is invoked when the backend needs /// to report something to the user. void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { @@ -779,11 +843,7 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { ComputeDiagID(Severity, backend_frame_larger_than, DiagID); break; case DK_Linker: - assert(CurLinkModule); - // FIXME: stop eating the warnings and notes. 
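// [Editor's sketch, not part of the patch] DontCallDiagHandler above surfaces the
// backend's DK_DontCall diagnostics, produced for calls to functions carrying the
// GCC-compatible error/warning attributes (see the "dontcall-error"/"dontcall-warn"
// function attributes added in CodeGenModule later in this section). Invented example:
__attribute__((error("never call this in a freestanding build")))
void forbidden(void);

void maybe(int x) {
  if (x > 0)
    forbidden();   // diagnosed by the backend only if the call survives optimization
}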
- if (Severity != DS_Error) - return; - DiagID = diag::err_fe_cannot_link_module; + ComputeDiagID(Severity, linking_module, DiagID); break; case llvm::DK_OptimizationRemark: // Optimization remarks are always handled completely by this @@ -833,6 +893,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { case llvm::DK_Unsupported: UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI)); return; + case llvm::DK_DontCall: + DontCallDiagHandler(cast<DiagnosticInfoDontCall>(DI)); + return; default: // Plugin IDs are not bound to any value as they are set dynamically. ComputeDiagRemarkID(Severity, backend_plugin, DiagID); @@ -845,9 +908,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { DI.print(DP); } - if (DiagID == diag::err_fe_cannot_link_module) { - Diags.Report(diag::err_fe_cannot_link_module) - << CurLinkModule->getModuleIdentifier() << MsgStorage; + if (DI.getKind() == DK_Linker) { + assert(CurLinkModule && "CurLinkModule must be set for linker diagnostics"); + Diags.Report(DiagID) << CurLinkModule->getModuleIdentifier() << MsgStorage; return; } @@ -1088,7 +1151,7 @@ void CodeGenAction::ExecuteAction() { // BackendConsumer. BackendConsumer Result(BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), - CI.getTargetOpts(), CI.getLangOpts(), + CI.getTargetOpts(), CI.getLangOpts(), TheModule.get(), std::move(LinkModules), *VMContext, nullptr); // PR44896: Force DiscardValueNames as false. DiscardValueNames cannot be // true here because the valued names are needed for reading textual IR. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp index a2384456ea94..d87cf2d49720 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/CRC.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" + using namespace clang; using namespace CodeGen; @@ -78,7 +79,6 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) EHStack.setCGF(this); SetFastMathFlags(CurFPFeatures); - SetFPModel(); } CodeGenFunction::~CodeGenFunction() { @@ -109,17 +109,6 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) { llvm_unreachable("Unsupported FP Exception Behavior"); } -void CodeGenFunction::SetFPModel() { - llvm::RoundingMode RM = getLangOpts().getFPRoundingMode(); - auto fpExceptionBehavior = ToConstrainedExceptMD( - getLangOpts().getFPExceptionMode()); - - Builder.setDefaultConstrainedRounding(RM); - Builder.setDefaultConstrainedExcept(fpExceptionBehavior); - Builder.setIsFPConstrained(fpExceptionBehavior != llvm::fp::ebIgnore || - RM != llvm::RoundingMode::NearestTiesToEven); -} - void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) { llvm::FastMathFlags FMF; FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate()); @@ -393,6 +382,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { "__cyg_profile_func_exit"); } + if (ShouldSkipSanitizerInstrumentation()) + CurFn->addFnAttr(llvm::Attribute::DisableSanitizerInstrumentation); + // Emit debug descriptor for function end. 
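// [Editor's sketch, not part of the patch] The ShouldSkipSanitizerInstrumentation
// check above translates the new Clang attribute into the
// DisableSanitizerInstrumentation IR function attribute, so sanitizer passes skip
// the function entirely. Invented example:
__attribute__((disable_sanitizer_instrumentation))
void early_boot_probe(unsigned long addr) {
  *(volatile unsigned long *)addr = 0;   // must not be shadow-checked this early
}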
if (CGDebugInfo *DI = getDebugInfo()) DI->EmitFunctionEnd(Builder, CurFn); @@ -432,6 +424,14 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { AllocaInsertPt = nullptr; Ptr->eraseFromParent(); + // PostAllocaInsertPt, if created, was lazily created when it was required, + // remove it now since it was just created for our own convenience. + if (PostAllocaInsertPt) { + llvm::Instruction *PostPtr = PostAllocaInsertPt; + PostAllocaInsertPt = nullptr; + PostPtr->eraseFromParent(); + } + // If someone took the address of a label but never did an indirect goto, we // made a zero entry PHI node, which is illegal, zap it now. if (IndirectBranch) { @@ -496,11 +496,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // function. CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); - // Add vscale attribute if appropriate. - if (getLangOpts().ArmSveVectorBits) { - unsigned VScale = getLangOpts().ArmSveVectorBits / 128; - CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), - VScale, VScale)); + // Add vscale_range attribute if appropriate. + Optional<std::pair<unsigned, unsigned>> VScaleRange = + getContext().getTargetInfo().getVScaleRange(getLangOpts()); + if (VScaleRange) { + CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs( + getLLVMContext(), VScaleRange.getValue().first, + VScaleRange.getValue().second)); } // If we generated an unreachable return block, delete it now. @@ -529,6 +531,12 @@ bool CodeGenFunction::ShouldInstrumentFunction() { return true; } +bool CodeGenFunction::ShouldSkipSanitizerInstrumentation() { + if (!CurFuncDecl) + return false; + return CurFuncDecl->hasAttr<DisableSanitizerInstrumentationAttr>(); +} + /// ShouldXRayInstrument - Return true if the current function should be /// instrumented with XRay nop sleds. bool CodeGenFunction::ShouldXRayInstrumentFunction() const { @@ -947,10 +955,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, (getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>()))) Fn->addFnAttr(llvm::Attribute::NoRecurse); - if (FD) { - Builder.setIsFPConstrained(FD->hasAttr<StrictFPAttr>()); - if (FD->hasAttr<StrictFPAttr>()) - Fn->addFnAttr(llvm::Attribute::StrictFP); + llvm::RoundingMode RM = getLangOpts().getFPRoundingMode(); + llvm::fp::ExceptionBehavior FPExceptionBehavior = + ToConstrainedExceptMD(getLangOpts().getFPExceptionMode()); + Builder.setDefaultConstrainedRounding(RM); + Builder.setDefaultConstrainedExcept(FPExceptionBehavior); + if ((FD && (FD->UsesFPIntrin() || FD->hasAttr<StrictFPAttr>())) || + (!FD && (FPExceptionBehavior != llvm::fp::ebIgnore || + RM != llvm::RoundingMode::NearestTiesToEven))) { + Builder.setIsFPConstrained(true); + Fn->addFnAttr(llvm::Attribute::StrictFP); } // If a custom alignment is used, force realigning to this alignment on @@ -975,7 +989,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // precise source location of the checked return statement. if (requiresReturnValueCheck()) { ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr"); - InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy)); + Builder.CreateStore(llvm::ConstantPointerNull::get(Int8PtrTy), + ReturnLocation); } // Emit subprogram debug descriptor. @@ -983,16 +998,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Reconstruct the type from the argument list so that implicit parameters, // such as 'this' and 'vtt', show up in the debug info. 
Preserve the calling // convention. - CallingConv CC = CallingConv::CC_C; - if (FD) - if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>()) - CC = SrcFnTy->getCallConv(); - SmallVector<QualType, 16> ArgTypes; - for (const VarDecl *VD : Args) - ArgTypes.push_back(VD->getType()); - QualType FnType = getContext().getFunctionType( - RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); - DI->emitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, CurFuncIsThunk); + DI->emitFunctionStart(GD, Loc, StartLoc, + DI->getFunctionType(FD, RetTy, Args), CurFn, + CurFuncIsThunk); } if (ShouldInstrumentFunction()) { @@ -1044,7 +1052,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Fn->addFnAttr("packed-stack"); } - if (CGM.getCodeGenOpts().WarnStackSize != UINT_MAX) + if (CGM.getCodeGenOpts().WarnStackSize != UINT_MAX && + !CGM.getDiags().isIgnored(diag::warn_fe_backend_frame_larger_than, Loc)) Fn->addFnAttr("warn-stack-size", std::to_string(CGM.getCodeGenOpts().WarnStackSize)); @@ -1295,6 +1304,45 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, FunctionArgList Args; QualType ResTy = BuildFunctionArgList(GD, Args); + // When generating code for a builtin with an inline declaration, use a + // mangled name to hold the actual body, while keeping an external definition + // in case the function pointer is referenced somewhere. + if (Fn) { + if (FD->isInlineBuiltinDeclaration()) { + std::string FDInlineName = (Fn->getName() + ".inline").str(); + llvm::Module *M = Fn->getParent(); + llvm::Function *Clone = M->getFunction(FDInlineName); + if (!Clone) { + Clone = llvm::Function::Create(Fn->getFunctionType(), + llvm::GlobalValue::InternalLinkage, + Fn->getAddressSpace(), FDInlineName, M); + Clone->addFnAttr(llvm::Attribute::AlwaysInline); + } + Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); + Fn = Clone; + } + + // Detect the unusual situation where an inline version is shadowed by a + // non-inline version. In that case we should pick the external one + // everywhere. That's GCC behavior too. Unfortunately, I cannot find a way + // to detect that situation before we reach codegen, so do some late + // replacement. + else { + for (const FunctionDecl *PD = FD->getPreviousDecl(); PD; + PD = PD->getPreviousDecl()) { + if (LLVM_UNLIKELY(PD->isInlineBuiltinDeclaration())) { + std::string FDInlineName = (Fn->getName() + ".inline").str(); + llvm::Module *M = Fn->getParent(); + if (llvm::Function *Clone = M->getFunction(FDInlineName)) { + Clone->replaceAllUsesWith(Fn); + Clone->eraseFromParent(); + } + break; + } + } + } + } + // Check if we should generate debug info for this function. if (FD->hasAttr<NoDebugAttr>()) { // Clear non-distinct debug info that was possibly attached to the function @@ -2399,15 +2447,19 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute"); llvm::Value *V = Addr.getPointer(); llvm::Type *VTy = V->getType(); - llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, - CGM.Int8PtrTy); + auto *PTy = dyn_cast<llvm::PointerType>(VTy); + unsigned AS = PTy ? 
PTy->getAddressSpace() : 0; + llvm::PointerType *IntrinTy = + llvm::PointerType::getWithSamePointeeType(CGM.Int8PtrTy, AS); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, IntrinTy); for (const auto *I : D->specific_attrs<AnnotateAttr>()) { // FIXME Always emit the cast inst so we can differentiate between // annotation on the first field of a struct and annotation on the struct // itself. - if (VTy != CGM.Int8PtrTy) - V = Builder.CreateBitCast(V, CGM.Int8PtrTy); + if (VTy != IntrinTy) + V = Builder.CreateBitCast(V, IntrinTy); V = EmitAnnotationCall(F, V, I->getAnnotation(), D->getLocation(), I); V = Builder.CreateBitCast(V, VTy); } @@ -2478,8 +2530,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, // Return if the builtin doesn't have any required features. if (FeatureList.empty()) return; - assert(FeatureList.find(' ') == StringRef::npos && - "Space in feature list"); + assert(!FeatureList.contains(' ') && "Space in feature list"); TargetFeatures TF(CallerFeatureMap); if (!TF.hasRequiredFeatures(FeatureList)) CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h index 4e087ce51e37..ff5b6634da1c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h @@ -291,6 +291,10 @@ public: /// nest would extend. SmallVector<llvm::CanonicalLoopInfo *, 4> OMPLoopNestStack; + /// Number of nested loop to be consumed by the last surrounding + /// loop-associated directive. + int ExpectedOMPLoopDepth = 0; + // CodeGen lambda for loops and support for ordered clause typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &, JumpDest)> @@ -375,6 +379,34 @@ public: /// we prefer to insert allocas. llvm::AssertingVH<llvm::Instruction> AllocaInsertPt; +private: + /// PostAllocaInsertPt - This is a place in the prologue where code can be + /// inserted that will be dominated by all the static allocas. This helps + /// achieve two things: + /// 1. Contiguity of all static allocas (within the prologue) is maintained. + /// 2. All other prologue code (which are dominated by static allocas) do + /// appear in the source order immediately after all static allocas. + /// + /// PostAllocaInsertPt will be lazily created when it is *really* required. + llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr; + +public: + /// Return PostAllocaInsertPt. If it is not yet created, then insert it + /// immediately after AllocaInsertPt. + llvm::Instruction *getPostAllocaInsertPoint() { + if (!PostAllocaInsertPt) { + assert(AllocaInsertPt && + "Expected static alloca insertion point at function prologue"); + assert(AllocaInsertPt->getParent()->isEntryBlock() && + "EBB should be entry block of the current code gen function"); + PostAllocaInsertPt = AllocaInsertPt->clone(); + PostAllocaInsertPt->setName("postallocapt"); + PostAllocaInsertPt->insertAfter(AllocaInsertPt); + } + + return PostAllocaInsertPt; + } + /// API for captured statement code generation. 
class CGCapturedStmtInfo { public: @@ -467,7 +499,7 @@ public: AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {} AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {} bool hasFunctionDecl() const { - return dyn_cast_or_null<FunctionDecl>(CalleeDecl); + return isa_and_nonnull<FunctionDecl>(CalleeDecl); } const Decl *getDecl() const { return CalleeDecl; } unsigned getNumParams() const { @@ -1775,6 +1807,24 @@ public: CGF.Builder.CreateBr(&FiniBB); } + static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB, llvm::Function *Fn, + ArrayRef<llvm::Value *> Args) { + llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); + if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator()) + CodeGenIPBBTI->eraseFromParent(); + + CGF.Builder.SetInsertPoint(CodeGenIPBB); + + if (Fn->doesNotThrow()) + CGF.EmitNounwindRuntimeCall(Fn, Args); + else + CGF.EmitRuntimeCall(Fn, Args); + + if (CGF.Builder.saveIP().isSet()) + CGF.Builder.CreateBr(&FiniBB); + } + /// RAII for preserving necessary info during Outlined region body codegen. class OutlinedRegionBodyRAII { @@ -2286,6 +2336,10 @@ public: /// instrumented with __cyg_profile_func_* calls bool ShouldInstrumentFunction(); + /// ShouldSkipSanitizerInstrumentation - Return true if the current function + /// should not be instrumented with sanitizers. + bool ShouldSkipSanitizerInstrumentation(); + /// ShouldXRayInstrument - Return true if the current function should be /// instrumented with XRay nop sleds. bool ShouldXRayInstrumentFunction() const; @@ -2519,15 +2573,6 @@ public: Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp"); - /// InitTempAlloca - Provide an initial value for the given alloca which - /// will be observable at all locations in the function. - /// - /// The address should be something that was returned from one of - /// the CreateTempAlloca or CreateMemTemp routines, and the - /// initializer must be valid in the entry block (i.e. it must - /// either be a constant or an argument value). - void InitTempAlloca(Address Alloca, llvm::Value *Value); - /// CreateIRTemp - Create a temporary IR object of the given type, with /// appropriate alignment. This routine should only be used when an temporary /// value needs to be stored into an alloca (for example, to avoid explicit @@ -3438,6 +3483,7 @@ public: const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo); + void EmitOMPMetaDirective(const OMPMetaDirective &S); void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPTileDirective(const OMPTileDirective &S); @@ -3511,6 +3557,7 @@ public: const OMPTargetTeamsDistributeParallelForSimdDirective &S); void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); + void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); /// Emit device code for the target directive. 
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, @@ -4051,10 +4098,9 @@ public: RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, ReturnValueSlot ReturnValue); - RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue); - RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue); + RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E); + RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E); + RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E); RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); @@ -4126,30 +4172,30 @@ public: /// SVEBuiltinMemEltTy - Returns the memory element type for this memory /// access builtin. Only required if it can't be inferred from the base /// pointer operand. - llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags); + llvm::Type *SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags); - SmallVector<llvm::Type *, 2> getSVEOverloadTypes(SVETypeFlags TypeFlags, - llvm::Type *ReturnType, - ArrayRef<llvm::Value *> Ops); - llvm::Type *getEltType(SVETypeFlags TypeFlags); + SmallVector<llvm::Type *, 2> + getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, + ArrayRef<llvm::Value *> Ops); + llvm::Type *getEltType(const SVETypeFlags &TypeFlags); llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags); - llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags); - llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags); + llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags); + llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags); llvm::Value *EmitSVEDupX(llvm::Value *Scalar); llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty); llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty); - llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); - llvm::Value *EmitSVEMovl(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef<llvm::Value *> Ops, unsigned BuiltinID); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy); - llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); - llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, @@ -4158,15 +4204,16 @@ public: llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); - llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); - llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); - llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags, - SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); - llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEStructLoad(const SVETypeFlags &TypeFlags, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value 
*EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); @@ -4588,9 +4635,6 @@ public: /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); - /// SetFPModel - Control floating point behavior via fp-model settings. - void SetFPModel(); - /// Set the codegen fast-math flags. void SetFastMathFlags(FPOptions FPFeatures); @@ -4726,8 +4770,6 @@ public: void EmitMultiVersionResolver(llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options); - static uint64_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); - private: QualType getVarArgType(const Expr *Arg); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp index 49a1396b58e3..59f3e0270571 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp @@ -19,8 +19,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeAMDGCN.h" -#include "CGOpenMPRuntimeNVPTX.h" +#include "CGOpenMPRuntimeGPU.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "ConstantEmitter.h" @@ -63,6 +62,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/X86TargetParser.h" using namespace clang; using namespace CodeGen; @@ -130,8 +130,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, C.getTargetInfo().getMaxPointerWidth()); Int8PtrTy = Int8Ty->getPointerTo(0); Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); - AllocaInt8PtrTy = Int8Ty->getPointerTo( - M.getDataLayout().getAllocaAddrSpace()); + const llvm::DataLayout &DL = M.getDataLayout(); + AllocaInt8PtrTy = Int8Ty->getPointerTo(DL.getAllocaAddrSpace()); + GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace()); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); @@ -242,14 +243,10 @@ void CodeGenModule::createOpenMPRuntime() { switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - assert(getLangOpts().OpenMPIsDevice && - "OpenMP NVPTX is only prepared to deal with device code."); - OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); - break; case llvm::Triple::amdgcn: assert(getLangOpts().OpenMPIsDevice && - "OpenMP AMDGCN is only prepared to deal with device code."); - OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); + "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this)); break; default: if (LangOpts.OpenMPSimd) @@ -315,22 +312,58 @@ void CodeGenModule::applyGlobalValReplacements() { // This is only used in aliases that we created and we know they have a // linear structure. -static const llvm::GlobalObject *getAliasedGlobal( - const llvm::GlobalIndirectSymbol &GIS) { - llvm::SmallPtrSet<const llvm::GlobalIndirectSymbol*, 4> Visited; - const llvm::Constant *C = &GIS; - for (;;) { - C = C->stripPointerCasts(); - if (auto *GO = dyn_cast<llvm::GlobalObject>(C)) - return GO; - // stripPointerCasts will not walk over weak aliases. 
- auto *GIS2 = dyn_cast<llvm::GlobalIndirectSymbol>(C); - if (!GIS2) - return nullptr; - if (!Visited.insert(GIS2).second) - return nullptr; - C = GIS2->getIndirectSymbol(); +static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) { + const llvm::Constant *C; + if (auto *GA = dyn_cast<llvm::GlobalAlias>(GV)) + C = GA->getAliasee(); + else if (auto *GI = dyn_cast<llvm::GlobalIFunc>(GV)) + C = GI->getResolver(); + else + return GV; + + const auto *AliaseeGV = dyn_cast<llvm::GlobalValue>(C->stripPointerCasts()); + if (!AliaseeGV) + return nullptr; + + const llvm::GlobalValue *FinalGV = AliaseeGV->getAliaseeObject(); + if (FinalGV == GV) + return nullptr; + + return FinalGV; +} + +static bool checkAliasedGlobal(DiagnosticsEngine &Diags, + SourceLocation Location, bool IsIFunc, + const llvm::GlobalValue *Alias, + const llvm::GlobalValue *&GV) { + GV = getAliasedGlobal(Alias); + if (!GV) { + Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; + return false; + } + + if (GV->isDeclaration()) { + Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; + return false; + } + + if (IsIFunc) { + // Check resolver function type. + const auto *F = dyn_cast<llvm::Function>(GV); + if (!F) { + Diags.Report(Location, diag::err_alias_to_undefined) + << IsIFunc << IsIFunc; + return false; + } + + llvm::FunctionType *FTy = F->getFunctionType(); + if (!FTy->getReturnType()->isPointerTy()) { + Diags.Report(Location, diag::err_ifunc_resolver_return); + return false; + } } + + return true; } void CodeGenModule::checkAliases() { @@ -347,27 +380,19 @@ void CodeGenModule::checkAliases() { Location = A->getLocation(); else llvm_unreachable("Not an alias or ifunc?"); + StringRef MangledName = getMangledName(GD); - llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry); - const llvm::GlobalValue *GV = getAliasedGlobal(*Alias); - if (!GV) { - Error = true; - Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; - } else if (GV->isDeclaration()) { + llvm::GlobalValue *Alias = GetGlobalValue(MangledName); + const llvm::GlobalValue *GV = nullptr; + if (!checkAliasedGlobal(Diags, Location, IsIFunc, Alias, GV)) { Error = true; - Diags.Report(Location, diag::err_alias_to_undefined) - << IsIFunc << IsIFunc; - } else if (IsIFunc) { - // Check resolver function type. - llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>( - GV->getType()->getPointerElementType()); - assert(FTy); - if (!FTy->getReturnType()->isPointerTy()) - Diags.Report(Location, diag::err_ifunc_resolver_return); + continue; } - llvm::Constant *Aliasee = Alias->getIndirectSymbol(); + llvm::Constant *Aliasee = + IsIFunc ? cast<llvm::GlobalIFunc>(Alias)->getResolver() + : cast<llvm::GlobalAlias>(Alias)->getAliasee(); + llvm::GlobalValue *AliaseeGV; if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee)) AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0)); @@ -386,13 +411,17 @@ void CodeGenModule::checkAliases() { // compatibility with gcc we implement it by just pointing the alias // to its aliasee's aliasee. We also warn, since the user is probably // expecting the link to be weak. 
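// [Editor's sketch, not part of the patch] checkAliasedGlobal above validates alias
// and ifunc targets: the target must not be cyclic or undefined, and an ifunc
// resolver must be a defined function returning a pointer, otherwise
// err_cyclic_alias / err_alias_to_undefined / err_ifunc_resolver_return is emitted.
// Invented example of a well-formed ifunc:
static int impl(int x) { return x + 1; }
static int (*pick_impl(void))(int) { return impl; }
int fast(int) __attribute__((ifunc("pick_impl")));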
- if (auto GA = dyn_cast<llvm::GlobalIndirectSymbol>(AliaseeGV)) { + if (auto *GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) { if (GA->isInterposable()) { Diags.Report(Location, diag::warn_alias_to_weak_alias) << GV->getName() << GA->getName() << IsIFunc; Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - GA->getIndirectSymbol(), Alias->getType()); - Alias->setIndirectSymbol(Aliasee); + GA->getAliasee(), Alias->getType()); + + if (IsIFunc) + cast<llvm::GlobalIFunc>(Alias)->setResolver(Aliasee); + else + cast<llvm::GlobalAlias>(Alias)->setAliasee(Aliasee); } } } @@ -401,8 +430,7 @@ void CodeGenModule::checkAliases() { for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); - llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry); + llvm::GlobalValue *Alias = GetGlobalValue(MangledName); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } @@ -537,6 +565,7 @@ void CodeGenModule::Release() { "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr, llvm::GlobalVariable::NotThreadLocal); addCompilerUsedGlobal(Var); + getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); } emitLLVMUsed(); @@ -731,8 +760,9 @@ void CodeGenModule::Release() { if (getTriple().isSPIR()) { // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata. - // C++ is backwards compatible with OpenCL v2.0. - auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion; + // C++ for OpenCL has a distinct mapping for version compatibility with + // OpenCL. + auto Version = LangOpts.getOpenCLCompatibleVersion(); llvm::Metadata *SPIRVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, Version / 100)), @@ -810,6 +840,8 @@ void CodeGenModule::Release() { getCodeGenOpts().StackProtectorGuardOffset); if (getCodeGenOpts().StackAlignment) getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); + if (getCodeGenOpts().SkipRaxSetup) + getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); @@ -825,9 +857,8 @@ void CodeGenModule::Release() { void CodeGenModule::EmitOpenCLMetadata() { // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. - // C++ is backwards compatible with OpenCL v2.0. - // FIXME: We might need to add CXX version at some point too? - auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion; + // C++ for OpenCL has a distinct mapping for versions compatibile with OpenCL. 
+ auto Version = LangOpts.getOpenCLCompatibleVersion(); llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, Version / 100)), @@ -1781,7 +1812,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); return; } @@ -1868,7 +1899,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) @@ -1918,13 +1949,13 @@ void CodeGenModule::setLLVMFunctionFEnvAttributes(const FunctionDecl *D, if (D->hasAttr<StrictFPAttr>()) { llvm::AttrBuilder FuncAttrs; FuncAttrs.addAttribute("strictfp"); - F->addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); + F->addFnAttrs(FuncAttrs); } } void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { const Decl *D = GD.getDecl(); - if (dyn_cast_or_null<NamedDecl>(D)) + if (isa_and_nonnull<NamedDecl>(D)) setGVProperties(GV, GD); else GV->setVisibility(llvm::GlobalValue::DefaultVisibility); @@ -2034,8 +2065,8 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, RemoveAttrs.addAttribute("target-cpu"); RemoveAttrs.addAttribute("target-features"); RemoveAttrs.addAttribute("tune-cpu"); - F->removeAttributes(llvm::AttributeList::FunctionIndex, RemoveAttrs); - F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); + F->removeFnAttrs(RemoveAttrs); + F->addFnAttrs(Attrs); } } @@ -2118,7 +2149,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->arg_begin()->getType() ->canLosslesslyBitCastTo(F->getReturnType()) && "unexpected this return"); - F->addAttribute(1, llvm::Attribute::Returned); + F->addParamAttr(0, llvm::Attribute::Returned); } // Only a few attributes are set on declarations; these may later be @@ -2136,6 +2167,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, else if (const auto *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); + if (const auto *EA = FD->getAttr<ErrorAttr>()) { + if (EA->isError()) + F->addFnAttr("dontcall-error", EA->getUserDiagnostic()); + else if (EA->isWarning()) + F->addFnAttr("dontcall-warn", EA->getUserDiagnostic()); + } + // If we plan on emitting this inline builtin, we can't treat it as a builtin. if (FD->isInlineBuiltinDeclaration()) { const FunctionDecl *FDBody; @@ -2144,15 +2182,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, assert(HasBody && "Inline builtin declarations should always have an " "available body!"); if (shouldEmitFunction(FDBody)) - F->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoBuiltin); + F->addFnAttr(llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. - F->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoBuiltin); + F->addFnAttr(llvm::Attribute::NoBuiltin); } if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD)) @@ -2281,9 +2317,9 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, } // Import this module's dependencies. 
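The new ErrorAttr handling above lowers __attribute__((error)) and __attribute__((warning)) to the "dontcall-error" and "dontcall-warn" IR function attributes; the backend is then expected to diagnose any call to such a function that survives optimization. A hedged sketch of the source form (function names and messages are illustrative):

    __attribute__((error("not available in this configuration")))
    void forbidden(void);

    __attribute__((warning("slow fallback path")))
    void fallback(void);

    void caller(void) {
      fallback();   // emitted as a call to a function carrying "dontcall-warn"
    }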
- for (unsigned I = Mod->Imports.size(); I > 0; --I) { - if (Visited.insert(Mod->Imports[I - 1]).second) - addLinkOptionsPostorder(CGM, Mod->Imports[I-1], Metadata, Visited); + for (Module *Import : llvm::reverse(Mod->Imports)) { + if (Visited.insert(Import).second) + addLinkOptionsPostorder(CGM, Import, Metadata, Visited); } // Add linker options to link against the libraries/frameworks @@ -2296,13 +2332,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, if (Mod->UseExportAsModuleLinkName) return; - for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) { + for (const Module::LinkLibrary &LL : llvm::reverse(Mod->LinkLibraries)) { // Link against a framework. Frameworks are currently Darwin only, so we // don't to ask TargetCodeGenInfo for the spelling of the linker option. - if (Mod->LinkLibraries[I-1].IsFramework) { - llvm::Metadata *Args[2] = { - llvm::MDString::get(Context, "-framework"), - llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library)}; + if (LL.IsFramework) { + llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), + llvm::MDString::get(Context, LL.Library)}; Metadata.push_back(llvm::MDNode::get(Context, Args)); continue; @@ -2312,13 +2347,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, if (IsELF) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "lib"), - llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library), + llvm::MDString::get(Context, LL.Library), }; Metadata.push_back(llvm::MDNode::get(Context, Args)); } else { llvm::SmallString<24> Opt; - CGM.getTargetCodeGenInfo().getDependentLibraryOption( - Mod->LinkLibraries[I - 1].Library, Opt); + CGM.getTargetCodeGenInfo().getDependentLibraryOption(LL.Library, Opt); auto *OptString = llvm::MDString::get(Context, Opt); Metadata.push_back(llvm::MDNode::get(Context, OptString)); } @@ -2531,7 +2565,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) { llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { ArrayRef<Expr *> Exprs = {Attr->args_begin(), Attr->args_size()}; if (Exprs.empty()) - return llvm::ConstantPointerNull::get(Int8PtrTy); + return llvm::ConstantPointerNull::get(GlobalsInt8PtrTy); llvm::FoldingSetNodeID ID; for (Expr *E : Exprs) { @@ -2555,7 +2589,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { ".args"); GV->setSection(AnnotationSection); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, Int8PtrTy); + auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, GlobalsInt8PtrTy); Lookup = Bitcasted; return Bitcasted; @@ -2570,17 +2604,19 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, *LineNoCst = EmitAnnotationLineNo(L), *Args = EmitAnnotationArgs(AA); - llvm::Constant *ASZeroGV = GV; - if (GV->getAddressSpace() != 0) { - ASZeroGV = llvm::ConstantExpr::getAddrSpaceCast( - GV, GV->getValueType()->getPointerTo(0)); + llvm::Constant *GVInGlobalsAS = GV; + if (GV->getAddressSpace() != + getDataLayout().getDefaultGlobalsAddressSpace()) { + GVInGlobalsAS = llvm::ConstantExpr::getAddrSpaceCast( + GV, GV->getValueType()->getPointerTo( + getDataLayout().getDefaultGlobalsAddressSpace())); } // Create the ConstantStruct for the global annotation. 
llvm::Constant *Fields[] = { - llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy), + llvm::ConstantExpr::getBitCast(GVInGlobalsAS, GlobalsInt8PtrTy), + llvm::ConstantExpr::getBitCast(AnnoGV, GlobalsInt8PtrTy), + llvm::ConstantExpr::getBitCast(UnitGV, GlobalsInt8PtrTy), LineNoCst, Args, }; @@ -2853,7 +2889,8 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { GlobalDecl(cast<FunctionDecl>(VD)), /*ForVTable=*/false); else - Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, 0, nullptr); + Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, + nullptr); auto *F = cast<llvm::GlobalValue>(Aliasee); F->setLinkage(llvm::Function::ExternalWeakLinkage); @@ -3163,6 +3200,11 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { } } + // Inline builtins declaration must be emitted. They often are fortified + // functions. + if (F->isInlineBuiltinDeclaration()) + return true; + // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, // but a function that calls itself through asm label/`__builtin_` trickery is @@ -3252,6 +3294,19 @@ TargetMVPriority(const TargetInfo &TI, return Priority; } +// Multiversion functions should be at most 'WeakODRLinkage' so that a different +// TU can forward declare the function without causing problems. Particularly +// in the cases of CPUDispatch, this causes issues. This also makes sure we +// work with internal linkage functions, so that the same function name can be +// used with internal linkage in multiple TUs. +llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, + GlobalDecl GD) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + if (FD->getFormalLinkage() == InternalLinkage) + return llvm::GlobalValue::InternalLinkage; + return llvm::GlobalValue::WeakODRLinkage; +} + void CodeGenModule::emitMultiVersionFunctions() { std::vector<GlobalDecl> MVFuncsToEmit; MultiVersionFuncs.swap(MVFuncsToEmit); @@ -3292,7 +3347,7 @@ void CodeGenModule::emitMultiVersionFunctions() { if (TI.supportsIFunc() || FD->isTargetMultiVersion()) { ResolverFunc = cast<llvm::Function>( GetGlobalValue((getMangledName(GD) + ".resolver").str())); - ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); + ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); } else { ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD))); } @@ -3350,7 +3405,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction( ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); - ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); + ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); @@ -3386,10 +3441,9 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), [](StringRef Str) { return Str.substr(1); }); - Features.erase(std::remove_if( - Features.begin(), Features.end(), [&Target](StringRef Feat) { - return !Target.validateCpuSupports(Feat); - }), Features.end()); + llvm::erase_if(Features, [&Target](StringRef Feat) { + return !Target.validateCpuSupports(Feat); + }); 
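getMultiversionLinkage() above caps resolver and alias linkage at WeakODR, and drops to internal linkage when the versioned function itself is internal, so the same multiversioned name can be used with internal linkage in several TUs. A hedged, x86-only sketch of the kind of function it applies to (names illustrative):

    __attribute__((target("default"))) static int blend(int x) { return x; }
    __attribute__((target("avx2")))    static int blend(int x) { return x * 2; }

    // The resolver/ifunc generated for blend() now inherits internal linkage
    // instead of being forced to WeakODR.
    int call_blend(int x) { return blend(x); }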
Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features); ++Index; } @@ -3397,8 +3451,8 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { llvm::stable_sort( Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { - return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) > - CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features); + return llvm::X86::getCpuSupportsMask(LHS.Conditions.Features) > + llvm::X86::getCpuSupportsMask(RHS.Conditions.Features); }); // If the list contains multiple 'default' versions, such as when it contains @@ -3406,7 +3460,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { // always run on at least a 'pentium'). We do this by deleting the 'least // advanced' (read, lowest mangling letter). while (Options.size() > 1 && - CodeGenFunction::GetX86CpuSupportsMask( + llvm::X86::getCpuSupportsMask( (Options.end() - 2)->Conditions.Features) == 0) { StringRef LHSName = (Options.end() - 2)->Function->getName(); StringRef RHSName = (Options.end() - 1)->Function->getName(); @@ -3427,9 +3481,9 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { auto *IFunc = cast<llvm::GlobalIFunc>(GetOrCreateLLVMFunction( AliasName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), NotForDefinition)); - auto *GA = llvm::GlobalAlias::create( - DeclTy, 0, getFunctionLinkage(GD), AliasName, IFunc, &getModule()); - GA->setLinkage(llvm::Function::WeakODRLinkage); + auto *GA = llvm::GlobalAlias::create(DeclTy, 0, + getMultiversionLinkage(*this, GD), + AliasName, IFunc, &getModule()); SetCommonAttributes(GD, GA); } } @@ -3468,8 +3522,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( llvm::Constant *Resolver = GetOrCreateLLVMFunction( MangledName + ".resolver", ResolverType, GlobalDecl{}, /*ForVTable=*/false); - llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( - DeclTy, 0, llvm::Function::WeakODRLinkage, "", Resolver, &getModule()); + llvm::GlobalIFunc *GIF = + llvm::GlobalIFunc::create(DeclTy, 0, getMultiversionLinkage(*this, GD), + "", Resolver, &getModule()); GIF->setName(ResolverName); SetCommonAttributes(FD, GIF); @@ -3613,9 +3668,9 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); - if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { + if (ExtraAttrs.hasFnAttrs()) { llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); } if (!DontDefer) { @@ -3761,8 +3816,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, bool AssumeConvergent) { if (AssumeConvergent) { ExtraAttrs = - ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex, - llvm::Attribute::Convergent); + ExtraAttrs.addFnAttribute(VMContext, llvm::Attribute::Convergent); } llvm::Constant *C = @@ -3827,10 +3881,11 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { /// mangled name but some other type. llvm::Constant * CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, - unsigned AddrSpace, const VarDecl *D, + LangAS AddrSpace, const VarDecl *D, ForDefinition_t IsForDefinition) { // Lookup the entry, lazily creating it if necessary. 
llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + unsigned TargetAS = getContext().getTargetAddressSpace(AddrSpace); if (Entry) { if (WeakRefReferences.erase(Entry)) { if (D && !D->hasAttr<WeakAttr>()) @@ -3844,7 +3899,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D) getOpenMPRuntime().registerTargetGlobalVariable(D, Entry); - if (Entry->getValueType() == Ty && Entry->getAddressSpace() == AddrSpace) + if (Entry->getValueType() == Ty && Entry->getAddressSpace() == TargetAS) return Entry; // If there are two attempts to define the same mangled name, issue an @@ -3868,24 +3923,23 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, } // Make sure the result is of the correct type. - if (Entry->getType()->getAddressSpace() != AddrSpace) { + if (Entry->getType()->getAddressSpace() != TargetAS) { return llvm::ConstantExpr::getAddrSpaceCast(Entry, - Ty->getPointerTo(AddrSpace)); + Ty->getPointerTo(TargetAS)); } // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo(AddrSpace)); + return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo(TargetAS)); } auto DAddrSpace = GetGlobalVarAddressSpace(D); - auto TargetAddrSpace = getContext().getTargetAddressSpace(DAddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Ty, false, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal, - TargetAddrSpace); + getContext().getTargetAddressSpace(DAddrSpace)); // If we already created a global with the same mangled name (but different // type) before, take its name and remove it from its parent. @@ -4008,10 +4062,10 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS ExpectedAS = D ? D->getType().getAddressSpace() : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); - assert(getContext().getTargetAddressSpace(ExpectedAS) == AddrSpace); + assert(getContext().getTargetAddressSpace(ExpectedAS) == TargetAS); if (DAddrSpace != ExpectedAS) { return getTargetCodeGenInfo().performAddrSpaceCast( - *this, GV, DAddrSpace, ExpectedAS, Ty->getPointerTo(AddrSpace)); + *this, GV, DAddrSpace, ExpectedAS, Ty->getPointerTo(TargetAS)); } return GV; @@ -4101,8 +4155,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, Ty = getTypes().ConvertTypeForMem(ASTTy); StringRef MangledName = getMangledName(D); - return GetOrCreateLLVMGlobal(MangledName, Ty, - getContext().getTargetAddressSpace(ASTTy), D, + return GetOrCreateLLVMGlobal(MangledName, Ty, ASTTy.getAddressSpace(), D, IsForDefinition); } @@ -4111,10 +4164,8 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { - auto AddrSpace = - getContext().getLangOpts().OpenCL - ? getContext().getTargetAddressSpace(LangAS::opencl_global) - : 0; + LangAS AddrSpace = getContext().getLangOpts().OpenCL ? 
LangAS::opencl_global + : LangAS::Default; auto *Ret = GetOrCreateLLVMGlobal(Name, Ty, AddrSpace, nullptr); setDSOLocal(cast<llvm::GlobalValue>(Ret->stripPointerCasts())); return Ret; @@ -4153,16 +4204,15 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { } LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { - LangAS AddrSpace = LangAS::Default; if (LangOpts.OpenCL) { - AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global; - assert(AddrSpace == LangAS::opencl_global || - AddrSpace == LangAS::opencl_global_device || - AddrSpace == LangAS::opencl_global_host || - AddrSpace == LangAS::opencl_constant || - AddrSpace == LangAS::opencl_local || - AddrSpace >= LangAS::FirstTargetAddressSpace); - return AddrSpace; + LangAS AS = D ? D->getType().getAddressSpace() : LangAS::opencl_global; + assert(AS == LangAS::opencl_global || + AS == LangAS::opencl_global_device || + AS == LangAS::opencl_global_host || + AS == LangAS::opencl_constant || + AS == LangAS::opencl_local || + AS >= LangAS::FirstTargetAddressSpace); + return AS; } if (LangOpts.SYCLIsDevice && @@ -4261,11 +4311,6 @@ static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) { if (!CGM.supportsCOMDAT()) return false; - // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent - // them being "merged" by the COMDAT Folding linker optimization. - if (D.hasAttr<CUDAGlobalAttr>()) - return false; - if (D.hasAttr<SelectAnyAttr>()) return true; @@ -4438,7 +4483,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, if (GV && LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { if (Linkage != llvm::GlobalValue::InternalLinkage && - (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())) + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType())) GV->setExternallyInitialized(true); } else { getCUDARuntime().internalizeDeviceSideVar(D, Linkage); @@ -4529,8 +4576,8 @@ void CodeGenModule::EmitExternalVarDeclaration(const VarDecl *D) { if (getCodeGenOpts().hasReducedDebugInfo()) { QualType ASTTy = D->getType(); llvm::Type *Ty = getTypes().ConvertTypeForMem(D->getType()); - llvm::Constant *GV = GetOrCreateLLVMGlobal( - D->getName(), Ty, getContext().getTargetAddressSpace(ASTTy), D); + llvm::Constant *GV = + GetOrCreateLLVMGlobal(D->getName(), Ty, ASTTy.getAddressSpace(), D); DI->EmitExternalVariable( cast<llvm::GlobalVariable>(GV->stripPointerCasts()), D); } @@ -4747,7 +4794,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, } // Add any parameter attributes. 
- newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo)); + newArgAttrs.push_back(oldAttrs.getParamAttrs(argNo)); argNo++; } if (dontTransform) @@ -4762,7 +4809,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, callSite->getOperandBundlesAsDefs(newBundles); llvm::CallBase *newCall; - if (dyn_cast<llvm::CallInst>(callSite)) { + if (isa<llvm::CallInst>(callSite)) { newCall = llvm::CallInst::Create(newFn, newArgs, newBundles, "", callSite); } else { @@ -4775,9 +4822,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite); - newCall->setAttributes(llvm::AttributeList::get( - newFn->getContext(), oldAttrs.getFnAttributes(), - oldAttrs.getRetAttributes(), newArgAttrs)); + newCall->setAttributes( + llvm::AttributeList::get(newFn->getContext(), oldAttrs.getFnAttrs(), + oldAttrs.getRetAttrs(), newArgAttrs)); newCall->setCallingConv(callSite->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. @@ -4902,7 +4949,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { /*ForVTable=*/false); LT = getFunctionLinkage(GD); } else { - Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, 0, + Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, /*D=*/nullptr); if (const auto *VD = dyn_cast<VarDecl>(GD.getDecl())) LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified()); @@ -4983,8 +5030,9 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { Aliases.push_back(GD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); + llvm::Type *ResolverTy = llvm::GlobalIFunc::getResolverFunctionType(DeclTy); llvm::Constant *Resolver = - GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD, + GetOrCreateLLVMFunction(IFA->getResolver(), ResolverTy, {}, /*ForVTable=*/false); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, @@ -5360,7 +5408,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { - if (Alignment.getQuantity() > GV->getAlignment()) + if (uint64_t(Alignment.getQuantity()) > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); @@ -5423,7 +5471,7 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString( if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { - if (Alignment.getQuantity() > GV->getAlignment()) + if (uint64_t(Alignment.getQuantity()) > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); @@ -6448,5 +6496,5 @@ bool CodeGenModule::stopAutoInit() { void CodeGenModule::printPostfixForExternalizedStaticVar( llvm::raw_ostream &OS) const { - OS << ".static." 
<< getContext().getCUIDHash(); + OS << "__static__" << getContext().getCUIDHash(); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h index 47dc6f415b60..fbed22376c82 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h @@ -1478,8 +1478,8 @@ private: void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD); llvm::Constant * - GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, - unsigned AddrSpace, const VarDecl *D, + GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace, + const VarDecl *D, ForDefinition_t IsForDefinition = NotForDefinition); bool GetCPUAndFeaturesAttributes(GlobalDecl GD, diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp index d828ac0eb5e9..ab953c2c7d52 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp @@ -649,6 +649,14 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { void VisitIfStmt(const IfStmt *S) { RecordStmtCount(S); + + if (S->isConsteval()) { + const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse(); + if (Stm) + Visit(Stm); + return; + } + uint64_t ParentCount = CurrentCount; if (S->getInit()) Visit(S->getInit()); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypeCache.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypeCache.h index f258234fb4d8..577f88367a3a 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypeCache.h @@ -69,6 +69,12 @@ struct CodeGenTypeCache { llvm::PointerType *AllocaInt8PtrTy; }; + /// void* in default globals address space + union { + llvm::PointerType *GlobalsVoidPtrTy; + llvm::PointerType *GlobalsInt8PtrTy; + }; + /// The size and alignment of the builtin C type 'int'. This comes /// up enough in various ABI lowering tasks to be worth pre-computing. union { diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp index 9cb42941cb96..fb05475a4e8c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp @@ -512,6 +512,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Double: case BuiltinType::LongDouble: case BuiltinType::Float128: + case BuiltinType::Ibm128: ResultType = getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), /* UseNativeHalf = */ false); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp index 8a11da600e4a..9b81c8a670f5 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -751,13 +751,11 @@ struct CounterCoverageMappingBuilder /// is already added to \c SourceRegions. 
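The VisitIfStmt change in CodeGenPGO.cpp above handles C++2b 'if consteval': only the branch that can execute at run time matters for profile counters, so the visitor descends into the else branch of 'if consteval' (or the then branch of 'if !consteval') and skips the rest. A small hedged example of the construct, compiled with -std=c++2b:

    constexpr int twice(int x) {
      if consteval {
        return x + x;   // constant-evaluation branch, not counted
      } else {
        return 2 * x;   // runtime branch, still visited for region counts
      }
    }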
bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc, bool isBranch = false) { - return SourceRegions.rend() != - std::find_if(SourceRegions.rbegin(), SourceRegions.rend(), - [&](const SourceMappingRegion &Region) { - return Region.getBeginLoc() == StartLoc && - Region.getEndLoc() == EndLoc && - Region.isBranch() == isBranch; - }); + return llvm::any_of( + llvm::reverse(SourceRegions), [&](const SourceMappingRegion &Region) { + return Region.getBeginLoc() == StartLoc && + Region.getEndLoc() == EndLoc && Region.isBranch() == isBranch; + }); } /// Adjust the most recently visited location to \c EndLoc. @@ -971,7 +969,7 @@ struct CounterCoverageMappingBuilder // If last statement contains terminate statements, add a gap area // between the two statements. Skipping attributed statements, because // they don't have valid start location. - if (LastStmt && HasTerminateStmt && !dyn_cast<AttributedStmt>(Child)) { + if (LastStmt && HasTerminateStmt && !isa<AttributedStmt>(Child)) { auto Gap = findGapAreaBetween(getEnd(LastStmt), getStart(Child)); if (Gap) fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), diff --git a/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp index d3dc0e6212b8..04163aeaddc5 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -334,6 +334,19 @@ public: ArrayRef<llvm::Function *> CXXThreadLocalInits, ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override; + bool mayNeedDestruction(const VarDecl *VD) const { + if (VD->needsDestruction(getContext())) + return true; + + // If the variable has an incomplete class type (or array thereof), it + // might need destruction. + const Type *T = VD->getType()->getBaseElementTypeUnsafe(); + if (T->getAs<RecordType>() && T->isIncompleteType()) + return true; + + return false; + } + /// Determine whether we will definitely emit this variable with a constant /// initializer, either because the language semantics demand it or because /// we know that the initializer is a constant. @@ -364,7 +377,7 @@ public: // If we have the only definition, we don't need a thread wrapper if we // will emit the value as a constant. if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD))) - return !VD->needsDestruction(getContext()) && InitDecl->evaluateValue(); + return !mayNeedDestruction(VD) && InitDecl->evaluateValue(); // Otherwise, we need a thread wrapper unless we know that every // translation unit will emit the value as a constant. We rely on the @@ -376,7 +389,7 @@ public: bool usesThreadWrapperFunction(const VarDecl *VD) const override { return !isEmittedWithConstantInitializer(VD) || - VD->needsDestruction(getContext()); + mayNeedDestruction(VD); } LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType LValType) override; @@ -2445,11 +2458,6 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, (CGM.getTarget().getTriple().isOSBinFormatELF() || CGM.getTarget().getTriple().isOSBinFormatWasm())) { guard->setComdat(C); - // An inline variable's guard function is run from the per-TU - // initialization function, not via a dedicated global ctor function, so - // we can't put it in a comdat. 
- if (!NonTemplateInline) - CGF.CurFn->setComdat(C); } else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) { guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName())); } @@ -2968,7 +2976,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // also when the symbol is weak. if (CGM.getTriple().isOSAIX() && VD->hasDefinition() && isEmittedWithConstantInitializer(VD, true) && - !VD->needsDestruction(getContext())) { + !mayNeedDestruction(VD)) { // Init should be null. If it were non-null, then the logic above would // either be defining the function to be an alias or declaring the // function with the expectation that the definition of the variable @@ -3274,6 +3282,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::LongDouble: case BuiltinType::Float16: case BuiltinType::Float128: + case BuiltinType::Ibm128: case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: diff --git a/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 990648b131fe..0fd5a0ffe06c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -847,7 +847,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { // arguments was not supported and resulted in a compiler error. In 19.14 // and later versions, such arguments are now passed indirectly. TypeInfo Info = getContext().getTypeInfo(RD->getTypeForDecl()); - if (Info.AlignIsRequired && Info.Align > 4) + if (Info.isAlignRequired() && Info.Align > 4) return RAA_Indirect; // If C++ prohibits us from making a copy, construct the arguments directly @@ -1810,8 +1810,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, #endif } - const std::unique_ptr<VPtrInfo> *VFPtrI = std::find_if( - VFPtrs.begin(), VFPtrs.end(), [&](const std::unique_ptr<VPtrInfo>& VPI) { + const std::unique_ptr<VPtrInfo> *VFPtrI = + llvm::find_if(VFPtrs, [&](const std::unique_ptr<VPtrInfo> &VPI) { return VPI->FullOffsetInMDC == VPtrOffset; }); if (VFPtrI == VFPtrs.end()) { @@ -1844,7 +1844,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VFTablesMap[ID] = VFTable; VTable = VTableAliasIsRequred ? cast<llvm::GlobalVariable>( - cast<llvm::GlobalAlias>(VFTable)->getBaseObject()) + cast<llvm::GlobalAlias>(VFTable)->getAliaseeObject()) : cast<llvm::GlobalVariable>(VFTable); return VTable; } @@ -4348,6 +4348,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { const Expr *SubExpr = E->getSubExpr(); + assert(SubExpr && "SubExpr cannot be null"); QualType ThrowType = SubExpr->getType(); // The exception object lives on the stack and it's address is passed to the // runtime function. 
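The new mayNeedDestruction() helper in ItaniumCXXABI.cpp above treats a thread_local whose class type (or array element type) is still incomplete as possibly needing destruction, so accesses keep going through the thread wrapper even when the initializer is known to be constant. A hedged sketch of the situation (names illustrative):

    struct Settings;                                   // incomplete in this TU
    extern constinit thread_local Settings tls_settings;

    // Whether Settings has a non-trivial destructor is unknown here, so this
    // access still calls the thread wrapper instead of touching the variable
    // directly.
    Settings *current() { return &tls_settings; }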
diff --git a/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp index b63f756ca288..f6642a79e1e4 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp @@ -122,6 +122,10 @@ namespace { return D; } + llvm::StringRef GetMangledName(GlobalDecl GD) { + return Builder->getMangledName(GD); + } + llvm::Constant *GetAddrOfGlobal(GlobalDecl global, bool isForDefinition) { return Builder->GetAddrOfGlobal(global, ForDefinition_t(isForDefinition)); } @@ -325,6 +329,10 @@ const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) { return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name); } +llvm::StringRef CodeGenerator::GetMangledName(GlobalDecl GD) { + return static_cast<CodeGeneratorImpl *>(this)->GetMangledName(GD); +} + llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global, bool isForDefinition) { return static_cast<CodeGeneratorImpl*>(this) diff --git a/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 1adf0ad9c0e5..f7b83c45022d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -27,10 +27,10 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Path.h" -#include "llvm/Support/TargetRegistry.h" #include <memory> #include <utility> @@ -264,31 +264,48 @@ public: std::string Error; auto Triple = Ctx.getTargetInfo().getTriple(); if (!llvm::TargetRegistry::lookupTarget(Triple.getTriple(), Error)) - llvm::report_fatal_error(Error); + llvm::report_fatal_error(llvm::Twine(Error)); // Emit the serialized Clang AST into its own section. assert(Buffer->IsComplete && "serialization did not complete"); auto &SerializedAST = Buffer->Data; auto Size = SerializedAST.size(); - auto Int8Ty = llvm::Type::getInt8Ty(*VMContext); - auto *Ty = llvm::ArrayType::get(Int8Ty, Size); - auto *Data = llvm::ConstantDataArray::getString( - *VMContext, StringRef(SerializedAST.data(), Size), - /*AddNull=*/false); - auto *ASTSym = new llvm::GlobalVariable( - *M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data, - "__clang_ast"); - // The on-disk hashtable needs to be aligned. - ASTSym->setAlignment(llvm::Align(8)); - - // Mach-O also needs a segment name. - if (Triple.isOSBinFormatMachO()) - ASTSym->setSection("__CLANG,__clangast"); - // COFF has an eight character length limit. - else if (Triple.isOSBinFormatCOFF()) - ASTSym->setSection("clangast"); - else - ASTSym->setSection("__clangast"); + + if (Triple.isOSBinFormatWasm()) { + // Emit __clangast in custom section instead of named data segment + // to find it while iterating sections. + // This could be avoided if all data segements (the wasm sense) were + // represented as their own sections (in the llvm sense). 
+ // TODO: https://github.com/WebAssembly/tool-conventions/issues/138 + llvm::NamedMDNode *MD = + M->getOrInsertNamedMetadata("wasm.custom_sections"); + llvm::Metadata *Ops[2] = { + llvm::MDString::get(*VMContext, "__clangast"), + llvm::MDString::get(*VMContext, + StringRef(SerializedAST.data(), Size))}; + auto *NameAndContent = llvm::MDTuple::get(*VMContext, Ops); + MD->addOperand(NameAndContent); + } else { + auto Int8Ty = llvm::Type::getInt8Ty(*VMContext); + auto *Ty = llvm::ArrayType::get(Int8Ty, Size); + auto *Data = llvm::ConstantDataArray::getString( + *VMContext, StringRef(SerializedAST.data(), Size), + /*AddNull=*/false); + auto *ASTSym = new llvm::GlobalVariable( + *M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, + Data, "__clang_ast"); + // The on-disk hashtable needs to be aligned. + ASTSym->setAlignment(llvm::Align(8)); + + // Mach-O also needs a segment name. + if (Triple.isOSBinFormatMachO()) + ASTSym->setSection("__CLANG,__clangast"); + // COFF has an eight character length limit. + else if (Triple.isOSBinFormatCOFF()) + ASTSym->setSection("clangast"); + else + ASTSym->setSection("__clangast"); + } LLVM_DEBUG({ // Print the IR for the PCH container to the debug output. diff --git a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp index d2cc0a699f43..302dc653c46e 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp @@ -392,6 +392,36 @@ static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, } +static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty, CharUnits SlotSize, + CharUnits EltSize, const ComplexType *CTy) { + Address Addr = + emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2, + SlotSize, SlotSize, /*AllowHigher*/ true); + + Address RealAddr = Addr; + Address ImagAddr = RealAddr; + if (CGF.CGM.getDataLayout().isBigEndian()) { + RealAddr = + CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize); + ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr, + 2 * SlotSize - EltSize); + } else { + ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize); + } + + llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType()); + RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy); + ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy); + llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal"); + llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag"); + + Address Temp = CGF.CreateMemTemp(Ty, "vacplx"); + CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty), + /*init*/ true); + return Temp; +} + static Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block1, Address Addr2, llvm::BasicBlock *Block2, @@ -827,19 +857,19 @@ public: llvm::Function *Fn = cast<llvm::Function>(GV); llvm::AttrBuilder B; B.addAttribute("wasm-import-module", Attr->getImportModule()); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); llvm::AttrBuilder B; B.addAttribute("wasm-import-name", Attr->getImportName()); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); llvm::AttrBuilder B; 
B.addAttribute("wasm-export-name", Attr->getExportName()); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } } @@ -1170,7 +1200,8 @@ public: IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()), + IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() || + CGT.getTarget().getTriple().isOSCygMing()), DefaultNumRegisterParameters(NumRegisterParameters) {} bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, @@ -1524,6 +1555,14 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); + // Return complex of _Float16 as <2 x half> so the backend will use xmm0. + if (const ComplexType *CT = RetTy->getAs<ComplexType>()) { + QualType ET = getContext().getCanonicalType(CT->getElementType()); + if (ET->isFloat16Type()) + return ABIArgInfo::getDirect(llvm::FixedVectorType::get( + llvm::Type::getHalfTy(getVMContext()), 2)); + } + // Small structures which are register sized are generally returned // in a register. if (shouldReturnTypeInRegister(RetTy, getContext())) { @@ -1831,7 +1870,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // Pass over-aligned aggregates on Windows indirectly. This behavior was // added in MSVC 2015. - if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32) + if (IsWin32StructABI && TI.isAlignRequired() && TI.Align > 32) return getIndirectResult(Ty, /*ByVal=*/false, State); // Expand small (<= 128-bit) record types when we know that the stack layout @@ -2607,7 +2646,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { // If the argument does not end in .lib, automatically add the suffix. // If the argument contains a space, enclose it in quotes. // This matches the behavior of MSVC. - bool Quote = (Lib.find(' ') != StringRef::npos); + bool Quote = Lib.contains(' '); std::string ArgStr = Quote ? "\"" : ""; ArgStr += Lib; if (!Lib.endswith_insensitive(".lib") && !Lib.endswith_insensitive(".a")) @@ -2812,7 +2851,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Hi = Integer; } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { Current = Integer; - } else if (k == BuiltinType::Float || k == BuiltinType::Double) { + } else if (k == BuiltinType::Float || k == BuiltinType::Double || + k == BuiltinType::Float16) { Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); @@ -2943,7 +2983,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Current = Integer; else if (Size <= 128) Lo = Hi = Integer; - } else if (ET == getContext().FloatTy) { + } else if (ET->isFloat16Type() || ET == getContext().FloatTy) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; @@ -3367,55 +3407,77 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, return false; } -/// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a -/// float member at the specified offset. For example, {int,{float}} has a -/// float at offset 4. It is conservatively correct for this routine to return -/// false. -static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset, - const llvm::DataLayout &TD) { - // Base case if we find a float. 
- if (IROffset == 0 && IRType->isFloatTy()) - return true; +/// getFPTypeAtOffset - Return a floating point type at the specified offset. +static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset, + const llvm::DataLayout &TD) { + if (IROffset == 0 && IRType->isFloatingPointTy()) + return IRType; // If this is a struct, recurse into the field at the specified offset. if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { + if (!STy->getNumContainedTypes()) + return nullptr; + const llvm::StructLayout *SL = TD.getStructLayout(STy); unsigned Elt = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(Elt); - return ContainsFloatAtOffset(STy->getElementType(Elt), IROffset, TD); + return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD); } // If this is an array, recurse into the field at the specified offset. if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = TD.getTypeAllocSize(EltTy); - IROffset -= IROffset/EltSize*EltSize; - return ContainsFloatAtOffset(EltTy, IROffset, TD); + IROffset -= IROffset / EltSize * EltSize; + return getFPTypeAtOffset(EltTy, IROffset, TD); } - return false; + return nullptr; } - /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the /// low 8 bytes of an XMM register, corresponding to the SSE class. llvm::Type *X86_64ABIInfo:: GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { - // The only three choices we have are either double, <2 x float>, or float. We - // pass as float if the last 4 bytes is just padding. This happens for - // structs that contain 3 floats. - if (BitsContainNoUserData(SourceTy, SourceOffset*8+32, - SourceOffset*8+64, getContext())) - return llvm::Type::getFloatTy(getVMContext()); - - // We want to pass as <2 x float> if the LLVM IR type contains a float at - // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the - // case. - if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) && - ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout())) - return llvm::FixedVectorType::get(llvm::Type::getFloatTy(getVMContext()), - 2); + const llvm::DataLayout &TD = getDataLayout(); + unsigned SourceSize = + (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; + llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); + if (!T0 || T0->isDoubleTy()) + return llvm::Type::getDoubleTy(getVMContext()); + + // Get the adjacent FP type. + llvm::Type *T1 = nullptr; + unsigned T0Size = TD.getTypeAllocSize(T0); + if (SourceSize > T0Size) + T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); + if (T1 == nullptr) { + // Check if IRType is a half + float. float type will be in IROffset+4 due + // to its alignment. + if (T0->isHalfTy() && SourceSize > 4) + T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); + // If we can't get a second FP type, return a simple half or float. + // avx512fp16-abi.c:pr51813_2 shows it works to return float for + // {float, i8} too. 
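With _Float16 added to the SSE class and the getFPTypeAtOffset()/GetSSETypeAtOffset() rework above, small aggregates containing _Float16 can be passed and returned in vector registers rather than GPRs on x86-64, on targets where _Float16 is available. A hedged illustration of the aggregate shapes the new half/float paths cover (struct names are illustrative):

    struct HalfPair     { _Float16 a, b; };        // half + half eightbyte
    struct HalfQuad     { _Float16 a, b, c, d; };
    struct HalfAndFloat { _Float16 h; float f; };  // mixed half/float case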
+ if (T1 == nullptr) + return T0; + } + + if (T0->isFloatTy() && T1->isFloatTy()) + return llvm::FixedVectorType::get(T0, 2); + + if (T0->isHalfTy() && T1->isHalfTy()) { + llvm::Type *T2 = nullptr; + if (SourceSize > 4) + T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); + if (T2 == nullptr) + return llvm::FixedVectorType::get(T0, 2); + return llvm::FixedVectorType::get(T0, 4); + } + + if (T0->isHalfTy() || T1->isHalfTy()) + return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); return llvm::Type::getDoubleTy(getVMContext()); } @@ -3521,11 +3583,11 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, // struct. if (HiStart != 8) { // There are usually two sorts of types the ABI generation code can produce - // for the low part of a pair that aren't 8 bytes in size: float or + // for the low part of a pair that aren't 8 bytes in size: half, float or // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and // NaCl). // Promote these to a larger type. - if (Lo->isFloatTy()) + if (Lo->isHalfTy() || Lo->isFloatTy()) Lo = llvm::Type::getDoubleTy(Lo->getContext()); else { assert((Lo->isIntegerTy() || Lo->isPointerTy()) @@ -4572,14 +4634,25 @@ CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const { Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - if (Ty->isAnyComplexType()) - llvm::report_fatal_error("complex type is not supported on AIX yet"); auto TypeInfo = getContext().getTypeInfoInChars(Ty); TypeInfo.Align = getParamTypeAlignment(Ty); CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize); + // If we have a complex type and the base type is smaller than the register + // size, the ABI calls for the real and imaginary parts to be right-adjusted + // in separate words in 32bit mode or doublewords in 64bit mode. However, + // Clang expects us to produce a pointer to a structure with the two parts + // packed tightly. So generate loads of the real and imaginary parts relative + // to the va_list pointer, and store them to a temporary structure. We do the + // same as the PPC64ABI here. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + CharUnits EltSize = TypeInfo.Width / 2; + if (EltSize < SlotSize) + return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); + } + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, SlotSize, /*AllowHigher*/ true); } @@ -5168,8 +5241,9 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || BT->getKind() == BuiltinType::LongDouble || + BT->getKind() == BuiltinType::Ibm128 || (getContext().getTargetInfo().hasFloat128Type() && - (BT->getKind() == BuiltinType::Float128))) { + (BT->getKind() == BuiltinType::Float128))) { if (IsSoftFloatABI) return false; return true; @@ -5346,33 +5420,8 @@ Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // and store them to a temporary structure. 
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { CharUnits EltSize = TypeInfo.Width / 2; - if (EltSize < SlotSize) { - Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, - SlotSize * 2, SlotSize, - SlotSize, /*AllowHigher*/ true); - - Address RealAddr = Addr; - Address ImagAddr = RealAddr; - if (CGF.CGM.getDataLayout().isBigEndian()) { - RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, - SlotSize - EltSize); - ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr, - 2 * SlotSize - EltSize); - } else { - ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize); - } - - llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType()); - RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy); - ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy); - llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal"); - llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag"); - - Address Temp = CGF.CreateMemTemp(Ty, "vacplx"); - CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty), - /*init*/ true); - return Temp; - } + if (EltSize < SlotSize) + return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); } // Otherwise, just use the general rule. @@ -6343,7 +6392,7 @@ public: // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } }; @@ -6934,7 +6983,7 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, TyAlignForABI = CharUnits::fromQuantity(4); } - TypeInfoChars TyInfo(TySize, TyAlignForABI, false); + TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None); return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } @@ -10120,24 +10169,26 @@ void XCoreTargetCodeGenInfo::emitTargetMetadata( } } } + //===----------------------------------------------------------------------===// -// SPIR ABI Implementation +// Base ABI and target codegen info implementation common between SPIR and +// SPIR-V. //===----------------------------------------------------------------------===// namespace { -class SPIRABIInfo : public DefaultABIInfo { +class CommonSPIRABIInfo : public DefaultABIInfo { public: - SPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); } + CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); } private: void setCCs(); }; } // end anonymous namespace namespace { -class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { +class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: - SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<SPIRABIInfo>(CGT)) {} + CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {} LangAS getASTAllocaAddressSpace() const override { return getLangASFromTargetAS( @@ -10148,7 +10199,7 @@ public: }; } // End anonymous namespace. 
-void SPIRABIInfo::setCCs() { +void CommonSPIRABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); RuntimeCC = llvm::CallingConv::SPIR_FUNC; } @@ -10162,7 +10213,7 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { } } -unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { +unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } @@ -11231,7 +11282,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new ARCTargetCodeGenInfo(Types)); case llvm::Triple::spir: case llvm::Triple::spir64: - return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types)); case llvm::Triple::ve: return SetCGInfo(new VETargetCodeGenInfo(Types)); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/VarBypassDetector.h b/contrib/llvm-project/clang/lib/CodeGen/VarBypassDetector.h index b654eefd963d..164e88c0b2f1 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/VarBypassDetector.h +++ b/contrib/llvm-project/clang/lib/CodeGen/VarBypassDetector.h @@ -55,7 +55,7 @@ public: /// Returns true if the variable declaration was by bypassed by any goto or /// switch statement. bool IsBypassed(const VarDecl *D) const { - return AlwaysBypassed || Bypasses.find(D) != Bypasses.end(); + return AlwaysBypassed || Bypasses.contains(D); } private: |
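VarBypassDetector::IsBypassed() now uses Bypasses.contains(); the set itself still records variables whose declarations a goto or switch can jump over, which CodeGen uses to stay conservative about their lifetime markers. A hedged sketch of the pattern it flags (valid C++ because 'scratch' has no initializer):

    void demo(bool fast) {
      if (fast)
        goto use;      // jumps over the declaration below
      int scratch;     // flagged as bypassed
    use:
      scratch = 1;
      (void)scratch;
    }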