Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen')
59 files changed, 6741 insertions, 2918 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp index 497652e85b47..0bfcab88a3a9 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp @@ -37,6 +37,7 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" +#include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -60,6 +61,7 @@ #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" @@ -73,6 +75,10 @@ using namespace clang; using namespace llvm; +#define HANDLE_EXTENSION(Ext) \ + llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + namespace { // Default filename used for profile generation. @@ -117,8 +123,8 @@ class EmitAssemblyHelper { std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) { std::error_code EC; - auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC, - llvm::sys::fs::F_None); + auto F = std::make_unique<llvm::ToolOutputFile>(Path, EC, + llvm::sys::fs::OF_None); if (EC) { Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message(); F.reset(); @@ -195,11 +201,8 @@ static void addBoundsCheckingPass(const PassManagerBuilder &Builder, PM.add(createBoundsCheckingLegacyPass()); } -static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast<const PassManagerBuilderWrapper&>(Builder); - const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); +static SanitizerCoverageOptions +getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { SanitizerCoverageOptions Opts; Opts.CoverageType = static_cast<SanitizerCoverageOptions::Type>(CGOpts.SanitizeCoverageType); @@ -215,7 +218,16 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters; Opts.PCTable = CGOpts.SanitizeCoveragePCTable; Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth; - PM.add(createSanitizerCoverageModulePass(Opts)); + return Opts; +} + +static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper &>(Builder); + const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); + auto Opts = getSancovOptsFromCGOpts(CGOpts); + PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts)); } // Check if ASan should use GC-friendly instrumentation for globals. 
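Note: the BackendUtil.cpp hunk above splits the CodeGenOptions-to-SanitizerCoverageOptions translation into getSancovOptsFromCGOpts so that both pass managers can share it. A condensed sketch of how the two pipelines consume the same options (pass names as they appear in this diff; surrounding setup elided):

    // Legacy pass manager: registered via the PassManagerBuilder wrapper.
    SanitizerCoverageOptions Opts = getSancovOptsFromCGOpts(CGOpts);
    PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts));

    // New pass manager: the same options feed a module pass directly
    // (see the ModuleSanitizerCoveragePass hunk further down).
    MPM.addPass(ModuleSanitizerCoveragePass(getSancovOptsFromCGOpts(CGOpts)));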
@@ -231,9 +243,13 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { return true; case Triple::ELF: return CGOpts.DataSections && !CGOpts.DisableIntegratedAS; - default: - return false; + case Triple::XCOFF: + llvm::report_fatal_error("ASan not implemented for XCOFF."); + case Triple::Wasm: + case Triple::UnknownObjectFormat: + break; } + return false; } static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -327,15 +343,6 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); - if (!CodeGenOpts.SimplifyLibCalls) - TLII->disableAllFunctions(); - else { - // Disable individual libc/libm calls in TargetLibraryInfo. - LibFunc F; - for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs()) - if (TLII->getLibFunc(FuncName, F)) - TLII->setUnavailable(F); - } switch (CodeGenOpts.getVecLib()) { case CodeGenOptions::Accelerate: @@ -395,14 +402,14 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) { return static_cast<llvm::CodeModel::Model>(CodeModel); } -static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) { +static CodeGenFileType getCodeGenFileType(BackendAction Action) { if (Action == Backend_EmitObj) - return TargetMachine::CGFT_ObjectFile; + return CGFT_ObjectFile; else if (Action == Backend_EmitMCNull) - return TargetMachine::CGFT_Null; + return CGFT_Null; else { assert(Action == Backend_EmitAssembly && "Invalid action!"); - return TargetMachine::CGFT_AssemblyFile; + return CGFT_AssemblyFile; } } @@ -456,6 +463,8 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.ExceptionModel = llvm::ExceptionHandling::WinEH; if (LangOpts.DWARFExceptions) Options.ExceptionModel = llvm::ExceptionHandling::DwarfCFI; + if (LangOpts.WasmExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::Wasm; Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; @@ -465,12 +474,14 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.TLSSize = CodeGenOpts.TLSSize; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.EmitAddrsig = CodeGenOpts.Addrsig; Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues; + Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; @@ -479,8 +490,8 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; Options.MCOptions.MCIncrementalLinkerCompatible = CodeGenOpts.IncrementalLinkerCompatible; - Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; + Options.MCOptions.MCNoWarn = CodeGenOpts.NoWarn; Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; Options.MCOptions.ABIName = TargetOpts.ABI; @@ -772,7 +783,7 @@ bool 
EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. - TargetMachine::CodeGenFileType CGFT = getCodeGenFileType(Action); + CodeGenFileType CGFT = getCodeGenFileType(Action); // Add ObjC ARC final-cleanup optimizations. This is done as part of the // "codegen" passes so that it isn't run multiple times when there is @@ -848,7 +859,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, if (!TheModule->getModuleFlag("ThinLTO")) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - CodeGenOpts.EnableSplitLTOUnit); + uint32_t(1)); } PerModulePasses.add(createBitcodeWriterPass( @@ -880,6 +891,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, { PrettyStackTraceString CrashInfo("Per-function optimization"); + llvm::TimeTraceScope TimeScope("PerFunctionPasses"); PerFunctionPasses.doInitialization(); for (Function &F : *TheModule) @@ -890,11 +902,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, { PrettyStackTraceString CrashInfo("Per-module optimization passes"); + llvm::TimeTraceScope TimeScope("PerModulePasses"); PerModulePasses.run(*TheModule); } { PrettyStackTraceString CrashInfo("Code generation"); + llvm::TimeTraceScope TimeScope("CodeGenPasses"); CodeGenPasses.run(*TheModule); } @@ -956,6 +970,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM, } if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + MPM.addPass(MemorySanitizerPass({})); MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({}))); } @@ -965,6 +980,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM, } if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(ThreadSanitizerPass()); MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); } } @@ -1050,7 +1066,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PTO.LoopVectorization = CodeGenOpts.VectorizeLoop; PTO.SLPVectorization = CodeGenOpts.VectorizeSLP; - PassBuilder PB(TM.get(), PTO, PGOOpt); + PassInstrumentationCallbacks PIC; + StandardInstrumentations SI; + SI.registerCallbacks(PIC); + PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC); // Attempt to load pass plugins and register their callbacks with PB. for (auto &PluginFN : CodeGenOpts.PassPlugins) { @@ -1062,6 +1081,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( << PluginFN << toString(PassPlugin.takeError()); } } +#define HANDLE_EXTENSION(Ext) \ + get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); +#include "llvm/Support/Extension.def" LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager); FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager); @@ -1077,7 +1099,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); - MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); // Register all the basic analyses with the managers. PB.registerModuleAnalyses(MAM); @@ -1105,6 +1126,16 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // code generation. MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); + // At -O0, we can still do PGO. Add all the requested passes for + // instrumentation PGO, if requested. 
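Note on the PassBuilder hunk above (the -O0 PGO code the trailing comment introduces continues below): PassInstrumentationCallbacks plus StandardInstrumentations is what wires -print-after-all, pass timing, and IR verification into the new pass manager, and the PassBuilder must receive the callbacks at construction. Condensed hookup, as in the diff:

    PassInstrumentationCallbacks PIC;
    StandardInstrumentations SI;
    SI.registerCallbacks(PIC);                    // printing/timing/verify hooks
    PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC);  // PB reports pass events to PIC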
+ if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse)) + PB.addPGOInstrPassesForO0( + MPM, CodeGenOpts.DebugPassManager, + /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), + /* IsCS */ false, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + // At -O0 we directly run necessary sanitizer passes. if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass())); @@ -1132,16 +1163,23 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); - if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(MemorySanitizerPass({})); + }); PB.registerOptimizerLastEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(MemorySanitizerPass({})); }); - if (LangOpts.Sanitize.has(SanitizerKind::Thread)) + } + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + PB.registerPipelineStartEPCallback( + [](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); }); PB.registerOptimizerLastEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(ThreadSanitizerPass()); }); + } if (LangOpts.Sanitize.has(SanitizerKind::Address)) { PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) { MPM.addPass( @@ -1191,6 +1229,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } + if (CodeGenOpts.SanitizeCoverageType || + CodeGenOpts.SanitizeCoverageIndirectCalls || + CodeGenOpts.SanitizeCoverageTraceCmp) { + auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); + MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts)); + } + if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) { bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress); MPM.addPass(HWAddressSanitizerPass( @@ -1201,8 +1246,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( /*CompileKernel=*/true, /*Recover=*/true)); } - if (CodeGenOpts.OptimizationLevel == 0) + if (CodeGenOpts.OptimizationLevel == 0) { addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); + } } // FIXME: We still use the legacy pass manager to do code generation. We @@ -1239,7 +1285,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (!TheModule->getModuleFlag("ThinLTO")) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - CodeGenOpts.EnableSplitLTOUnit); + uint32_t(1)); } MPM.addPass( BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); @@ -1372,7 +1418,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, OwnedImports.push_back(std::move(*MBOrErr)); } auto AddStream = [&](size_t Task) { - return llvm::make_unique<lto::NativeObjectStream>(std::move(OS)); + return std::make_unique<lto::NativeObjectStream>(std::move(OS)); }; lto::Config Conf; if (CGOpts.SaveTempsFilePrefix != "") { @@ -1392,6 +1438,12 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.OptLevel = CGOpts.OptimizationLevel; initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; + // For historical reasons, loop interleaving is set to mirror setting for loop + // unrolling. 
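The sanitizer hunks above register MemorySanitizerPass and ThreadSanitizerPass at two extension points each: pipeline-start adds the module-level pass (globals and ctor setup), while optimizer-last adds the per-function instrumentation after optimization so later passes cannot disturb it. The recurring pattern, with MSan as the example:

    PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
      MPM.addPass(MemorySanitizerPass({}));       // module-level setup
    });
    PB.registerOptimizerLastEPCallback(
        [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
          FPM.addPass(MemorySanitizerPass({}));   // per-function instrumentation
        });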
+ Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops; + Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop; + Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP; // Context sensitive profile. if (CGOpts.hasProfileCSIRInstr()) { @@ -1451,7 +1503,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { - llvm::TimeTraceScope TimeScope("Backend", StringRef("")); + llvm::TimeTraceScope TimeScope("Backend"); std::unique_ptr<llvm::Module> EmptyModule; if (!CGOpts.ThinLTOIndexFile.empty()) { @@ -1484,7 +1536,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // trying to read it. Also for some features, like CFI, we must skip // the compilation as CombinedIndex does not contain all required // information. - EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext()); + EmptyModule = std::make_unique<llvm::Module>("empty", M->getContext()); EmptyModule->setTargetTriple(M->getTargetTriple()); M = EmptyModule.get(); } @@ -1510,129 +1562,14 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, } } -static const char* getSectionNameForBitcode(const Triple &T) { - switch (T.getObjectFormat()) { - case Triple::MachO: - return "__LLVM,__bitcode"; - case Triple::COFF: - case Triple::ELF: - case Triple::Wasm: - case Triple::UnknownObjectFormat: - return ".llvmbc"; - case Triple::XCOFF: - llvm_unreachable("XCOFF is not yet implemented"); - break; - } - llvm_unreachable("Unimplemented ObjectFormatType"); -} - -static const char* getSectionNameForCommandline(const Triple &T) { - switch (T.getObjectFormat()) { - case Triple::MachO: - return "__LLVM,__cmdline"; - case Triple::COFF: - case Triple::ELF: - case Triple::Wasm: - case Triple::UnknownObjectFormat: - return ".llvmcmd"; - case Triple::XCOFF: - llvm_unreachable("XCOFF is not yet implemented"); - break; - } - llvm_unreachable("Unimplemented ObjectFormatType"); -} - // With -fembed-bitcode, save a copy of the llvm IR as data in the // __LLVM,__bitcode section. void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, llvm::MemoryBufferRef Buf) { if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) return; - - // Save llvm.compiler.used and remote it. - SmallVector<Constant*, 2> UsedArray; - SmallPtrSet<GlobalValue*, 4> UsedGlobals; - Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0); - GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true); - for (auto *GV : UsedGlobals) { - if (GV->getName() != "llvm.embedded.module" && - GV->getName() != "llvm.cmdline") - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - } - if (Used) - Used->eraseFromParent(); - - // Embed the bitcode for the llvm module. - std::string Data; - ArrayRef<uint8_t> ModuleData; - Triple T(M->getTargetTriple()); - // Create a constant that contains the bitcode. - // In case of embedding a marker, ignore the input Buf and use the empty - // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. - if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) { - if (!isBitcode((const unsigned char *)Buf.getBufferStart(), - (const unsigned char *)Buf.getBufferEnd())) { - // If the input is LLVM Assembly, bitcode is produced by serializing - // the module. Use-lists order need to be perserved in this case. 
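The runThinLTOBackend hunk above forwards the frontend's pipeline-tuning flags into lto::Config, so ThinLTO backends now honor -f(no-)unroll-loops and the vectorizer toggles instead of falling back to the LTO pipeline defaults. Condensed, as in the diff:

    Conf.PTO.LoopUnrolling     = CGOpts.UnrollLoops;
    Conf.PTO.LoopInterleaving  = CGOpts.UnrollLoops; // mirrors unrolling, per the comment
    Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
    Conf.PTO.SLPVectorization  = CGOpts.VectorizeSLP;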
- llvm::raw_string_ostream OS(Data); - llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true); - ModuleData = - ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size()); - } else - // If the input is LLVM bitcode, write the input byte stream directly. - ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(), - Buf.getBufferSize()); - } - llvm::Constant *ModuleConstant = - llvm::ConstantDataArray::get(M->getContext(), ModuleData); - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, - ModuleConstant); - GV->setSection(getSectionNameForBitcode(T)); - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - if (llvm::GlobalVariable *Old = - M->getGlobalVariable("llvm.embedded.module", true)) { - assert(Old->hasOneUse() && - "llvm.embedded.module can only be used once in llvm.compiler.used"); - GV->takeName(Old); - Old->eraseFromParent(); - } else { - GV->setName("llvm.embedded.module"); - } - - // Skip if only bitcode needs to be embedded. - if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) { - // Embed command-line options. - ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()), - CGOpts.CmdArgs.size()); - llvm::Constant *CmdConstant = - llvm::ConstantDataArray::get(M->getContext(), CmdData); - GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true, - llvm::GlobalValue::PrivateLinkage, - CmdConstant); - GV->setSection(getSectionNameForCommandline(T)); - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - if (llvm::GlobalVariable *Old = - M->getGlobalVariable("llvm.cmdline", true)) { - assert(Old->hasOneUse() && - "llvm.cmdline can only be used once in llvm.compiler.used"); - GV->takeName(Old); - Old->eraseFromParent(); - } else { - GV->setName("llvm.cmdline"); - } - } - - if (UsedArray.empty()) - return; - - // Recreate llvm.compiler.used. 
- ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); - auto *NewUsed = new GlobalVariable( - *M, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); - NewUsed->setSection("llvm.metadata"); + llvm::EmbedBitcodeInModule( + *M, Buf, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker, + CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode, + &CGOpts.CmdArgs); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp index a95cd12c2d64..149982d82790 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGAtomic.cpp @@ -102,12 +102,13 @@ namespace { llvm::APInt Size( /*numBits=*/32, C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity()); - AtomicTy = C.getConstantArrayType(C.CharTy, Size, ArrayType::Normal, - /*IndexTypeQuals=*/0); + AtomicTy = + C.getConstantArrayType(C.CharTy, Size, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); } AtomicAlign = ValueAlign = lvalue.getAlignment(); } else if (lvalue.isVectorElt()) { - ValueTy = lvalue.getType()->getAs<VectorType>()->getElementType(); + ValueTy = lvalue.getType()->castAs<VectorType>()->getElementType(); ValueSizeInBits = C.getTypeSize(ValueTy); AtomicTy = lvalue.getType(); AtomicSizeInBits = C.getTypeSize(AtomicTy); @@ -138,7 +139,7 @@ namespace { const LValue &getAtomicLValue() const { return LVal; } llvm::Value *getAtomicPointer() const { if (LVal.isSimple()) - return LVal.getPointer(); + return LVal.getPointer(CGF); else if (LVal.isBitField()) return LVal.getBitFieldPointer(); else if (LVal.isVectorElt()) @@ -342,14 +343,14 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const { bool AtomicInfo::emitMemSetZeroIfNecessary() const { assert(LVal.isSimple()); - llvm::Value *addr = LVal.getPointer(); + llvm::Value *addr = LVal.getPointer(CGF); if (!requiresMemSetZero(addr->getType()->getPointerElementType())) return false; CGF.Builder.CreateMemSet( addr, llvm::ConstantInt::get(CGF.Int8Ty, 0), CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(), - LVal.getAlignment().getQuantity()); + LVal.getAlignment().getAsAlign()); return true; } @@ -487,13 +488,36 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, CGF.Builder.SetInsertPoint(ContBB); } +/// Duplicate the atomic min/max operation in conventional IR for the builtin +/// variants that return the new rather than the original value. +static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder, + AtomicExpr::AtomicOp Op, + bool IsSigned, + llvm::Value *OldVal, + llvm::Value *RHS) { + llvm::CmpInst::Predicate Pred; + switch (Op) { + default: + llvm_unreachable("Unexpected min/max operation"); + case AtomicExpr::AO__atomic_max_fetch: + Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT; + break; + case AtomicExpr::AO__atomic_min_fetch: + Pred = IsSigned ? 
llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT; + break; + } + llvm::Value *Cmp = Builder.CreateICmp(Pred, OldVal, RHS, "tst"); + return Builder.CreateSelect(Cmp, OldVal, RHS, "newval"); +} + static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, Address Ptr, Address Val1, Address Val2, llvm::Value *IsWeak, llvm::Value *FailureOrder, uint64_t Size, llvm::AtomicOrdering Order, llvm::SyncScope::ID Scope) { llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add; - llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; + bool PostOpMinMax = false; + unsigned PostOp = 0; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: @@ -587,12 +611,20 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, Op = llvm::AtomicRMWInst::Sub; break; + case AtomicExpr::AO__atomic_min_fetch: + PostOpMinMax = true; + LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min : llvm::AtomicRMWInst::UMin; break; + case AtomicExpr::AO__atomic_max_fetch: + PostOpMinMax = true; + LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max @@ -642,8 +674,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, // For __atomic_*_fetch operations, perform the operation again to // determine the value which was written. llvm::Value *Result = RMWI; - if (PostOp) - Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1); + if (PostOpMinMax) + Result = EmitPostAtomicMinMax(CGF.Builder, E->getOp(), + E->getValueType()->isSignedIntegerType(), + RMWI, LoadVal1); + else if (PostOp) + Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI, + LoadVal1); if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) Result = CGF.Builder.CreateNot(Result); CGF.Builder.CreateStore(Result, Dest); @@ -852,6 +889,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_xor: @@ -865,8 +904,10 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_nand_fetch: - case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: Val1 = EmitValToTemp(*this, E->getVal1()); break; } @@ -915,14 +956,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_add_fetch: case AtomicExpr::AO__atomic_and_fetch: case AtomicExpr::AO__atomic_nand_fetch: case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__atomic_xor_fetch: - case 
AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: // For these, only library calls for certain sizes exist. UseOptimizedLibcall = true; break; @@ -969,7 +1014,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) { if (!E->isOpenCL()) return V; - auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace(); + auto AS = PT->castAs<PointerType>()->getPointeeType().getAddressSpace(); if (AS == LangAS::opencl_generic) return V; auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); @@ -990,6 +1035,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { QualType RetTy; bool HaveRetTy = false; llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; + bool PostOpMinMax = false; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: case AtomicExpr::AO__opencl_atomic_init: @@ -1111,6 +1157,10 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__atomic_min_fetch: + PostOpMinMax = true; + LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: LibCallName = E->getValueType()->isSignedIntegerType() @@ -1119,6 +1169,10 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), LoweredMemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__atomic_max_fetch: + PostOpMinMax = true; + LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: LibCallName = E->getValueType()->isSignedIntegerType() @@ -1170,7 +1224,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // PostOp is only needed for the atomic_*_fetch operations, and // thus is only needed for and implemented in the // UseOptimizedLibcall codepath. - assert(UseOptimizedLibcall || !PostOp); + assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax)); RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args); // The value is returned directly from the libcall. @@ -1181,7 +1235,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // provided an out-param. if (UseOptimizedLibcall && Res.getScalarVal()) { llvm::Value *ResVal = Res.getScalarVal(); - if (PostOp) { + if (PostOpMinMax) { + llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal(); + ResVal = EmitPostAtomicMinMax(Builder, E->getOp(), + E->getValueType()->isSignedIntegerType(), + ResVal, LoadVal1); + } else if (PostOp) { llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal(); ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1); } @@ -1569,7 +1628,7 @@ Address AtomicInfo::materializeRValue(RValue rvalue) const { LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType()); AtomicInfo Atomics(CGF, TempLV); Atomics.emitCopyIntoMemory(rvalue); - return TempLV.getAddress(); + return TempLV.getAddress(CGF); } llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const { @@ -1916,8 +1975,8 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, // If this is an aggregate r-value, it should agree in type except // maybe for address-space qualification. 
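The PostOpMinMax plumbing added above implements the difference between the fetch-op and op-fetch builtin families: __atomic_fetch_min returns the value the object held before the operation, while the new __atomic_min_fetch returns the value it holds afterwards, recomputed from the atomicrmw result with an icmp/select (EmitPostAtomicMinMax). A usage-level illustration with hypothetical values:

    int x = 10;
    int old_val = __atomic_fetch_min(&x, 5, __ATOMIC_SEQ_CST); // returns 10 (pre-op); x == 5
    int new_val = __atomic_min_fetch(&x, 3, __ATOMIC_SEQ_CST); // returns 3 (post-op); x == 3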
assert(!rvalue.isAggregate() || - rvalue.getAggregateAddress().getElementType() - == dest.getAddress().getElementType()); + rvalue.getAggregateAddress().getElementType() == + dest.getAddress(*this).getElementType()); AtomicInfo atomics(*this, dest); LValue LVal = atomics.getAtomicLValue(); @@ -1984,10 +2043,10 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange( // maybe for address-space qualification. assert(!Expected.isAggregate() || Expected.getAggregateAddress().getElementType() == - Obj.getAddress().getElementType()); + Obj.getAddress(*this).getElementType()); assert(!Desired.isAggregate() || Desired.getAggregateAddress().getElementType() == - Obj.getAddress().getElementType()); + Obj.getAddress(*this).getElementType()); AtomicInfo Atomics(*this, Obj); return Atomics.EmitAtomicCompareExchange(Expected, Desired, Success, Failure, @@ -2027,13 +2086,11 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) { } // Evaluate the expression directly into the destination. - AggValueSlot slot = AggValueSlot::forLValue(dest, - AggValueSlot::IsNotDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::DoesNotOverlap, - Zeroed ? AggValueSlot::IsZeroed : - AggValueSlot::IsNotZeroed); + AggValueSlot slot = AggValueSlot::forLValue( + dest, *this, AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, + Zeroed ? AggValueSlot::IsZeroed : AggValueSlot::IsNotZeroed); EmitAggExpr(init, slot); return; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp index c3ee7129d9d7..11f54d1f7fb2 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBlocks.cpp @@ -19,6 +19,7 @@ #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "TargetInfo.h" +#include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" @@ -1076,7 +1077,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type.getNonReferenceType(), VK_LValue, SourceLocation()); - src = EmitDeclRefLValue(&declRef).getAddress(); + src = EmitDeclRefLValue(&declRef).getAddress(*this); }; // For byrefs, we just write the pointer to the byref struct into @@ -1253,8 +1254,7 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() { RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { - const BlockPointerType *BPT = - E->getCallee()->getType()->getAs<BlockPointerType>(); + const auto *BPT = E->getCallee()->getType()->castAs<BlockPointerType>(); llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); llvm::Value *Func = nullptr; @@ -1483,8 +1483,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr"); Builder.CreateStore(arg, alloc); if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().hasReducedDebugInfo()) { DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, @@ -1656,8 +1655,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const VarDecl *variable = CI.getVariable(); 
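The debug-info hunks in CGBlocks.cpp replace explicit enum comparisons with CodeGenOptions::hasReducedDebugInfo(). Judging from the call sites this diff rewrites, the helper is equivalent to:

    bool CodeGenOptions::hasReducedDebugInfo() const {
      return getDebugInfo() >= codegenoptions::LimitedDebugInfo;
    }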
DI->EmitLocation(Builder, variable->getLocation()); - if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().hasReducedDebugInfo()) { const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) { auto addr = LocalDeclMap.find(variable)->second; @@ -1802,7 +1800,7 @@ struct CallBlockRelease final : EHScopeStack::Cleanup { bool CodeGenFunction::cxxDestructorCanThrow(QualType T) { if (const auto *RD = T->getAsCXXRecordDecl()) if (const CXXDestructorDecl *DD = RD->getDestructor()) - return DD->getType()->getAs<FunctionProtoType>()->canThrow(); + return DD->getType()->castAs<FunctionProtoType>()->canThrow(); return false; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuilder.h b/contrib/llvm-project/clang/lib/CodeGen/CGBuilder.h index 68c8c641139f..107c9275431c 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBuilder.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBuilder.h @@ -107,7 +107,7 @@ public: llvm::StoreInst *CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile = false) { return CreateAlignedStore(Val, Addr.getPointer(), - Addr.getAlignment().getQuantity(), IsVolatile); + Addr.getAlignment().getAsAlign(), IsVolatile); } using CGBuilderBaseTy::CreateAlignedStore; @@ -273,22 +273,22 @@ public: using CGBuilderBaseTy::CreateMemCpy; llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { - return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(), - Src.getPointer(), Src.getAlignment().getQuantity(), - Size,IsVolatile); + return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getAsAlign(), + Src.getPointer(), Src.getAlignment().getAsAlign(), Size, + IsVolatile); } llvm::CallInst *CreateMemCpy(Address Dest, Address Src, uint64_t Size, bool IsVolatile = false) { - return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(), - Src.getPointer(), Src.getAlignment().getQuantity(), - Size, IsVolatile); + return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getAsAlign(), + Src.getPointer(), Src.getAlignment().getAsAlign(), Size, + IsVolatile); } using CGBuilderBaseTy::CreateMemMove; llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { - return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getQuantity(), - Src.getPointer(), Src.getAlignment().getQuantity(), + return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getAsAlign(), + Src.getPointer(), Src.getAlignment().getAsAlign(), Size, IsVolatile); } @@ -296,7 +296,7 @@ public: llvm::CallInst *CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile = false) { return CreateMemSet(Dest.getPointer(), Value, Size, - Dest.getAlignment().getQuantity(), IsVolatile); + Dest.getAlignment().getAsAlign(), IsVolatile); } using CGBuilderBaseTy::CreatePreserveStructAccessIndex; @@ -309,7 +309,7 @@ public: const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return Address(CreatePreserveStructAccessIndex(Addr.getPointer(), + return Address(CreatePreserveStructAccessIndex(ElTy, Addr.getPointer(), Index, FieldIndex, DbgInfo), Addr.getAlignment().alignmentAtOffset(Offset)); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp index cadce507412b..2d20f92fbb3d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp +++ 
b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp @@ -20,6 +20,7 @@ #include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/OSLog.h" #include "clang/Basic/TargetBuiltins.h" @@ -30,6 +31,17 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsBPF.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsR600.h" +#include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ScopedPrinter.h" @@ -45,7 +57,8 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) { return std::min(High, std::max(Low, Value)); } -static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, + Align AlignmentInBytes) { ConstantInt *Byte; switch (CGF.getLangOpts().getTrivialAutoVarInit()) { case LangOptions::TrivialAutoVarInitKind::Uninitialized: @@ -347,6 +360,58 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { } // Emit a simple mangled intrinsic that has 1 argument and a return type +// matching the argument type. Depending on mode, this may be a constrained +// floating-point intrinsic. +static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + if (CGF.Builder.getIsFPConstrained()) { + Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); + return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); + } else { + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, Src0); + } +} + +// Emit an intrinsic that has 2 operands of the same type as its result. +// Depending on mode, this may be a constrained floating-point intrinsic. +static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + if (CGF.Builder.getIsFPConstrained()) { + Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); + return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); + } else { + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1 }); + } +} + +// Emit an intrinsic that has 3 operands of the same type as its result. +// Depending on mode, this may be a constrained floating-point intrinsic. 
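The new emit*MaybeConstrainedFPBuiltin helpers above (the ternary variant follows below) pick between a plain intrinsic and its experimental.constrained counterpart based on the builder's FP state. A sketch of the effect on a sqrt builtin, with the resulting IR outlined in comments:

    if (CGF.Builder.getIsFPConstrained()) {
      // -> call double @llvm.experimental.constrained.sqrt.f64(double %x,
      //        metadata <rounding mode>, metadata <exception behavior>)
      Function *F = CGF.CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, Src0->getType());
      return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
    }
    // -> call double @llvm.sqrt.f64(double %x)
    return CGF.Builder.CreateCall(
        CGF.CGM.getIntrinsic(Intrinsic::sqrt, Src0->getType()), Src0);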
+static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + + if (CGF.Builder.getIsFPConstrained()) { + Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); + return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); + } else { + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); + } +} + +// Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, @@ -392,15 +457,22 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, } // Emit an intrinsic that has overloaded integer result and fp operand. -static Value *emitFPToIntRoundBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); +static Value * +emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, + unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID) { + llvm::Type *ResultType = CGF.ConvertType(E->getType()); + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, - {ResultType, Src0->getType()}); - return CGF.Builder.CreateCall(F, Src0); + if (CGF.Builder.getIsFPConstrained()) { + Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, + {ResultType, Src0->getType()}); + return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); + } else { + Function *F = + CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); + return CGF.Builder.CreateCall(F, Src0); + } } /// EmitFAbs - Emit a call to @llvm.fabs(). @@ -749,8 +821,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, // X86 has special BT, BTC, BTR, and BTS instructions that handle the array // indexing operation internally. Use them if possible. - llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch(); - if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) + if (CGF.getTarget().getTriple().isX86()) return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos); // Otherwise, use generic code to load one byte and test the bit. Use all but @@ -843,10 +914,12 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; Arg1Ty = CGF.Int8PtrTy; if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry)); + Arg1 = CGF.Builder.CreateCall( + CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy)); } else - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), - llvm::ConstantInt::get(CGF.Int32Ty, 0)); + Arg1 = CGF.Builder.CreateCall( + CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); } // Mark the call site and declaration with ReturnsTwice. 
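llvm.frameaddress and llvm.sponentry are now overloaded on the returned pointer type (so targets with a non-zero alloca address space get a correctly typed result), which is why the CGM.getIntrinsic calls in the hunk above gain an explicit type argument:

    // Before: CGM.getIntrinsic(Intrinsic::frameaddress)
    Value *FrameAddr = Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
        ConstantInt::get(Int32Ty, 0)); // depth 0 = current frame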
@@ -1394,9 +1467,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, Value *&RecordPtr, CharUnits Align, llvm::FunctionCallee Func, int Lvl) { - const auto *RT = RType->getAs<RecordType>(); ASTContext &Context = CGF.getContext(); - RecordDecl *RD = RT->getDecl()->getDefinition(); + RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition(); std::string Pad = std::string(Lvl * 4, ' '); Value *GString = @@ -1555,14 +1627,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIceill: case Builtin::BI__builtin_ceil: case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceilf16: case Builtin::BI__builtin_ceill: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::ceil, + Intrinsic::experimental_constrained_ceil)); case Builtin::BIcopysign: case Builtin::BIcopysignf: case Builtin::BIcopysignl: case Builtin::BI__builtin_copysign: case Builtin::BI__builtin_copysignf: + case Builtin::BI__builtin_copysignf16: case Builtin::BI__builtin_copysignl: case Builtin::BI__builtin_copysignf128: return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); @@ -1572,30 +1648,40 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIcosl: case Builtin::BI__builtin_cos: case Builtin::BI__builtin_cosf: + case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::cos, + Intrinsic::experimental_constrained_cos)); case Builtin::BIexp: case Builtin::BIexpf: case Builtin::BIexpl: case Builtin::BI__builtin_exp: case Builtin::BI__builtin_expf: + case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::exp, + Intrinsic::experimental_constrained_exp)); case Builtin::BIexp2: case Builtin::BIexp2f: case Builtin::BIexp2l: case Builtin::BI__builtin_exp2: case Builtin::BI__builtin_exp2f: + case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::exp2, + Intrinsic::experimental_constrained_exp2)); case Builtin::BIfabs: case Builtin::BIfabsf: case Builtin::BIfabsl: case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: + case Builtin::BI__builtin_fabsf16: case Builtin::BI__builtin_fabsl: case Builtin::BI__builtin_fabsf128: return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); @@ -1605,32 +1691,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIfloorl: case Builtin::BI__builtin_floor: case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorf16: case Builtin::BI__builtin_floorl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::floor, + Intrinsic::experimental_constrained_floor)); case Builtin::BIfma: case Builtin::BIfmaf: case Builtin::BIfmal: case Builtin::BI__builtin_fma: case Builtin::BI__builtin_fmaf: + case Builtin::BI__builtin_fmaf16: case Builtin::BI__builtin_fmal: - return 
RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma)); + return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::fma, + Intrinsic::experimental_constrained_fma)); case Builtin::BIfmax: case Builtin::BIfmaxf: case Builtin::BIfmaxl: case Builtin::BI__builtin_fmax: case Builtin::BI__builtin_fmaxf: + case Builtin::BI__builtin_fmaxf16: case Builtin::BI__builtin_fmaxl: - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::maxnum, + Intrinsic::experimental_constrained_maxnum)); case Builtin::BIfmin: case Builtin::BIfminf: case Builtin::BIfminl: case Builtin::BI__builtin_fmin: case Builtin::BI__builtin_fminf: + case Builtin::BI__builtin_fminf16: case Builtin::BI__builtin_fminl: - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::minnum, + Intrinsic::experimental_constrained_minnum)); // fmod() is a special-case. It maps to the frem instruction rather than an // LLVM intrinsic. @@ -1639,6 +1737,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIfmodl: case Builtin::BI__builtin_fmod: case Builtin::BI__builtin_fmodf: + case Builtin::BI__builtin_fmodf16: case Builtin::BI__builtin_fmodl: { Value *Arg1 = EmitScalarExpr(E->getArg(0)); Value *Arg2 = EmitScalarExpr(E->getArg(1)); @@ -1650,24 +1749,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIlogl: case Builtin::BI__builtin_log: case Builtin::BI__builtin_logf: + case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::log, + Intrinsic::experimental_constrained_log)); case Builtin::BIlog10: case Builtin::BIlog10f: case Builtin::BIlog10l: case Builtin::BI__builtin_log10: case Builtin::BI__builtin_log10f: + case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::log10, + Intrinsic::experimental_constrained_log10)); case Builtin::BIlog2: case Builtin::BIlog2f: case Builtin::BIlog2l: case Builtin::BI__builtin_log2: case Builtin::BI__builtin_log2f: + case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::log2, + Intrinsic::experimental_constrained_log2)); case Builtin::BInearbyint: case Builtin::BInearbyintf: @@ -1675,55 +1783,75 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_nearbyint: case Builtin::BI__builtin_nearbyintf: case Builtin::BI__builtin_nearbyintl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::nearbyint, + Intrinsic::experimental_constrained_nearbyint)); case Builtin::BIpow: case Builtin::BIpowf: case Builtin::BIpowl: case Builtin::BI__builtin_pow: case Builtin::BI__builtin_powf: + case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow)); + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::pow, + 
Intrinsic::experimental_constrained_pow)); case Builtin::BIrint: case Builtin::BIrintf: case Builtin::BIrintl: case Builtin::BI__builtin_rint: case Builtin::BI__builtin_rintf: + case Builtin::BI__builtin_rintf16: case Builtin::BI__builtin_rintl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::rint, + Intrinsic::experimental_constrained_rint)); case Builtin::BIround: case Builtin::BIroundf: case Builtin::BIroundl: case Builtin::BI__builtin_round: case Builtin::BI__builtin_roundf: + case Builtin::BI__builtin_roundf16: case Builtin::BI__builtin_roundl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::round, + Intrinsic::experimental_constrained_round)); case Builtin::BIsin: case Builtin::BIsinf: case Builtin::BIsinl: case Builtin::BI__builtin_sin: case Builtin::BI__builtin_sinf: + case Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::sin, + Intrinsic::experimental_constrained_sin)); case Builtin::BIsqrt: case Builtin::BIsqrtf: case Builtin::BIsqrtl: case Builtin::BI__builtin_sqrt: case Builtin::BI__builtin_sqrtf: + case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::sqrt, + Intrinsic::experimental_constrained_sqrt)); case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: case Builtin::BI__builtin_trunc: case Builtin::BI__builtin_truncf: + case Builtin::BI__builtin_truncf16: case Builtin::BI__builtin_truncl: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::trunc, + Intrinsic::experimental_constrained_trunc)); case Builtin::BIlround: case Builtin::BIlroundf: @@ -1731,7 +1859,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_lround: case Builtin::BI__builtin_lroundf: case Builtin::BI__builtin_lroundl: - return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lround)); + return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( + *this, E, Intrinsic::lround, + Intrinsic::experimental_constrained_lround)); case Builtin::BIllround: case Builtin::BIllroundf: @@ -1739,7 +1869,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_llround: case Builtin::BI__builtin_llroundf: case Builtin::BI__builtin_llroundl: - return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround)); + return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( + *this, E, Intrinsic::llround, + Intrinsic::experimental_constrained_llround)); case Builtin::BIlrint: case Builtin::BIlrintf: @@ -1747,7 +1879,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_lrint: case Builtin::BI__builtin_lrintf: case Builtin::BI__builtin_lrintl: - return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint)); + return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( + *this, E, Intrinsic::lrint, + Intrinsic::experimental_constrained_lrint)); case Builtin::BIllrint: case Builtin::BIllrintf: @@ -1755,7 +1889,9 @@ RValue 
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_llrint: case Builtin::BI__builtin_llrintf: case Builtin::BI__builtin_llrintl: - return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint)); + return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( + *this, E, Intrinsic::llrint, + Intrinsic::experimental_constrained_llrint)); default: break; @@ -1801,16 +1937,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: - case Builtin::BI__builtin_conjl: { + case Builtin::BI__builtin_conjl: + case Builtin::BIconj: + case Builtin::BIconjf: + case Builtin::BIconjl: { ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); Value *Real = ComplexVal.first; Value *Imag = ComplexVal.second; - Value *Zero = - Imag->getType()->isFPOrFPVectorTy() - ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) - : llvm::Constant::getNullValue(Imag->getType()); - - Imag = Builder.CreateFSub(Zero, Imag, "sub"); + Imag = Builder.CreateFNeg(Imag, "neg"); return RValue::getComplex(std::make_pair(Real, Imag)); } case Builtin::BI__builtin_creal: @@ -2026,11 +2160,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); - unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); + if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) + AlignmentCI = ConstantInt::get(AlignmentCI->getType(), + llvm::Value::MaximumAlignment); EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), - Alignment, OffsetValue); + AlignmentCI, OffsetValue); return RValue::get(PtrValue); } case Builtin::BI__assume: @@ -2077,10 +2213,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_constant_p: { llvm::Type *ResultType = ConvertType(E->getType()); - if (CGM.getCodeGenOpts().OptimizationLevel == 0) - // At -O0, we don't perform inlining, so we don't need to delay the - // processing. - return RValue::get(ConstantInt::get(ResultType, 0)); const Expr *Arg = E->getArg(0); QualType ArgType = Arg->getType(); @@ -2131,7 +2263,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); - Function *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } case Builtin::BI__builtin_readcyclecounter: { @@ -2159,13 +2291,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_powi: case Builtin::BI__builtin_powif: - case Builtin::BI__builtin_powil: { - Value *Base = EmitScalarExpr(E->getArg(0)); - Value *Exponent = EmitScalarExpr(E->getArg(1)); - llvm::Type *ArgType = Base->getType(); - Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); - return RValue::get(Builder.CreateCall(F, {Base, Exponent})); - } + case Builtin::BI__builtin_powil: + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::powi, Intrinsic::experimental_constrained_powi)); case Builtin::BI__builtin_isgreater: case Builtin::BI__builtin_isgreaterequal: @@ -2339,10 +2467,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Size = EmitScalarExpr(E->getArg(0)); const TargetInfo &TI = getContext().getTargetInfo(); // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. - unsigned SuitableAlignmentInBytes = + const Align SuitableAlignmentInBytes = CGM.getContext() .toCharUnitsFromBits(TI.getSuitableAlign()) - .getQuantity(); + .getAsAlign(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); @@ -2354,8 +2482,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); - unsigned AlignmentInBytes = - CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); + const Align AlignmentInBytes = + CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); initializeAlloca(*this, AI, Size, AlignmentInBytes); @@ -2372,7 +2500,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BImemcpy: - case Builtin::BI__builtin_memcpy: { + case Builtin::BI__builtin_memcpy: + case Builtin::BImempcpy: + case Builtin::BI__builtin_mempcpy: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); @@ -2381,7 +2511,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 1); Builder.CreateMemCpy(Dest, Src, SizeVal, false); - return RValue::get(Dest.getPointer()); + if (BuiltinID == Builtin::BImempcpy || + BuiltinID == Builtin::BI__builtin_mempcpy) + return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), SizeVal)); + else + return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin_char_memchr: @@ -2556,7 +2690,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_frame_address: { Value *Depth = 
ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); + Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { @@ -2637,9 +2771,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Buf = EmitPointerWithAlignment(E->getArg(0)); // Store the frame pointer to the setjmp buffer. - Value *FrameAddr = - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), - ConstantInt::get(Int32Ty, 0)); + Value *FrameAddr = Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), + ConstantInt::get(Int32Ty, 0)); Builder.CreateStore(FrameAddr, Buf); // Store the stack pointer to the setjmp buffer. @@ -3088,6 +3222,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), ConvertType(E->getType()))); } + case Builtin::BI__warn_memset_zero_len: + return RValue::getIgnored(); case Builtin::BI__annotation: { // Re-encode each wide string to UTF8 and make an MDString. SmallVector<Metadata *, 1> Strings; @@ -3348,7 +3484,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Carry); } case Builtin::BI__builtin_addressof: - return RValue::get(EmitLValue(E->getArg(0)).getPointer()); + return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__builtin_operator_new: return EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); @@ -3356,6 +3492,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); + case Builtin::BI__builtin_is_aligned: + return EmitBuiltinIsAligned(E); + case Builtin::BI__builtin_align_up: + return EmitBuiltinAlignTo(E, true); + case Builtin::BI__builtin_align_down: + return EmitBuiltinAlignTo(E, false); + case Builtin::BI__noop: // __noop always evaluates to an integer literal zero. return RValue::get(ConstantInt::get(IntTy, 0)); @@ -3673,13 +3816,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIget_pipe_num_packets: case Builtin::BIget_pipe_max_packets: { const char *BaseName; - const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>(); + const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>(); if (BuiltinID == Builtin::BIget_pipe_num_packets) BaseName = "__get_pipe_num_packets"; else BaseName = "__get_pipe_max_packets"; - auto Name = std::string(BaseName) + - std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); + std::string Name = std::string(BaseName) + + std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); // Building the generic function prototype. 
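// (Illustrative note on the naming above, assuming the OpenCL runtime entry
// points keep this scheme: for a kernel parameter declared read_only pipe int p,
// get_pipe_num_packets(p) lowers to a call to "__get_pipe_num_packets_ro", and
// for a write_only pipe to "__get_pipe_num_packets_wo"; the max_packets
// builtins use the same _ro/_wo suffixes.)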
Value *Arg0 = EmitScalarExpr(E->getArg(0)); @@ -3731,8 +3874,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); - llvm::Value *Range = NDRangeL.getAddress().getPointer(); - llvm::Type *RangeTy = NDRangeL.getAddress().getType(); + llvm::Value *Range = NDRangeL.getAddress(*this).getPointer(); + llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: @@ -3751,7 +3894,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B; - B.addByValAttr(NDRangeL.getAddress().getElementType()); + B.addByValAttr(NDRangeL.getAddress(*this).getElementType()); llvm::AttributeList ByValAttrSet = llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); @@ -3769,7 +3912,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { llvm::APInt ArraySize(32, NumArgs - First); QualType SizeArrayTy = getContext().getConstantArrayType( - getContext().getSizeType(), ArraySize, ArrayType::Normal, + getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); @@ -3936,7 +4079,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); - llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); + llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); @@ -3977,6 +4120,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef: + case Builtin::BI__builtin_canonicalizef16: case Builtin::BI__builtin_canonicalizel: return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); @@ -4197,9 +4341,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(V); } - // See if we have a target specific builtin that needs to be lowered. - if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) - return RValue::get(V); + // Some target-specific builtins can have aggregate return values, e.g. + // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force + // ReturnValue to be non-null, so that the target-specific emission code can + // always just emit into it. + TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); + if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) { + Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); + ReturnValue = ReturnValueSlot(DestPtr, false); + } + + // Now see if we can emit a target-specific builtin. 
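// (Illustrative sketch of the aggregate case described above: for MVE source
// such as uint32x4x2_t v = vld2q_u32(p); the builtin's result is evaluated as
// TEK_Aggregate, so the temporary created here becomes the slot that the
// target-specific emitter below stores both vectors into.)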
+ if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { + switch (EvalKind) { + case TEK_Scalar: + return RValue::get(V); + case TEK_Aggregate: + return RValue::getAggregate(ReturnValue.getValue(), + ReturnValue.isVolatile()); + case TEK_Complex: + llvm_unreachable("No current target builtin returns complex"); + } + llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + } ErrorUnsupported(E, "builtin function"); @@ -4209,16 +4373,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: - return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch); + return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch); case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); + case llvm::Triple::bpfeb: + case llvm::Triple::bpfel: + return CGF->EmitBPFBuiltinExpr(BuiltinID, E); case llvm::Triple::x86: case llvm::Triple::x86_64: return CGF->EmitX86BuiltinExpr(BuiltinID, E); @@ -4245,15 +4414,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, } Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { + const CallExpr *E, + ReturnValueSlot ReturnValue) { if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { assert(getContext().getAuxTargetInfo() && "Missing aux target info"); return EmitTargetArchBuiltinExpr( this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, - getContext().getAuxTargetInfo()->getTriple().getArch()); + ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); } - return EmitTargetArchBuiltinExpr(this, BuiltinID, E, + return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue, getTarget().getTriple().getArch()); } @@ -4428,6 +4598,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), + NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, arm_neon_vacge, 0), NEONMAP1(vcageq_v, arm_neon_vacge, 0), NEONMAP1(vcagt_v, arm_neon_vacgt, 0), @@ -4595,10 +4769,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), - NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), - NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), - NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), - NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), + NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), + NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmull_v, 
arm_neon_vqdmull, Add1ArgType), @@ -4616,8 +4790,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), - NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), - NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), @@ -4701,6 +4875,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), + NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, aarch64_neon_facge, 0), NEONMAP1(vcageq_v, aarch64_neon_facge, 0), NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), @@ -5430,6 +5608,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); } + case NEON::BI__builtin_neon_vcvtx_f32_v: { + llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty}; + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); + + } case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); @@ -5645,7 +5828,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); // TODO: Currently in AArch32 mode the pointer operand comes first, whereas // in AArch64 it comes last. We may want to stick to one or another. 
- if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) { + if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || + Arch == llvm::Triple::aarch64_32) { llvm::Type *Tys[2] = { VTy, PTy }; std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); @@ -5981,6 +6165,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { if (auto Hint = GetValueForARMHint(BuiltinID)) return Hint; @@ -6019,7 +6204,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Locality is not supported on ARM target Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); - Function *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6029,6 +6214,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == ARM::BI__builtin_arm_cls) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); + } + if (BuiltinID == ARM::BI__builtin_arm_cls64) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, + "cls"); + } + if (BuiltinID == ARM::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); @@ -6297,6 +6492,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); } + // Deal with MVE builtins + if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) + return Result; + // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; @@ -6746,6 +6945,152 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } +template<typename Integer> +static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { + llvm::APSInt IntVal; + bool IsConst = E->isIntegerConstantExpr(IntVal, Context); + assert(IsConst && "Sema should have checked this was a constant"); + (void)IsConst; + return IntVal.getExtValue(); +} + +static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, + llvm::Type *T, bool Unsigned) { + // Helper function called by Tablegen-constructed ARM MVE builtin codegen, + // which finds it convenient to specify signed/unsigned as a boolean flag. + return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); +} + +static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, + uint32_t Shift, bool Unsigned) { + // MVE helper function for integer shift right. This must handle signed vs + // unsigned, and also deal specially with the case where the shift count is + // equal to the lane size. In LLVM IR, an LShr with that parameter would be + // undefined behavior, but in MVE it's legal, so we must convert it to code + // that is not undefined in IR. + unsigned LaneBits = + V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); + if (Shift == LaneBits) { + // An unsigned shift of the full lane size always generates zero, so we can + // simply emit a zero vector. 
A signed shift of the full lane size does the + // same thing as shifting by one bit fewer. + if (Unsigned) + return llvm::Constant::getNullValue(V->getType()); + else + --Shift; + } + return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); +} + +static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { + // MVE-specific helper function for a vector splat, which infers the element + // count of the output vector by knowing that MVE vectors are all 128 bits + // wide. + unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits(); + return Builder.CreateVectorSplat(Elements, V); +} + +Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch) { + enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType; + Intrinsic::ID IRIntr; + unsigned NumVectors; + + // Code autogenerated by Tablegen will handle all the simple builtins. + switch (BuiltinID) { + #include "clang/Basic/arm_mve_builtin_cg.inc" + + // If we didn't match an MVE builtin id at all, go back to the + // main EmitARMBuiltinExpr. + default: + return nullptr; + } + + // Anything that breaks from that switch is an MVE builtin that + // needs handwritten code to generate. + + switch (CustomCodeGenType) { + + case CustomCodeGen::VLD24: { + llvm::SmallVector<Value *, 4> Ops; + llvm::SmallVector<llvm::Type *, 4> Tys; + + auto MvecCType = E->getType(); + auto MvecLType = ConvertType(MvecCType); + assert(MvecLType->isStructTy() && + "Return type for vld[24]q should be a struct"); + assert(MvecLType->getStructNumElements() == 1 && + "Return-type struct for vld[24]q should have one element"); + auto MvecLTypeInner = MvecLType->getStructElementType(0); + assert(MvecLTypeInner->isArrayTy() && + "Return-type struct for vld[24]q should contain an array"); + assert(MvecLTypeInner->getArrayNumElements() == NumVectors && + "Array member of return-type struct vld[24]q has wrong length"); + auto VecLType = MvecLTypeInner->getArrayElementType(); + + Tys.push_back(VecLType); + + auto Addr = E->getArg(0); + Ops.push_back(EmitScalarExpr(Addr)); + Tys.push_back(ConvertType(Addr->getType())); + + Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); + Value *LoadResult = Builder.CreateCall(F, Ops); + Value *MvecOut = UndefValue::get(MvecLType); + for (unsigned i = 0; i < NumVectors; ++i) { + Value *Vec = Builder.CreateExtractValue(LoadResult, i); + MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); + } + + if (ReturnValue.isNull()) + return MvecOut; + else + return Builder.CreateStore(MvecOut, ReturnValue.getValue()); + } + + case CustomCodeGen::VST24: { + llvm::SmallVector<Value *, 4> Ops; + llvm::SmallVector<llvm::Type *, 4> Tys; + + auto Addr = E->getArg(0); + Ops.push_back(EmitScalarExpr(Addr)); + Tys.push_back(ConvertType(Addr->getType())); + + auto MvecCType = E->getArg(1)->getType(); + auto MvecLType = ConvertType(MvecCType); + assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct"); + assert(MvecLType->getStructNumElements() == 1 && + "Data-type struct for vst2q should have one element"); + auto MvecLTypeInner = MvecLType->getStructElementType(0); + assert(MvecLTypeInner->isArrayTy() && + "Data-type struct for vst2q should contain an array"); + assert(MvecLTypeInner->getArrayNumElements() == NumVectors && + "Array member of data-type struct for vst2q has wrong length"); + auto VecLType = MvecLTypeInner->getArrayElementType(); + + Tys.push_back(VecLType); + + AggValueSlot
MvecSlot = CreateAggTemp(MvecCType); + EmitAggExpr(E->getArg(1), MvecSlot); + auto Mvec = Builder.CreateLoad(MvecSlot.getAddress()); + for (unsigned i = 0; i < NumVectors; i++) + Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); + + Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); + Value *ToReturn = nullptr; + for (unsigned i = 0; i < NumVectors; i++) { + Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); + ToReturn = Builder.CreateCall(F, Ops); + Ops.pop_back(); + } + return ToReturn; + } + } + llvm_unreachable("unknown custom codegen type."); +} + static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops, @@ -6958,7 +7303,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify // PLDL3STRM or PLDL2STRM. - Function *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6977,6 +7322,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == AArch64::BI__builtin_arm_cls) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, + "cls"); + } + if (BuiltinID == AArch64::BI__builtin_arm_cls64) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, + "cls"); + } + if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { assert((getContext().getTypeSize(E->getType()) == 32) && "__jcvt of unusual size!"); @@ -7293,12 +7649,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { - llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } if (BuiltinID == AArch64::BI__builtin_sponentry) { - llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); return Builder.CreateCall(F); } @@ -9276,6 +9633,37 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } } +Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + assert(BuiltinID == BPF::BI__builtin_preserve_field_info && + "unexpected BPF builtin"); + + const Expr *Arg = E->getArg(0); + bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; + + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g"); + return IsBitField ? EmitLValue(Arg).getBitFieldPointer() : EmitLValue(Arg).getPointer(*this); + } + + // Enable underlying preserve_*_access_index() generation. + bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; + IsInPreservedAIRegion = true; + Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() + : EmitLValue(Arg).getPointer(*this); + IsInPreservedAIRegion = OldIsInPreservedAIRegion; + + ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); + + // Build the IR for the preserve_field_info intrinsic.
+ llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, + {FieldAddr->getType()}); + return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); +} + llvm::Value *CodeGenFunction:: BuildVector(ArrayRef<llvm::Value*> Ops) { assert((Ops.size() & (Ops.size() - 1)) == 0 && @@ -10034,7 +10422,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); Value *Data = ConstantInt::get(Int32Ty, 1); - Function *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } case X86::BI_mm_clflush: { @@ -11169,7 +11557,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Unaligned nontemporal store of the scalar value. StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - SI->setAlignment(1); + SI->setAlignment(llvm::Align::None()); return SI; } // Rotate is a special case of funnel shift - 1st 2 args are the same. @@ -12113,13 +12501,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI_AddressOfReturnAddress: { - Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + Function *F = + CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } case X86::BI__stosb: { // We treat __stosb as a volatile memset - it may not generate "rep stosb" // instruction, but it will create a memset that won't be optimized away. - return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); + return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align::None(), true); } case X86::BI__ud2: // llvm.trap makes a ud2a instruction on x86. 
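For context on the EmitBPFBuiltinExpr hunk above, a minimal usage sketch from the C side, assuming a BPF target, -g, and that the integer second argument selects the kind of field information (the kind encodings belong to the BPF backend and are an assumption here):

    struct record {
      unsigned a : 4; // a bitfield operand goes through getBitFieldPointer()
      unsigned b;     // a plain field goes through getPointer()
    };

    unsigned probe(struct record *r) {
      // Lowered to the llvm.bpf.preserve.field.info intrinsic so that BPF
      // CO-RE loaders can relocate the result for the running kernel.
      return __builtin_preserve_field_info(r->a, 0 /* assumed kind value */);
    }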
@@ -12937,9 +13326,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); + return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); } case SystemZ::BI__builtin_s390_vfnmasb: case SystemZ::BI__builtin_s390_vfnmadb: { @@ -12947,9 +13335,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); + return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); } case SystemZ::BI__builtin_s390_vfnmssb: case SystemZ::BI__builtin_s390_vfnmsdb: { @@ -12957,10 +13344,9 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); - return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); + Value *NegZ = Builder.CreateFNeg(Z, "neg"); + return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); } case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { @@ -12973,9 +13359,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, case SystemZ::BI__builtin_s390_vflndb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); - Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); - return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); + return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg"); } case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: { @@ -13877,6 +14262,96 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { } } +namespace { +struct BuiltinAlignArgs { + llvm::Value *Src = nullptr; + llvm::Type *SrcType = nullptr; + llvm::Value *Alignment = nullptr; + llvm::Value *Mask = nullptr; + llvm::IntegerType *IntType = nullptr; + + BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) { + QualType AstType = E->getArg(0)->getType(); + if (AstType->isArrayType()) + Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + else + Src = CGF.EmitScalarExpr(E->getArg(0)); + SrcType = Src->getType(); + if (SrcType->isPointerTy()) { + IntType = IntegerType::get( + CGF.getLLVMContext(), + CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); + } else { + assert(SrcType->isIntegerTy()); + IntType = cast<llvm::IntegerType>(SrcType); + } + Alignment = CGF.EmitScalarExpr(E->getArg(1)); + Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment"); + auto *One = llvm::ConstantInt::get(IntType, 1); + Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); + } +}; +} // namespace + +/// Generate (x & (y-1)) == 0. 
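/// For illustration (an assumed example, with a constant alignment so the
/// mask folds): bool b = __builtin_is_aligned(p, 16) on a char *p emits
/// roughly
///   %src_addr   = ptrtoint i8* %p to i64
///   %set_bits   = and i64 %src_addr, 15
///   %is_aligned = icmp eq i64 %set_bits, 0
/// matching the value names used below.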
+RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) { + BuiltinAlignArgs Args(E, *this); + llvm::Value *SrcAddress = Args.Src; + if (Args.SrcType->isPointerTy()) + SrcAddress = + Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr"); + return RValue::get(Builder.CreateICmpEQ( + Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"), + llvm::Constant::getNullValue(Args.IntType), "is_aligned")); +} + +/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. +/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the +/// llvm.ptrmask intrinsic (with a GEP before in the align_up case). +/// TODO: actually use ptrmask once most optimization passes know about it. +RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { + BuiltinAlignArgs Args(E, *this); + llvm::Value *SrcAddr = Args.Src; + if (Args.Src->getType()->isPointerTy()) + SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr"); + llvm::Value *SrcForMask = SrcAddr; + if (AlignUp) { + // When aligning up we have to first add the mask to ensure we go over the + // next alignment value and then align down to the next valid multiple. + // By adding the mask, we ensure that align_up on an already aligned + // value will not change the value. + SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary"); + } + // Invert the mask to only clear the lower bits. + llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask"); + llvm::Value *Result = + Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); + if (Args.Src->getType()->isPointerTy()) { + /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well. + // Result = Builder.CreateIntrinsic( + // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType}, + // {SrcForMask, InvertedMask}, nullptr, "aligned_result"); + Result->setName("aligned_intptr"); + llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff"); + // The result must point to the same underlying allocation. This means we + // can use an inbounds GEP to enable better optimization. + Value *Base = EmitCastToVoidPtr(Args.Src); + if (getLangOpts().isSignedOverflowDefined()) + Result = Builder.CreateGEP(Base, Difference, "aligned_result"); + else + Result = EmitCheckedInBoundsGEP(Base, Difference, + /*SignedIndices=*/true, + /*isSubtraction=*/!AlignUp, + E->getExprLoc(), "aligned_result"); + Result = Builder.CreatePointerCast(Result, Args.SrcType); + // Emit an alignment assumption to ensure that the new alignment is + // propagated to loads/stores, etc.
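// (Worked example for the formulas above: __builtin_align_up(13, 8)
// computes (13 + 7) & ~7 == 16 and __builtin_align_down(13, 8) computes
// 13 & ~7 == 8; an already-aligned input is returned unchanged by both.
// The assumption emitted below is what lets later passes exploit the new
// alignment, e.g. to vectorize accesses through the returned pointer.)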
+ EmitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); + } + assert(Result->getType() == Args.SrcType); + return RValue::get(Result); +} + Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { @@ -13924,6 +14399,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); return Builder.CreateCall(Callee); } + case WebAssembly::BI__builtin_wasm_tls_align: { + llvm::Type *ResultType = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType); + return Builder.CreateCall(Callee); + } + case WebAssembly::BI__builtin_wasm_tls_base: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); @@ -13954,6 +14438,26 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } + case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32: + case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64: + case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32: + case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: { + Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResT = ConvertType(E->getType()); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()}); + return Builder.CreateCall(Callee, {Src}); + } + case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32: + case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64: + case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32: + case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: { + Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResT = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned, + {ResT, Src->getType()}); + return Builder.CreateCall(Callee, {Src}); + } case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: @@ -13998,6 +14502,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_swizzle_v8x16: { + Value *Src = EmitScalarExpr(E->getArg(0)); + Value *Indices = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle); + return Builder.CreateCall(Callee, {Src, Indices}); + } case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16: case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16: case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8: @@ -14090,6 +14600,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_avgr_u_i8x16: + case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = 
EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); @@ -14098,6 +14616,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } + case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_any_true_i8x16: case WebAssembly::BI__builtin_wasm_any_true_i16x8: case WebAssembly::BI__builtin_wasm_any_true_i32x4: @@ -14139,7 +14663,86 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } - + case WebAssembly::BI__builtin_wasm_qfma_f32x4: + case WebAssembly::BI__builtin_wasm_qfms_f32x4: + case WebAssembly::BI__builtin_wasm_qfma_f64x2: + case WebAssembly::BI__builtin_wasm_qfms_f64x2: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_qfma_f32x4: + case WebAssembly::BI__builtin_wasm_qfma_f64x2: + IntNo = Intrinsic::wasm_qfma; + break; + case WebAssembly::BI__builtin_wasm_qfms_f32x4: + case WebAssembly::BI__builtin_wasm_qfms_f64x2: + IntNo = Intrinsic::wasm_qfms; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); + return Builder.CreateCall(Callee, {A, B, C}); + } + case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: + case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: + case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: + case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: { + Value *Low = EmitScalarExpr(E->getArg(0)); + Value *High = EmitScalarExpr(E->getArg(1)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: + case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: + IntNo = Intrinsic::wasm_narrow_signed; + break; + case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: + case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: + IntNo = Intrinsic::wasm_narrow_unsigned; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = + CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); + return Builder.CreateCall(Callee, {Low, High}); + } + case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8: + IntNo = Intrinsic::wasm_widen_low_signed; + break; + case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8: + IntNo = Intrinsic::wasm_widen_high_signed; + break; + 
case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8: + IntNo = Intrinsic::wasm_widen_low_unsigned; + break; + case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16: + case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: + IntNo = Intrinsic::wasm_widen_high_unsigned; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = + CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()}); + return Builder.CreateCall(Callee, Vec); + } default: return nullptr; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp index 4d4038dae9cf..5c5cbaff0252 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp @@ -93,7 +93,7 @@ private: GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); } if (Alignment) - GV->setAlignment(Alignment); + GV->setAlignment(llvm::Align(Alignment)); return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), ConstStr.getPointer(), Zeros); @@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), - CudaFeature::CUDA_USES_NEW_LAUNCH)) + CudaFeature::CUDA_USES_NEW_LAUNCH) || + CGF.getLangOpts().HIPUseNewLaunchAPI) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); @@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); - // Lookup cudaLaunchKernel function. + // Lookup cudaLaunchKernel/hipLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); + // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // hipStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + auto LaunchKernelName = addPrefixToName("LaunchKernel"); IdentifierInfo &cudaLaunchKernelII = - CGM.getContext().Idents.get("cudaLaunchKernel"); + CGM.getContext().Idents.get(LaunchKernelName); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) @@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), - "Can't find declaration for cudaLaunchKernel()"); + "Can't find declaration for " + LaunchKernelName); return; } // Create temporary dim3 grid_dim, block_dim. 
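As a reading aid for the stub logic above and below: the emitted device stub behaves roughly like the following hand-written C++ (HIP spelling; dim3 and the two runtime declarations are simplified stand-ins for the real headers, and hipError_t/hipStream_t are reduced to int/void*):

    #include <cstddef>

    struct dim3 { unsigned x = 1, y = 1, z = 1; }; // simplified stand-in

    extern "C" unsigned __hipPopCallConfiguration(dim3 *GridDim, dim3 *BlockDim,
                                                  std::size_t *ShmemSize,
                                                  void **Stream);
    extern "C" int hipLaunchKernel(const void *Func, dim3 GridDim, dim3 BlockDim,
                                   void **Args, std::size_t ShmemSize,
                                   void *Stream);

    void kernel_stub(int Arg) { // stands in for the stub of a one-arg kernel
      dim3 GridDim, BlockDim;
      std::size_t ShmemSize;
      void *Stream;
      // Pop the configuration pushed by the <<<...>>> launch expression.
      __hipPopCallConfiguration(&GridDim, &BlockDim, &ShmemSize, &Stream);
      void *Args[] = {&Arg};
      hipLaunchKernel(reinterpret_cast<const void *>(&kernel_stub), GridDim,
                      BlockDim, Args, ShmemSize, Stream);
    }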
@@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), - "__cudaPopCallConfiguration"); + addUnderscoredPrefixToName("PopCallConfiguration")); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), @@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = - CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGM.CreateRuntimeFunction(FTy, LaunchKernelName); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); @@ -623,7 +628,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { Linkage, /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); - GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); // Prevent the weak symbol in different shared libraries being merged. if (Linkage != llvm::GlobalValue::InternalLinkage) GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -664,7 +669,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle"); - GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCXX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCXX.cpp index 6d903a0d09e2..1928e0df3809 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCXX.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCXX.cpp @@ -12,10 +12,11 @@ // We might split this into multiple files if it gets too unwieldy -#include "CodeGenModule.h" #include "CGCXXABI.h" #include "CodeGenFunction.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -80,7 +81,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // Skip base classes with trivial destructors. const auto *Base = - cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); if (Base->hasTrivialDestructor()) continue; // If we've already found a base class with a non-trivial @@ -104,8 +105,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // Give up if the calling conventions don't match. We could update the call, // but it is probably not worth it. 
const CXXDestructorDecl *BaseD = UniqueBase->getDestructor(); - if (BaseD->getType()->getAs<FunctionType>()->getCallConv() != - D->getType()->getAs<FunctionType>()->getCallConv()) + if (BaseD->getType()->castAs<FunctionType>()->getCallConv() != + D->getType()->castAs<FunctionType>()->getCallConv()) return true; GlobalDecl AliasDecl(D, Dtor_Base); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.cpp index 041c0f8959fd..7ada4032b3ee 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.cpp @@ -13,6 +13,7 @@ #include "CGCXXABI.h" #include "CGCleanup.h" +#include "clang/AST/Attr.h" using namespace clang; using namespace CodeGen; @@ -46,8 +47,8 @@ CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer( ThisPtrForCall = This.getPointer(); const FunctionProtoType *FPT = MPT->getPointeeType()->getAs<FunctionProtoType>(); - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); + const auto *RD = + cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); llvm::Constant *FnPtr = llvm::Constant::getNullValue(FTy->getPointerTo()); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.h index 3a9c3b347439..bff49be7a3c4 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCXXABI.h @@ -577,7 +577,7 @@ public: // Determine if references to thread_local global variables can be made // directly or require access through a thread wrapper function. - virtual bool usesThreadWrapperFunction() const = 0; + virtual bool usesThreadWrapperFunction(const VarDecl *VD) const = 0; /// Emit a reference to a non-local thread_local variable (including /// triggering the initialization of all thread_local variables in its diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp index cf8024550eee..e4803fde230f 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp @@ -19,6 +19,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -28,7 +29,6 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -36,6 +36,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Transforms/Utils/Local.h" using namespace clang; using namespace CodeGen; @@ -903,7 +904,7 @@ struct NoExpansion : TypeExpansion { static std::unique_ptr<TypeExpansion> getTypeExpansion(QualType Ty, const ASTContext &Context) { if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { - return llvm::make_unique<ConstantArrayExpansion>( + return std::make_unique<ConstantArrayExpansion>( AT->getElementType(), AT->getSize().getZExtValue()); } if (const RecordType *RT = Ty->getAs<RecordType>()) { @@ -947,13 +948,13 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) { 
Fields.push_back(FD); } } - return llvm::make_unique<RecordExpansion>(std::move(Bases), + return std::make_unique<RecordExpansion>(std::move(Bases), std::move(Fields)); } if (const ComplexType *CT = Ty->getAs<ComplexType>()) { - return llvm::make_unique<ComplexExpansion>(CT->getElementType()); + return std::make_unique<ComplexExpansion>(CT->getElementType()); } - return llvm::make_unique<NoExpansion>(); + return std::make_unique<NoExpansion>(); } static int getExpansionSize(QualType Ty, const ASTContext &Context) { @@ -1020,13 +1021,13 @@ void CodeGenFunction::ExpandTypeFromArgs( auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) { - forConstantArrayExpansion(*this, CAExp, LV.getAddress(), - [&](Address EltAddr) { - LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy); - ExpandTypeFromArgs(CAExp->EltTy, LV, AI); - }); + forConstantArrayExpansion( + *this, CAExp, LV.getAddress(*this), [&](Address EltAddr) { + LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy); + ExpandTypeFromArgs(CAExp->EltTy, LV, AI); + }); } else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) { - Address This = LV.getAddress(); + Address This = LV.getAddress(*this); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. Address Base = @@ -1047,8 +1048,13 @@ void CodeGenFunction::ExpandTypeFromArgs( auto imagValue = *AI++; EmitStoreOfComplex(ComplexPairTy(realValue, imagValue), LV, /*init*/ true); } else { + // Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a + // primitive store. assert(isa<NoExpansion>(Exp.get())); - EmitStoreThroughLValue(RValue::get(*AI++), LV); + if (LV.isBitField()) + EmitStoreThroughLValue(RValue::get(*AI++), LV); + else + EmitStoreOfScalar(*AI++, LV); } } @@ -1057,7 +1063,7 @@ void CodeGenFunction::ExpandTypeToArgs( SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos) { auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) { - Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress(*this) : Arg.getKnownRValue().getAggregateAddress(); forConstantArrayExpansion( *this, CAExp, Addr, [&](Address EltAddr) { @@ -1068,7 +1074,7 @@ void CodeGenFunction::ExpandTypeToArgs( IRCallArgPos); }); } else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) { - Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress(*this) : Arg.getKnownRValue().getAggregateAddress(); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. @@ -1305,6 +1311,15 @@ static void CreateCoercedStore(llvm::Value *Src, DstTy = Dst.getType()->getElementType(); } + llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy); + llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy); + if (SrcPtrTy && DstPtrTy && + SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) { + Src = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy); + CGF.Builder.CreateStore(Src, Dst, DstIsVolatile); + return; + } + // If the source and destination are integer or pointer types, just do an // extension or truncation to the desired type. 
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) && @@ -1713,24 +1728,28 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (!CodeGenOpts.TrapFuncName.empty()) FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); } else { - // Attributes that should go on the function, but not the call site. - if (!CodeGenOpts.DisableFPElim) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - } else if (CodeGenOpts.OmitLeafFramePointer) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } else { - FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + StringRef FpKind; + switch (CodeGenOpts.getFramePointer()) { + case CodeGenOptions::FramePointerKind::None: + FpKind = "none"; + break; + case CodeGenOptions::FramePointerKind::NonLeaf: + FpKind = "non-leaf"; + break; + case CodeGenOptions::FramePointerKind::All: + FpKind = "all"; + break; } + FuncAttrs.addAttribute("frame-pointer", FpKind); FuncAttrs.addAttribute("less-precise-fpmad", llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); if (CodeGenOpts.NullPointerIsValid) FuncAttrs.addAttribute("null-pointer-is-valid", "true"); - if (!CodeGenOpts.FPDenormalMode.empty()) - FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode); + if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::Invalid) + FuncAttrs.addAttribute("denormal-fp-math", + llvm::denormalModeName(CodeGenOpts.FPDenormalMode)); FuncAttrs.addAttribute("no-trapping-math", llvm::toStringRef(CodeGenOpts.NoTrappingMath)); @@ -1850,11 +1869,30 @@ void CodeGenModule::ConstructAttributeList( if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( getContext(), FuncAttrs, Fn->getType()->getAs<FunctionProtoType>()); - // Don't use [[noreturn]] or _Noreturn for a call to a virtual function. - // These attributes are not inherited by overloads. const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn); - if (Fn->isNoReturn() && !(AttrOnCallSite && MD && MD->isVirtual())) - FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + const bool IsVirtualCall = MD && MD->isVirtual(); + // Don't use [[noreturn]], _Noreturn or [[no_builtin]] for a call to a + // virtual function. These attributes are not inherited by overloads. + if (!(AttrOnCallSite && IsVirtualCall)) { + if (Fn->isNoReturn()) + FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + + const auto *NBA = Fn->getAttr<NoBuiltinAttr>(); + bool HasWildcard = NBA && llvm::is_contained(NBA->builtinNames(), "*"); + if (getLangOpts().NoBuiltin || HasWildcard) + FuncAttrs.addAttribute("no-builtins"); + else { + auto AddNoBuiltinAttr = [&FuncAttrs](StringRef BuiltinName) { + SmallString<32> AttributeName; + AttributeName += "no-builtin-"; + AttributeName += BuiltinName; + FuncAttrs.addAttribute(AttributeName); + }; + llvm::for_each(getLangOpts().NoBuiltinFuncs, AddNoBuiltinAttr); + if (NBA) + llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr); + } + } } // 'const', 'pure' and 'noalias' attributed functions are also nounwind. 
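A standalone sketch of the attribute-name scheme used by AddNoBuiltinAttr above, where -fno-builtin-memcpy (or [[clang::no_builtin("memcpy")]]) yields the string attribute "no-builtin-memcpy" and a "*" wildcard collapses to plain "no-builtins" (the helper below is hypothetical; the real code builds llvm::SmallString names into an llvm::AttrBuilder):

    #include <string>
    #include <vector>

    // One IR attribute name per disabled builtin, or the single
    // "no-builtins" attribute when the wildcard is present.
    std::vector<std::string>
    noBuiltinAttrNames(const std::vector<std::string> &Names) {
      std::vector<std::string> Attrs;
      for (const std::string &Name : Names) {
        if (Name == "*")
          return {"no-builtins"};
        Attrs.push_back("no-builtin-" + Name);
      }
      return Attrs;
    }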
@@ -2123,8 +2161,8 @@ void CodeGenModule::ConstructAttributeList( if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { auto info = getContext().getTypeInfoInChars(PTy); Attrs.addDereferenceableAttr(info.first.getQuantity()); - Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(), - info.second.getQuantity())); + Attrs.addAttribute(llvm::Attribute::getWithAlignment( + getLLVMContext(), info.second.getAsAlign())); } break; } @@ -3089,8 +3127,8 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, // Deactivate the cleanup for the callee-destructed param that was pushed. if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk && - type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee() && - type.isDestructedType()) { + type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee() && + param->needsDestruction(getContext())) { EHScopeStack::stable_iterator cleanup = CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param)); assert(cleanup.isValid() && @@ -3109,7 +3147,7 @@ static bool isProvablyNull(llvm::Value *addr) { static void emitWriteback(CodeGenFunction &CGF, const CallArgList::Writeback &writeback) { const LValue &srcLV = writeback.Source; - Address srcAddr = srcLV.getAddress(); + Address srcAddr = srcLV.getAddress(CGF); assert(!isProvablyNull(srcAddr.getPointer()) && "shouldn't have writeback for provably null argument"); @@ -3217,7 +3255,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, CRE->getSubExpr()->getType()->castAs<PointerType>()->getPointeeType(); srcLV = CGF.MakeAddrLValue(srcAddr, srcAddrType); } - Address srcAddr = srcLV.getAddress(); + Address srcAddr = srcLV.getAddress(CGF); // The dest and src types don't necessarily match in LLVM terms // because of the crazy ObjC compatibility rules. @@ -3531,7 +3569,7 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const { CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap, LV.isVolatile()); IsUsed = true; - return RValue::getAggregate(Copy.getAddress()); + return RValue::getAggregate(Copy.getAddress(CGF)); } void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { @@ -3541,7 +3579,7 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { else if (!HasLV && RV.isComplex()) CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true); else { - auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress(); + auto Addr = HasLV ? LV.getAddress(CGF) : RV.getAggregateAddress(); LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty); // We assume that call args are never copied into subobjects. CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap, @@ -3574,7 +3612,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, // However, we still have to push an EH-only cleanup in case we unwind before // we make it to the call. if (HasAggregateEvalKind && - type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { + type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { // If we're using inalloca, use the argument memory. Otherwise, use a // temporary. 
AggValueSlot Slot; @@ -3838,7 +3876,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, AI = CreateTempAlloca(ArgStruct, "argmem"); } auto Align = CallInfo.getArgStructAlignment(); - AI->setAlignment(Align.getQuantity()); + AI->setAlignment(Align.getAsAlign()); AI->setUsedWithInAlloca(true); assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca()); ArgMemory = Address(AI, Align); @@ -3875,6 +3913,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address swiftErrorTemp = Address::invalid(); Address swiftErrorArg = Address::invalid(); + // When passing arguments using temporary allocas, we need to add the + // appropriate lifetime markers. This vector keeps track of all the lifetime + // markers that need to be ended right after the call. + SmallVector<CallLifetimeEnd, 2> CallLifetimeEndAfterCall; + // Translate all of the arguments as necessary to match the IR lowering. assert(CallInfo.arg_size() == CallArgs.size() && "Mismatch between function signature & arguments."); @@ -3899,7 +3942,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (I->isAggregate()) { // Replace the placeholder with the appropriate argument slot GEP. Address Addr = I->hasLValue() - ? I->getKnownLValue().getAddress() + ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); llvm::Instruction *Placeholder = cast<llvm::Instruction>(Addr.getPointer()); @@ -3944,7 +3987,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // 3. If the argument is byval, but RV is not located in default // or alloca address space. Address Addr = I->hasLValue() - ? I->getKnownLValue().getAddress() + ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); llvm::Value *V = Addr.getPointer(); CharUnits Align = ArgInfo.getIndirectAlign(); @@ -3965,9 +4008,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto LV = I->getKnownLValue(); auto AS = LV.getAddressSpace(); - if ((!ArgInfo.getIndirectByVal() && - (LV.getAlignment() >= - getContext().getTypeAlignInChars(I->Ty)))) { + if (!ArgInfo.getIndirectByVal() || + (LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) { NeedCopy = true; } if (!getLangOpts().OpenCL) { @@ -3991,6 +4033,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address AI = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); IRCallArgs[FirstIRArg] = AI.getPointer(); + + // Emit lifetime markers for the temporary alloca. + uint64_t ByvalTempElementSize = + CGM.getDataLayout().getTypeAllocSize(AI.getElementType()); + llvm::Value *LifetimeSize = + EmitLifetimeStart(ByvalTempElementSize, AI.getPointer()); + + // Add cleanup code to emit the end lifetime marker after the call. + if (LifetimeSize) // In case we disabled lifetime markers. + CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize); + + // Generate the copy. I->copyInto(*this, AI); } else { // Skip the extra memcpy call. @@ -4019,7 +4073,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, V = I->getKnownRValue().getScalarVal(); else V = Builder.CreateLoad( - I->hasLValue() ? I->getKnownLValue().getAddress() + I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress()); // Implement swifterror by copying into a new swifterror argument. 
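To make the new CallLifetimeEndAfterCall handling above concrete, here is a self-contained sketch of the pattern it creates around a byval temporary, assuming the builder, data layout, argument type, source pointer and callee are supplied by the caller (the real path goes through CreateMemTempWithoutCast and CallArg::copyInto):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/IRBuilder.h"

    // lifetime.start before the argument copy, then the call, then
    // lifetime.end -- the marker that CallLifetimeEndAfterCall replays.
    static llvm::CallInst *callWithByvalTemp(llvm::IRBuilder<> &B,
                                             const llvm::DataLayout &DL,
                                             llvm::Type *ArgTy, llvm::Value *Src,
                                             llvm::FunctionCallee Callee) {
      llvm::AllocaInst *Tmp = B.CreateAlloca(ArgTy, nullptr, "byval-temp");
      uint64_t Size = DL.getTypeAllocSize(ArgTy);
      B.CreateLifetimeStart(Tmp, B.getInt64(Size));
      B.CreateMemCpy(Tmp, llvm::MaybeAlign(), Src, llvm::MaybeAlign(), Size);
      llvm::CallInst *CI = B.CreateCall(Callee, {Tmp});
      B.CreateLifetimeEnd(Tmp, B.getInt64(Size));
      return CI;
    }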
@@ -4062,7 +4116,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Src = CreateMemTemp(I->Ty, "coerce"); I->copyInto(*this, Src); } else { - Src = I->hasLValue() ? I->getKnownLValue().getAddress() + Src = I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); } @@ -4117,7 +4171,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address addr = Address::invalid(); Address AllocaAddr = Address::invalid(); if (I->isAggregate()) { - addr = I->hasLValue() ? I->getKnownLValue().getAddress() + addr = I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); } else { @@ -4129,11 +4183,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); // Materialize to a temporary. - addr = CreateTempAlloca(RV.getScalarVal()->getType(), - CharUnits::fromQuantity(std::max( - layout->getAlignment(), scalarAlign)), - "tmp", - /*ArraySize=*/nullptr, &AllocaAddr); + addr = CreateTempAlloca( + RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max( + (unsigned)layout->getAlignment().value(), scalarAlign)), + "tmp", + /*ArraySize=*/nullptr, &AllocaAddr); tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); @@ -4273,8 +4328,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update the largest vector width if any arguments have vector types. for (unsigned i = 0; i < IRCallArgs.size(); ++i) { if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); } // Compute the calling convention and attributes. @@ -4284,6 +4339,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Callee.getAbstractInfo(), Attrs, CallingConv, /*AttrOnCallSite=*/true); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) + if (FD->usesFPIntrin()) + // All calls within a strictfp function are marked strictfp + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::StrictFP); + // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -4333,6 +4395,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SmallVector<llvm::OperandBundleDef, 1> BundleList = getBundlesForFunclet(CalleePtr); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) + if (FD->usesFPIntrin()) + // All calls within a strictfp function are marked strictfp + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::StrictFP); + // Emit the actual call/invoke instruction. llvm::CallBase *CI; if (!InvokeDest) { @@ -4346,6 +4415,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (callOrInvoke) *callOrInvoke = CI; + // If this is within a function that has the guard(nocf) attribute and is an + // indirect call, add the "guard_nocf" attribute to this call to indicate that + // Control Flow Guard checks should not be added, even if the call is inlined. 
+ if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) { + if (const auto *A = FD->getAttr<CFGuardAttr>()) { + if (A->getGuard() == CFGuardAttr::GuardArg::nocf && !CI->getCalledFunction()) + Attrs = Attrs.addAttribute( + getLLVMContext(), llvm::AttributeList::FunctionIndex, "guard_nocf"); + } + } + // Apply the attributes and calling convention. CI->setAttributes(Attrs); CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -4357,8 +4437,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update largest vector width from the return type. if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of @@ -4548,7 +4628,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(), - AlignmentCI->getZExtValue(), OffsetValue); + AlignmentCI, OffsetValue); } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()] .getRValue(*this) @@ -4558,6 +4638,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } } + // Explicitly call CallLifetimeEnd::Emit just to re-use the code even though + // we can't use the full cleanup mechanism. + for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall) + LifetimeEnd.Emit(*this, /*Flags=*/{}); + return Ret; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.h b/contrib/llvm-project/clang/lib/CodeGen/CGCall.h index cc11ded704ab..34558be5adb1 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.h @@ -29,14 +29,14 @@ class AttributeList; class Function; class Type; class Value; -} +} // namespace llvm namespace clang { - class ASTContext; - class Decl; - class FunctionDecl; - class ObjCMethodDecl; - class VarDecl; +class ASTContext; +class Decl; +class FunctionDecl; +class ObjCMethodDecl; +class VarDecl; namespace CodeGen { @@ -60,331 +60,327 @@ public: return CalleeProtoTy; } const GlobalDecl getCalleeDecl() const { return CalleeDecl; } +}; + +/// All available information about a concrete callee. +class CGCallee { + enum class SpecialKind : uintptr_t { + Invalid, + Builtin, + PseudoDestructor, + Virtual, + + Last = Virtual }; - /// All available information about a concrete callee. 
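The guard_nocf hunk above reads the guard(nocf) attribute from the enclosing function and tags its indirect call sites. A usage sketch, assuming a Windows target with Control Flow Guard enabled (e.g. clang-cl /guard:cf with MS extensions); the function and pointer names are illustrative:

    #if defined(_MSC_VER)
    __declspec(guard(nocf)) // MS-extension spelling of the CFGuardAttr above
    #endif
    void call_unchecked(void (*fp)()) {
      fp(); // indirect call: tagged "guard_nocf", so no CFG check is inserted
    }

    static void noop() {}

    int main() { call_unchecked(&noop); }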
- class CGCallee { - enum class SpecialKind : uintptr_t { - Invalid, - Builtin, - PseudoDestructor, - Virtual, - - Last = Virtual - }; - - struct BuiltinInfoStorage { - const FunctionDecl *Decl; - unsigned ID; - }; - struct PseudoDestructorInfoStorage { - const CXXPseudoDestructorExpr *Expr; - }; - struct VirtualInfoStorage { - const CallExpr *CE; - GlobalDecl MD; - Address Addr; - llvm::FunctionType *FTy; - }; - - SpecialKind KindOrFunctionPointer; - union { - CGCalleeInfo AbstractInfo; - BuiltinInfoStorage BuiltinInfo; - PseudoDestructorInfoStorage PseudoDestructorInfo; - VirtualInfoStorage VirtualInfo; - }; - - explicit CGCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {} - - CGCallee(const FunctionDecl *builtinDecl, unsigned builtinID) - : KindOrFunctionPointer(SpecialKind::Builtin) { - BuiltinInfo.Decl = builtinDecl; - BuiltinInfo.ID = builtinID; - } - - public: - CGCallee() : KindOrFunctionPointer(SpecialKind::Invalid) {} - - /// Construct a callee. Call this constructor directly when this - /// isn't a direct call. - CGCallee(const CGCalleeInfo &abstractInfo, llvm::Value *functionPtr) - : KindOrFunctionPointer(SpecialKind(uintptr_t(functionPtr))) { - AbstractInfo = abstractInfo; - assert(functionPtr && "configuring callee without function pointer"); - assert(functionPtr->getType()->isPointerTy()); - assert(functionPtr->getType()->getPointerElementType()->isFunctionTy()); - } - - static CGCallee forBuiltin(unsigned builtinID, - const FunctionDecl *builtinDecl) { - CGCallee result(SpecialKind::Builtin); - result.BuiltinInfo.Decl = builtinDecl; - result.BuiltinInfo.ID = builtinID; - return result; - } - - static CGCallee forPseudoDestructor(const CXXPseudoDestructorExpr *E) { - CGCallee result(SpecialKind::PseudoDestructor); - result.PseudoDestructorInfo.Expr = E; - return result; - } - - static CGCallee forDirect(llvm::Constant *functionPtr, - const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { - return CGCallee(abstractInfo, functionPtr); - } - - static CGCallee - forDirect(llvm::FunctionCallee functionPtr, - const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { - return CGCallee(abstractInfo, functionPtr.getCallee()); - } - - static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr, - llvm::FunctionType *FTy) { - CGCallee result(SpecialKind::Virtual); - result.VirtualInfo.CE = CE; - result.VirtualInfo.MD = MD; - result.VirtualInfo.Addr = Addr; - result.VirtualInfo.FTy = FTy; - return result; - } - - bool isBuiltin() const { - return KindOrFunctionPointer == SpecialKind::Builtin; - } - const FunctionDecl *getBuiltinDecl() const { - assert(isBuiltin()); - return BuiltinInfo.Decl; - } - unsigned getBuiltinID() const { - assert(isBuiltin()); - return BuiltinInfo.ID; - } - - bool isPseudoDestructor() const { - return KindOrFunctionPointer == SpecialKind::PseudoDestructor; - } - const CXXPseudoDestructorExpr *getPseudoDestructorExpr() const { - assert(isPseudoDestructor()); - return PseudoDestructorInfo.Expr; - } - - bool isOrdinary() const { - return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last); - } - CGCalleeInfo getAbstractInfo() const { - if (isVirtual()) - return VirtualInfo.MD; - assert(isOrdinary()); - return AbstractInfo; - } - llvm::Value *getFunctionPointer() const { - assert(isOrdinary()); - return reinterpret_cast<llvm::Value*>(uintptr_t(KindOrFunctionPointer)); - } - void setFunctionPointer(llvm::Value *functionPtr) { - assert(isOrdinary()); - KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr)); - } - - bool isVirtual() 
const { - return KindOrFunctionPointer == SpecialKind::Virtual; - } - const CallExpr *getVirtualCallExpr() const { - assert(isVirtual()); - return VirtualInfo.CE; - } - GlobalDecl getVirtualMethodDecl() const { - assert(isVirtual()); - return VirtualInfo.MD; - } - Address getThisAddress() const { - assert(isVirtual()); - return VirtualInfo.Addr; - } - llvm::FunctionType *getVirtualFunctionType() const { - assert(isVirtual()); - return VirtualInfo.FTy; - } - - /// If this is a delayed callee computation of some sort, prepare - /// a concrete callee. - CGCallee prepareConcreteCallee(CodeGenFunction &CGF) const; + struct BuiltinInfoStorage { + const FunctionDecl *Decl; + unsigned ID; + }; + struct PseudoDestructorInfoStorage { + const CXXPseudoDestructorExpr *Expr; + }; + struct VirtualInfoStorage { + const CallExpr *CE; + GlobalDecl MD; + Address Addr; + llvm::FunctionType *FTy; }; - struct CallArg { - private: - union { - RValue RV; - LValue LV; /// The argument is semantically a load from this l-value. - }; - bool HasLV; - - /// A data-flow flag to make sure getRValue and/or copyInto are not - /// called twice for duplicated IR emission. - mutable bool IsUsed; - - public: - QualType Ty; - CallArg(RValue rv, QualType ty) - : RV(rv), HasLV(false), IsUsed(false), Ty(ty) {} - CallArg(LValue lv, QualType ty) - : LV(lv), HasLV(true), IsUsed(false), Ty(ty) {} - bool hasLValue() const { return HasLV; } - QualType getType() const { return Ty; } - - /// \returns an independent RValue. If the CallArg contains an LValue, - /// a temporary copy is returned. - RValue getRValue(CodeGenFunction &CGF) const; - - LValue getKnownLValue() const { - assert(HasLV && !IsUsed); - return LV; - } - RValue getKnownRValue() const { - assert(!HasLV && !IsUsed); - return RV; - } - void setRValue(RValue _RV) { - assert(!HasLV); - RV = _RV; - } - - bool isAggregate() const { return HasLV || RV.isAggregate(); } - - void copyInto(CodeGenFunction &CGF, Address A) const; + SpecialKind KindOrFunctionPointer; + union { + CGCalleeInfo AbstractInfo; + BuiltinInfoStorage BuiltinInfo; + PseudoDestructorInfoStorage PseudoDestructorInfo; + VirtualInfoStorage VirtualInfo; }; - /// CallArgList - Type for representing both the value and type of - /// arguments in a call. - class CallArgList : - public SmallVector<CallArg, 8> { - public: - CallArgList() : StackBase(nullptr) {} - - struct Writeback { - /// The original argument. Note that the argument l-value - /// is potentially null. - LValue Source; - - /// The temporary alloca. - Address Temporary; - - /// A value to "use" after the writeback, or null. - llvm::Value *ToUse; - }; - - struct CallArgCleanup { - EHScopeStack::stable_iterator Cleanup; - - /// The "is active" insertion point. This instruction is temporary and - /// will be removed after insertion. - llvm::Instruction *IsActiveIP; - }; - - void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); } - - void addUncopiedAggregate(LValue LV, QualType type) { - push_back(CallArg(LV, type)); - } - - /// Add all the arguments from another CallArgList to this one. After doing - /// this, the old CallArgList retains its list of arguments, but must not - /// be used to emit a call. 
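CGCallee stores either a small SpecialKind tag or a real function pointer in the single KindOrFunctionPointer field; isOrdinary() distinguishes the two by comparing against SpecialKind::Last. A self-contained sketch of that pointer-tagging trick, with illustrative names:

    // Relies on valid function pointers never landing in the first few
    // address values reserved for the special kinds.
    #include <cassert>
    #include <cstdint>

    using FnPtr = void (*)();

    enum class SpecialKind : uintptr_t { Invalid, Builtin, Virtual, Last = Virtual };

    struct Callee {
      SpecialKind KindOrPtr = SpecialKind::Invalid;

      static Callee forSpecial(SpecialKind k) { return Callee{k}; }
      static Callee forPointer(FnPtr fn) {
        return Callee{SpecialKind(reinterpret_cast<uintptr_t>(fn))};
      }
      bool isOrdinary() const {
        return uintptr_t(KindOrPtr) > uintptr_t(SpecialKind::Last);
      }
      FnPtr pointer() const {
        assert(isOrdinary());
        return reinterpret_cast<FnPtr>(uintptr_t(KindOrPtr));
      }
    };

    static void hello() {}

    int main() {
      assert(!Callee::forSpecial(SpecialKind::Builtin).isOrdinary());
      Callee c = Callee::forPointer(&hello);
      assert(c.isOrdinary());
      c.pointer()(); // tag bits round-trip back to the real function pointer
    }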
- void addFrom(const CallArgList &other) { - insert(end(), other.begin(), other.end()); - Writebacks.insert(Writebacks.end(), - other.Writebacks.begin(), other.Writebacks.end()); - CleanupsToDeactivate.insert(CleanupsToDeactivate.end(), - other.CleanupsToDeactivate.begin(), - other.CleanupsToDeactivate.end()); - assert(!(StackBase && other.StackBase) && "can't merge stackbases"); - if (!StackBase) - StackBase = other.StackBase; - } - - void addWriteback(LValue srcLV, Address temporary, - llvm::Value *toUse) { - Writeback writeback = { srcLV, temporary, toUse }; - Writebacks.push_back(writeback); - } - - bool hasWritebacks() const { return !Writebacks.empty(); } - - typedef llvm::iterator_range<SmallVectorImpl<Writeback>::const_iterator> - writeback_const_range; + explicit CGCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {} + + CGCallee(const FunctionDecl *builtinDecl, unsigned builtinID) + : KindOrFunctionPointer(SpecialKind::Builtin) { + BuiltinInfo.Decl = builtinDecl; + BuiltinInfo.ID = builtinID; + } - writeback_const_range writebacks() const { - return writeback_const_range(Writebacks.begin(), Writebacks.end()); - } +public: + CGCallee() : KindOrFunctionPointer(SpecialKind::Invalid) {} + + /// Construct a callee. Call this constructor directly when this + /// isn't a direct call. + CGCallee(const CGCalleeInfo &abstractInfo, llvm::Value *functionPtr) + : KindOrFunctionPointer(SpecialKind(uintptr_t(functionPtr))) { + AbstractInfo = abstractInfo; + assert(functionPtr && "configuring callee without function pointer"); + assert(functionPtr->getType()->isPointerTy()); + assert(functionPtr->getType()->getPointerElementType()->isFunctionTy()); + } - void addArgCleanupDeactivation(EHScopeStack::stable_iterator Cleanup, - llvm::Instruction *IsActiveIP) { - CallArgCleanup ArgCleanup; - ArgCleanup.Cleanup = Cleanup; - ArgCleanup.IsActiveIP = IsActiveIP; - CleanupsToDeactivate.push_back(ArgCleanup); - } + static CGCallee forBuiltin(unsigned builtinID, + const FunctionDecl *builtinDecl) { + CGCallee result(SpecialKind::Builtin); + result.BuiltinInfo.Decl = builtinDecl; + result.BuiltinInfo.ID = builtinID; + return result; + } - ArrayRef<CallArgCleanup> getCleanupsToDeactivate() const { - return CleanupsToDeactivate; - } + static CGCallee forPseudoDestructor(const CXXPseudoDestructorExpr *E) { + CGCallee result(SpecialKind::PseudoDestructor); + result.PseudoDestructorInfo.Expr = E; + return result; + } - void allocateArgumentMemory(CodeGenFunction &CGF); - llvm::Instruction *getStackBase() const { return StackBase; } - void freeArgumentMemory(CodeGenFunction &CGF) const; + static CGCallee forDirect(llvm::Constant *functionPtr, + const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { + return CGCallee(abstractInfo, functionPtr); + } - /// Returns if we're using an inalloca struct to pass arguments in - /// memory. - bool isUsingInAlloca() const { return StackBase; } + static CGCallee forDirect(llvm::FunctionCallee functionPtr, + const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { + return CGCallee(abstractInfo, functionPtr.getCallee()); + } - private: - SmallVector<Writeback, 1> Writebacks; + static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr, + llvm::FunctionType *FTy) { + CGCallee result(SpecialKind::Virtual); + result.VirtualInfo.CE = CE; + result.VirtualInfo.MD = MD; + result.VirtualInfo.Addr = Addr; + result.VirtualInfo.FTy = FTy; + return result; + } - /// Deactivate these cleanups immediately before making the call. 
This - /// is used to cleanup objects that are owned by the callee once the call - /// occurs. - SmallVector<CallArgCleanup, 1> CleanupsToDeactivate; + bool isBuiltin() const { + return KindOrFunctionPointer == SpecialKind::Builtin; + } + const FunctionDecl *getBuiltinDecl() const { + assert(isBuiltin()); + return BuiltinInfo.Decl; + } + unsigned getBuiltinID() const { + assert(isBuiltin()); + return BuiltinInfo.ID; + } - /// The stacksave call. It dominates all of the argument evaluation. - llvm::CallInst *StackBase; + bool isPseudoDestructor() const { + return KindOrFunctionPointer == SpecialKind::PseudoDestructor; + } + const CXXPseudoDestructorExpr *getPseudoDestructorExpr() const { + assert(isPseudoDestructor()); + return PseudoDestructorInfo.Expr; + } + + bool isOrdinary() const { + return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last); + } + CGCalleeInfo getAbstractInfo() const { + if (isVirtual()) + return VirtualInfo.MD; + assert(isOrdinary()); + return AbstractInfo; + } + llvm::Value *getFunctionPointer() const { + assert(isOrdinary()); + return reinterpret_cast<llvm::Value *>(uintptr_t(KindOrFunctionPointer)); + } + void setFunctionPointer(llvm::Value *functionPtr) { + assert(isOrdinary()); + KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr)); + } + + bool isVirtual() const { + return KindOrFunctionPointer == SpecialKind::Virtual; + } + const CallExpr *getVirtualCallExpr() const { + assert(isVirtual()); + return VirtualInfo.CE; + } + GlobalDecl getVirtualMethodDecl() const { + assert(isVirtual()); + return VirtualInfo.MD; + } + Address getThisAddress() const { + assert(isVirtual()); + return VirtualInfo.Addr; + } + llvm::FunctionType *getVirtualFunctionType() const { + assert(isVirtual()); + return VirtualInfo.FTy; + } + + /// If this is a delayed callee computation of some sort, prepare + /// a concrete callee. + CGCallee prepareConcreteCallee(CodeGenFunction &CGF) const; +}; + +struct CallArg { +private: + union { + RValue RV; + LValue LV; /// The argument is semantically a load from this l-value. }; + bool HasLV; + + /// A data-flow flag to make sure getRValue and/or copyInto are not + /// called twice for duplicated IR emission. + mutable bool IsUsed; - /// FunctionArgList - Type for representing both the decl and type - /// of parameters to a function. The decl must be either a - /// ParmVarDecl or ImplicitParamDecl. - class FunctionArgList : public SmallVector<const VarDecl*, 16> { +public: + QualType Ty; + CallArg(RValue rv, QualType ty) + : RV(rv), HasLV(false), IsUsed(false), Ty(ty) {} + CallArg(LValue lv, QualType ty) + : LV(lv), HasLV(true), IsUsed(false), Ty(ty) {} + bool hasLValue() const { return HasLV; } + QualType getType() const { return Ty; } + + /// \returns an independent RValue. If the CallArg contains an LValue, + /// a temporary copy is returned. + RValue getRValue(CodeGenFunction &CGF) const; + + LValue getKnownLValue() const { + assert(HasLV && !IsUsed); + return LV; + } + RValue getKnownRValue() const { + assert(!HasLV && !IsUsed); + return RV; + } + void setRValue(RValue _RV) { + assert(!HasLV); + RV = _RV; + } + + bool isAggregate() const { return HasLV || RV.isAggregate(); } + + void copyInto(CodeGenFunction &CGF, Address A) const; +}; + +/// CallArgList - Type for representing both the value and type of +/// arguments in a call. +class CallArgList : public SmallVector<CallArg, 8> { +public: + CallArgList() : StackBase(nullptr) {} + + struct Writeback { + /// The original argument. 
Note that the argument l-value + /// is potentially null. + LValue Source; + + /// The temporary alloca. + Address Temporary; + + /// A value to "use" after the writeback, or null. + llvm::Value *ToUse; + }; + + struct CallArgCleanup { + EHScopeStack::stable_iterator Cleanup; + + /// The "is active" insertion point. This instruction is temporary and + /// will be removed after insertion. + llvm::Instruction *IsActiveIP; + }; + + void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); } + + void addUncopiedAggregate(LValue LV, QualType type) { + push_back(CallArg(LV, type)); + } + + /// Add all the arguments from another CallArgList to this one. After doing + /// this, the old CallArgList retains its list of arguments, but must not + /// be used to emit a call. + void addFrom(const CallArgList &other) { + insert(end(), other.begin(), other.end()); + Writebacks.insert(Writebacks.end(), other.Writebacks.begin(), + other.Writebacks.end()); + CleanupsToDeactivate.insert(CleanupsToDeactivate.end(), + other.CleanupsToDeactivate.begin(), + other.CleanupsToDeactivate.end()); + assert(!(StackBase && other.StackBase) && "can't merge stackbases"); + if (!StackBase) + StackBase = other.StackBase; + } + + void addWriteback(LValue srcLV, Address temporary, llvm::Value *toUse) { + Writeback writeback = {srcLV, temporary, toUse}; + Writebacks.push_back(writeback); + } + + bool hasWritebacks() const { return !Writebacks.empty(); } + + typedef llvm::iterator_range<SmallVectorImpl<Writeback>::const_iterator> + writeback_const_range; + + writeback_const_range writebacks() const { + return writeback_const_range(Writebacks.begin(), Writebacks.end()); + } + + void addArgCleanupDeactivation(EHScopeStack::stable_iterator Cleanup, + llvm::Instruction *IsActiveIP) { + CallArgCleanup ArgCleanup; + ArgCleanup.Cleanup = Cleanup; + ArgCleanup.IsActiveIP = IsActiveIP; + CleanupsToDeactivate.push_back(ArgCleanup); + } + + ArrayRef<CallArgCleanup> getCleanupsToDeactivate() const { + return CleanupsToDeactivate; + } + + void allocateArgumentMemory(CodeGenFunction &CGF); + llvm::Instruction *getStackBase() const { return StackBase; } + void freeArgumentMemory(CodeGenFunction &CGF) const; + + /// Returns if we're using an inalloca struct to pass arguments in + /// memory. + bool isUsingInAlloca() const { return StackBase; } + +private: + SmallVector<Writeback, 1> Writebacks; + + /// Deactivate these cleanups immediately before making the call. This + /// is used to cleanup objects that are owned by the callee once the call + /// occurs. + SmallVector<CallArgCleanup, 1> CleanupsToDeactivate; + + /// The stacksave call. It dominates all of the argument evaluation. + llvm::CallInst *StackBase; +}; + +/// FunctionArgList - Type for representing both the decl and type +/// of parameters to a function. The decl must be either a +/// ParmVarDecl or ImplicitParamDecl. +class FunctionArgList : public SmallVector<const VarDecl *, 16> {}; + +/// ReturnValueSlot - Contains the address where the return value of a +/// function can be stored, and whether the address is volatile or not. +class ReturnValueSlot { + llvm::PointerIntPair<llvm::Value *, 2, unsigned int> Value; + CharUnits Alignment; + + // Return value slot flags + enum Flags { + IS_VOLATILE = 0x1, + IS_UNUSED = 0x2, }; - /// ReturnValueSlot - Contains the address where the return value of a - /// function can be stored, and whether the address is volatile or not. 
- class ReturnValueSlot { - llvm::PointerIntPair<llvm::Value *, 2, unsigned int> Value; - CharUnits Alignment; - - // Return value slot flags - enum Flags { - IS_VOLATILE = 0x1, - IS_UNUSED = 0x2, - }; - - public: - ReturnValueSlot() {} - ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false) +public: + ReturnValueSlot() {} + ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false) : Value(Addr.isValid() ? Addr.getPointer() : nullptr, (IsVolatile ? IS_VOLATILE : 0) | (IsUnused ? IS_UNUSED : 0)), Alignment(Addr.isValid() ? Addr.getAlignment() : CharUnits::Zero()) {} - bool isNull() const { return !getValue().isValid(); } + bool isNull() const { return !getValue().isValid(); } - bool isVolatile() const { return Value.getInt() & IS_VOLATILE; } - Address getValue() const { return Address(Value.getPointer(), Alignment); } - bool isUnused() const { return Value.getInt() & IS_UNUSED; } - }; + bool isVolatile() const { return Value.getInt() & IS_VOLATILE; } + Address getValue() const { return Address(Value.getPointer(), Alignment); } + bool isUnused() const { return Value.getInt() & IS_UNUSED; } +}; -} // end namespace CodeGen -} // end namespace clang +} // end namespace CodeGen +} // end namespace clang #endif diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp index c8bb63c5c4b1..3f3825b76275 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGClass.cpp @@ -16,6 +16,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "TargetInfo.h" +#include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" @@ -161,8 +162,8 @@ CharUnits CodeGenModule::computeNonVirtualBaseClassOffset( // Get the layout. const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + const auto *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl()); // Add the offset. Offset += Layout.getBaseClassOffset(BaseDecl); @@ -246,7 +247,8 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, Address addr, // Apply the base offset. llvm::Value *ptr = addr.getPointer(); - ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8PtrTy); + unsigned AddrSpace = ptr->getType()->getPointerAddressSpace(); + ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8Ty->getPointerTo(AddrSpace)); ptr = CGF.Builder.CreateInBoundsGEP(ptr, baseOffset, "add.ptr"); // If we have a virtual component, the alignment of the result will @@ -279,8 +281,8 @@ Address CodeGenFunction::GetAddressOfBaseClass( // *start* with a step down to the correct virtual base subobject, // and hence will not require any further steps. 
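ReturnValueSlot packs its IS_VOLATILE and IS_UNUSED flags into the two spare low bits of the pointer via llvm::PointerIntPair. A plain-C++ sketch of the same low-bit packing, assuming the pointee is at least 4-byte aligned:

    #include <cassert>
    #include <cstdint>

    constexpr uintptr_t IS_VOLATILE = 0x1, IS_UNUSED = 0x2;

    struct Slot {
      uintptr_t Bits = 0;
      Slot(int *p, bool isVolatile, bool isUnused)
          : Bits(reinterpret_cast<uintptr_t>(p) |
                 (isVolatile ? IS_VOLATILE : 0) | (isUnused ? IS_UNUSED : 0)) {
        // The two flag bits are only free if the pointer is 4-byte aligned.
        assert((reinterpret_cast<uintptr_t>(p) & 0x3) == 0);
      }
      int *pointer() const {
        return reinterpret_cast<int *>(Bits & ~uintptr_t(0x3));
      }
      bool isVolatile() const { return Bits & IS_VOLATILE; }
      bool isUnused() const { return Bits & IS_UNUSED; }
    };

    int main() {
      static int ret;
      Slot s(&ret, /*isVolatile=*/true, /*isUnused=*/false);
      assert(s.pointer() == &ret && s.isVolatile() && !s.isUnused());
    }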
if ((*Start)->isVirtual()) { - VBase = - cast<CXXRecordDecl>((*Start)->getType()->getAs<RecordType>()->getDecl()); + VBase = cast<CXXRecordDecl>( + (*Start)->getType()->castAs<RecordType>()->getDecl()); ++Start; } @@ -381,7 +383,9 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr, QualType DerivedTy = getContext().getCanonicalType(getContext().getTagDeclType(Derived)); - llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo(); + unsigned AddrSpace = + BaseAddr.getPointer()->getType()->getPointerAddressSpace(); + llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo(AddrSpace); llvm::Value *NonVirtualOffset = CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd); @@ -536,8 +540,8 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, Address ThisPtr = CGF.LoadCXXThisAddress(); const Type *BaseType = BaseInit->getBaseClass(); - CXXRecordDecl *BaseClassDecl = - cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl()); + const auto *BaseClassDecl = + cast<CXXRecordDecl>(BaseType->castAs<RecordType>()->getDecl()); bool isBaseVirtual = BaseInit->isBaseVirtual(); @@ -654,7 +658,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, // the constructor. QualType::DestructionKind dtorKind = FieldType.isDestructedType(); if (CGF.needsEHCleanup(dtorKind)) - CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); + CGF.pushEHDestroy(dtorKind, LHS.getAddress(CGF), FieldType); return; } } @@ -678,16 +682,12 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, EmitComplexExprIntoLValue(Init, LHS, /*isInit*/ true); break; case TEK_Aggregate: { - AggValueSlot Slot = - AggValueSlot::forLValue( - LHS, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - getOverlapForFieldInit(Field), - AggValueSlot::IsNotZeroed, - // Checks are made by the code that calls constructor. - AggValueSlot::IsSanitizerChecked); + AggValueSlot Slot = AggValueSlot::forLValue( + LHS, *this, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + getOverlapForFieldInit(Field), AggValueSlot::IsNotZeroed, + // Checks are made by the code that calls constructor. + AggValueSlot::IsSanitizerChecked); EmitAggExpr(Init, Slot); break; } @@ -697,7 +697,7 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, // later in the constructor. QualType::DestructionKind dtorKind = FieldType.isDestructedType(); if (needsEHCleanup(dtorKind)) - pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); + pushEHDestroy(dtorKind, LHS.getAddress(*this), FieldType); } /// Checks whether the given constructor is a valid subject for the @@ -739,7 +739,7 @@ bool CodeGenFunction::IsConstructorDelegationValid( // We also disable the optimization for variadic functions because // it's impossible to "re-pass" varargs. - if (Ctor->getType()->getAs<FunctionProtoType>()->isVariadic()) + if (Ctor->getType()->castAs<FunctionProtoType>()->isVariadic()) return false; // FIXME: Decide if we can do a delegation of a delegating constructor. @@ -911,6 +911,8 @@ namespace { } void addMemcpyableField(FieldDecl *F) { + if (F->isZeroSize(CGF.getContext())) + return; if (!FirstField) addInitialField(F); else @@ -958,9 +960,10 @@ namespace { LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy); LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField); - emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddress() : Dest.getAddress(), - Src.isBitField() ? 
Src.getBitFieldAddress() : Src.getAddress(), - MemcpySize); + emitMemcpyIR( + Dest.isBitField() ? Dest.getBitFieldAddress() : Dest.getAddress(CGF), + Src.isBitField() ? Src.getBitFieldAddress() : Src.getAddress(CGF), + MemcpySize); reset(); } @@ -1114,7 +1117,7 @@ namespace { continue; LValue FieldLHS = LHS; EmitLValueForAnyFieldInitialization(CGF, MemberInit, FieldLHS); - CGF.pushEHDestroy(dtorKind, FieldLHS.getAddress(), FieldType); + CGF.pushEHDestroy(dtorKind, FieldLHS.getAddress(CGF), FieldType); } } @@ -1245,7 +1248,7 @@ namespace { static bool isInitializerOfDynamicClass(const CXXCtorInitializer *BaseInit) { const Type *BaseType = BaseInit->getBaseClass(); const auto *BaseClassDecl = - cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(BaseType->castAs<RecordType>()->getDecl()); return BaseClassDecl->isDynamicClass(); } @@ -1624,7 +1627,7 @@ namespace { LValue LV = CGF.EmitLValueForField(ThisLV, field); assert(LV.isSimple()); - CGF.emitDestroy(LV.getAddress(), field->getType(), destroyer, + CGF.emitDestroy(LV.getAddress(CGF), field->getType(), destroyer, flags.isForNormalCleanup() && useEHCleanupForArray); } }; @@ -1814,8 +1817,8 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, // We push them in the forward order so that they'll be popped in // the reverse order. for (const auto &Base : ClassDecl->vbases()) { - CXXRecordDecl *BaseClassDecl - = cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl()); + auto *BaseClassDecl = + cast<CXXRecordDecl>(Base.getType()->castAs<RecordType>()->getDecl()); // Ignore trivial destructors. if (BaseClassDecl->hasTrivialDestructor()) @@ -2083,7 +2086,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { // If the parameters are callee-cleanup, it's not safe to forward. for (auto *P : Ctor->parameters()) - if (P->getType().isDestructedType()) + if (P->needsDestruction(CGF.getContext())) return false; // Likewise if they're inalloca. @@ -2530,8 +2533,8 @@ void CodeGenFunction::getVTablePointers(BaseSubobject Base, // Traverse bases. for (const auto &I : RD->bases()) { - CXXRecordDecl *BaseDecl - = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // Ignore classes without a vtable. 
if (!BaseDecl->isDynamicClass()) @@ -2784,11 +2787,16 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { if (!CGM.getCodeGenOpts().WholeProgramVTables || - !SanOpts.has(SanitizerKind::CFIVCall) || - !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || !CGM.HasHiddenLTOVisibility(RD)) return false; + if (CGM.getCodeGenOpts().VirtualFunctionElimination) + return true; + + if (!SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall)) + return false; + std::string TypeName = RD->getQualifiedNameAsString(); return !getContext().getSanitizerBlacklist().isBlacklistedType( SanitizerKind::CFIVCall, TypeName); @@ -2811,8 +2819,13 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( TypeId}); llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); - EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), - SanitizerHandler::CFICheckFail, nullptr, nullptr); + std::string TypeName = RD->getQualifiedNameAsString(); + if (SanOpts.has(SanitizerKind::CFIVCall) && + !getContext().getSanitizerBlacklist().isBlacklistedType( + SanitizerKind::CFIVCall, TypeName)) { + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + SanitizerHandler::CFICheckFail, {}, {}); + } return Builder.CreateBitCast( Builder.CreateExtractValue(CheckedLoad, 0), diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCleanup.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCleanup.cpp index 5594f3030229..c117dd5c25c1 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCleanup.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCleanup.cpp @@ -304,13 +304,13 @@ void EHScopeStack::Cleanup::anchor() {} static void createStoreInstBefore(llvm::Value *value, Address addr, llvm::Instruction *beforeInst) { auto store = new llvm::StoreInst(value, addr.getPointer(), beforeInst); - store->setAlignment(addr.getAlignment().getQuantity()); + store->setAlignment(addr.getAlignment().getAsAlign()); } static llvm::LoadInst *createLoadInstBefore(Address addr, const Twine &name, llvm::Instruction *beforeInst) { auto load = new llvm::LoadInst(addr.getPointer(), name, beforeInst); - load->setAlignment(addr.getAlignment().getQuantity()); + load->setAlignment(addr.getAlignment().getAsAlign()); return load; } @@ -740,14 +740,15 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // here. Unfortunately, if you ask for a SmallVector<char>, the // alignment isn't sufficient. 
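ShouldEmitVTableTypeCheckedLoad above now returns true whenever virtual function elimination is enabled, independent of the CFI-vcall sanitizer, and the CFI check itself is only emitted when that sanitizer is actually on and the type is not blacklisted. A source-level sketch of the kind of code this affects (flag spellings are from this era of clang and should be treated as illustrative; roughly -flto -fwhole-program-vtables -fvirtual-function-elimination):

    struct Base {
      virtual void used() {}
      virtual void never_called() {} // candidate for elimination at LTO time
      virtual ~Base() = default;
    };

    void dispatch(Base *b) {
      b->used(); // emitted through llvm.type.checked.load, not a raw vtable load
    }

    int main() {
      Base b;
      dispatch(&b);
    }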
auto *CleanupSource = reinterpret_cast<char *>(Scope.getCleanupBuffer()); - llvm::AlignedCharArray<EHScopeStack::ScopeStackAlignment, 8 * sizeof(void *)> CleanupBufferStack; + alignas(EHScopeStack::ScopeStackAlignment) char + CleanupBufferStack[8 * sizeof(void *)]; std::unique_ptr<char[]> CleanupBufferHeap; size_t CleanupSize = Scope.getCleanupSize(); EHScopeStack::Cleanup *Fn; if (CleanupSize <= sizeof(CleanupBufferStack)) { - memcpy(CleanupBufferStack.buffer, CleanupSource, CleanupSize); - Fn = reinterpret_cast<EHScopeStack::Cleanup *>(CleanupBufferStack.buffer); + memcpy(CleanupBufferStack, CleanupSource, CleanupSize); + Fn = reinterpret_cast<EHScopeStack::Cleanup *>(CleanupBufferStack); } else { CleanupBufferHeap.reset(new char[CleanupSize]); memcpy(CleanupBufferHeap.get(), CleanupSource, CleanupSize); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp index f6ee7ee26d4b..cbd524eda9d0 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.cpp @@ -18,8 +18,8 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" -#include "clang/Analysis/Analyses/ExprMutationAnalyzer.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" @@ -46,6 +46,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TimeProfiler.h" using namespace clang; using namespace clang::CodeGen; @@ -235,8 +236,7 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { PP.MSVCFormatting = true; // Apply -fdebug-prefix-map. - PP.RemapFilePaths = true; - PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); }; + PP.Callbacks = &PrintCB; return PP; } @@ -293,13 +293,6 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { } } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')'; - } else if (isa<ObjCProtocolDecl>(DC)) { - // We can extract the type of the class from the self pointer. - if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) { - QualType ClassTy = - cast<ObjCObjectPointerType>(SelfDecl->getType())->getPointeeType(); - ClassTy.print(OS, PrintingPolicy(LangOptions())); - } } OS << ' ' << OMD->getSelector().getAsString() << ']'; @@ -314,7 +307,9 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { if (isa<ClassTemplateSpecializationDecl>(RD)) { SmallString<128> Name; llvm::raw_svector_ostream OS(Name); - RD->getNameForDiagnostic(OS, getPrintingPolicy(), + PrintingPolicy PP = getPrintingPolicy(); + PP.PrintCanonicalTypes = true; + RD->getNameForDiagnostic(OS, PP, /*Qualified*/ false); // Copy this name on the side and use its reference. @@ -537,11 +532,11 @@ void CGDebugInfo::CreateCompileUnit() { // file to determine the real absolute path for the file. 
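The PopCleanupBlock hunk above swaps llvm::AlignedCharArray for a plain alignas() array, since C++11 alignment specifiers make that helper unnecessary. A standalone sketch of the small-buffer-else-heap pattern it implements (sizes and alignment are stand-ins for the real EHScopeStack values):

    #include <cstddef>
    #include <cstring>
    #include <memory>

    void runCleanup(const char *source, std::size_t size) {
      // Aligned stack storage for the common small case.
      alignas(alignof(void *)) char stackBuf[8 * sizeof(void *)];
      std::unique_ptr<char[]> heapBuf;
      char *storage = stackBuf;
      if (size > sizeof(stackBuf)) { // too big for the buffer: spill to heap
        heapBuf.reset(new char[size]);
        storage = heapBuf.get();
      }
      std::memcpy(storage, source, size);
      // ... reinterpret 'storage' as the cleanup object and run it ...
    }

    int main() {
      char payload[16] = {};
      runCleanup(payload, sizeof(payload));
    }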
std::string MainFileDir; if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - MainFileDir = remapDIPath(MainFile->getDir()->getName()); - if (MainFileDir != ".") { + MainFileDir = MainFile->getDir()->getName(); + if (!llvm::sys::path::is_absolute(MainFileName)) { llvm::SmallString<1024> MainFileDirSS(MainFileDir); llvm::sys::path::append(MainFileDirSS, MainFileName); - MainFileName = MainFileDirSS.str(); + MainFileName = llvm::sys::path::remove_leading_dotslash(MainFileDirSS); } // If the main file name provided is identical to the input file name, and // if the input file is a preprocessed source, use the module name for @@ -561,6 +556,10 @@ void CGDebugInfo::CreateCompileUnit() { if (LO.CPlusPlus) { if (LO.ObjC) LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus; + else if (LO.CPlusPlus14) + LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14; + else if (LO.CPlusPlus11) + LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11; else LangTag = llvm::dwarf::DW_LANG_C_plus_plus; } else if (LO.ObjC) { @@ -592,6 +591,7 @@ void CGDebugInfo::CreateCompileUnit() { case codegenoptions::DebugDirectivesOnly: EmissionKind = llvm::DICompileUnit::DebugDirectivesOnly; break; + case codegenoptions::DebugInfoConstructor: case codegenoptions::LimitedDebugInfo: case codegenoptions::FullDebugInfo: EmissionKind = llvm::DICompileUnit::FullDebug; @@ -697,6 +697,22 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Id: \ return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty); #include "clang/Basic/OpenCLExtensionTypes.def" + // TODO: real support for SVE types requires more infrastructure + // to be added first. The types have a variable length and are + // represented in debug info as types whose length depends on a + // target-specific pseudo register. +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" + { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "cannot yet generate debug info for SVE type '%0'"); + auto Name = BT->getName(CGM.getContext().getPrintingPolicy()); + CGM.getDiags().Report(DiagID) << Name; + // Return something safe. + return CreateType(cast<const BuiltinType>(CGM.getContext().IntTy)); + } case BuiltinType::UChar: case BuiltinType::Char_U: @@ -862,6 +878,8 @@ llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) { switch (TheCU->getSourceLanguage()) { case llvm::dwarf::DW_LANG_C_plus_plus: + case llvm::dwarf::DW_LANG_C_plus_plus_11: + case llvm::dwarf::DW_LANG_C_plus_plus_14: return true; case llvm::dwarf::DW_LANG_ObjC_plus_plus: return isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD); @@ -1118,10 +1136,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, // declared. SourceLocation Loc = Ty->getDecl()->getLocation(); + uint32_t Align = getDeclAlignIfRequired(Ty->getDecl(), CGM.getContext()); // Typedefs are derived from some other type. 
return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc), - getDeclContextDescriptor(Ty->getDecl())); + getDeclContextDescriptor(Ty->getDecl()), Align); } static unsigned getDwarfCC(CallingConv CC) { @@ -1583,6 +1602,31 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( ContainingType = RecordTy; } + // We're checking for deleted C++ special member functions + // [Ctors,Dtors, Copy/Move] + auto checkAttrDeleted = [&](const auto *Method) { + if (Method->getCanonicalDecl()->isDeleted()) + SPFlags |= llvm::DISubprogram::SPFlagDeleted; + }; + + switch (Method->getKind()) { + + case Decl::CXXConstructor: + case Decl::CXXDestructor: + checkAttrDeleted(Method); + break; + case Decl::CXXMethod: + if (Method->isCopyAssignmentOperator() || + Method->isMoveAssignmentOperator()) + checkAttrDeleted(Method); + break; + default: + break; + } + + if (Method->isNoReturn()) + Flags |= llvm::DINode::FlagNoReturn; + if (Method->isStatic()) Flags |= llvm::DINode::FlagStaticMember; if (Method->isImplicit()) @@ -1604,6 +1648,12 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( if (CGM.getLangOpts().Optimize) SPFlags |= llvm::DISubprogram::SPFlagOptimized; + // In this debug mode, emit type info for a class when its constructor type + // info is emitted. + if (DebugKind == codegenoptions::DebugInfoConstructor) + if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(Method)) + completeClass(CD->getParent()); + llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine, @@ -1637,7 +1687,7 @@ void CGDebugInfo::CollectCXXMemberFunctions( if (!Method || Method->isImplicit() || Method->hasAttr<NoDebugAttr>()) continue; - if (Method->getType()->getAs<FunctionProtoType>()->getContainedAutoType()) + if (Method->getType()->castAs<FunctionProtoType>()->getContainedAutoType()) continue; // Reuse the existing member function declaration if it exists. 
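DBuilder.createTypedef above now receives the typedef's required alignment (via getDeclAlignIfRequired), so an over-aligned typedef is no longer flattened to its underlying type's alignment in the debug info. A source-level illustration (attribute spelling assumes GCC/Clang):

    // With the change above, debug info for cache_line_int should carry
    // DW_AT_alignment(64) rather than plain int's natural alignment.
    typedef int cache_line_int __attribute__((aligned(64)));

    cache_line_int counter = 0;
    int main() { return counter; }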
@@ -1677,7 +1727,7 @@ void CGDebugInfo::CollectCXXBasesAux( const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD); for (const auto &BI : Bases) { const auto *Base = - cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(BI.getType()->castAs<RecordType>()->getDecl()); if (!SeenTypes.insert(Base).second) continue; auto *BaseTy = getOrCreateType(BI.getType(), Unit); @@ -1769,6 +1819,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); } + assert(V && "Failed to find template parameter pointer"); V = V->stripPointerCasts(); } TemplateParams.push_back(DBuilder.createTemplateValueParameter( @@ -1999,7 +2050,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy, SourceLocation Loc) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc)); return T; } @@ -2011,7 +2062,7 @@ llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D, llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, SourceLocation Loc) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); assert(!D.isNull() && "null type"); llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc)); assert(T && "could not create debug info for type"); @@ -2166,6 +2217,17 @@ static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, !isClassOrMethodDLLImport(CXXDecl)) return true; + // In constructor debug mode, only emit debug info for a class when its + // constructor is emitted. Skip this optimization if the class or any of + // its methods are marked dllimport. + if (DebugKind == codegenoptions::DebugInfoConstructor && + !CXXDecl->isLambda() && !isClassOrMethodDLLImport(CXXDecl)) { + for (const auto *Ctor : CXXDecl->ctors()) { + if (Ctor->isUserProvided()) + return true; + } + } + TemplateSpecializationKind Spec = TSK_Undeclared; if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) Spec = SD->getSpecializationKind(); @@ -2550,8 +2612,8 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, SourceLocation Loc = PD->getLocation(); llvm::DIFile *PUnit = getOrCreateFile(Loc); unsigned PLine = getLineNumber(Loc); - ObjCMethodDecl *Getter = PD->getGetterMethodDecl(); - ObjCMethodDecl *Setter = PD->getSetterMethodDecl(); + ObjCMethodDecl *Getter = PImpD->getGetterMethodDecl(); + ObjCMethodDecl *Setter = PImpD->getSetterMethodDecl(); PropertyNode = DBuilder.createObjCProperty( PD->getName(), PUnit, PLine, hasDefaultGetterName(PD, Getter) @@ -2684,16 +2746,16 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, // Set the MS inheritance model. There is no flag for the unspecified model. 
if (CGM.getTarget().getCXXABI().isMicrosoft()) { switch (Ty->getMostRecentCXXRecordDecl()->getMSInheritanceModel()) { - case MSInheritanceAttr::Keyword_single_inheritance: + case MSInheritanceModel::Single: Flags |= llvm::DINode::FlagSingleInheritance; break; - case MSInheritanceAttr::Keyword_multiple_inheritance: + case MSInheritanceModel::Multiple: Flags |= llvm::DINode::FlagMultipleInheritance; break; - case MSInheritanceAttr::Keyword_virtual_inheritance: + case MSInheritanceModel::Virtual: Flags |= llvm::DINode::FlagVirtualInheritance; break; - case MSInheritanceAttr::Keyword_unspecified_inheritance: + case MSInheritanceModel::Unspecified: break; } } @@ -2917,6 +2979,13 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { if (Ty.isNull()) return nullptr; + llvm::TimeTraceScope TimeScope("DebugType", [&]() { + std::string Name; + llvm::raw_string_ostream OS(Name); + Ty.print(OS, getPrintingPolicy()); + return Name; + }); + // Unwrap the type as needed for debug information. Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); @@ -2978,7 +3047,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Dependent types cannot show up in debug information"); case Type::ExtVector: @@ -3105,7 +3174,8 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); - // Explicitly record the calling convention for C++ records. + // Explicitly record the calling convention and export symbols for C++ + // records. auto Flags = llvm::DINode::FlagZero; if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) { if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect) @@ -3116,6 +3186,10 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { // Record if a C++ record is non-trivial type. if (!CXXRD->isTrivial()) Flags |= llvm::DINode::FlagNonTrivial; + + // Record exports it symbols to the containing structure. + if (CXXRD->isAnonymousStructOrUnion()) + Flags |= llvm::DINode::FlagExportSymbols; } llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( @@ -3214,7 +3288,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); - if (DebugKind >= codegenoptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().hasReducedDebugInfo()) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) FDContext = getOrCreateNamespace(NSDecl); @@ -3247,8 +3321,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, llvm::APInt ConstVal(32, 1); QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); - T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal, - 0); + T = CGM.getContext().getConstantArrayType(ET, ConstVal, nullptr, + ArrayType::Normal, 0); } Name = VD->getName(); @@ -3298,13 +3372,13 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, unsigned Line = getLineNumber(Loc); collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray, Flags); - auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); + auto *FD = cast<FunctionDecl>(GD.getDecl()); // Build function type. 
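The DebugInfoConstructor hunks here and in the preceding shouldOmitDefinition change implement constructor homing: a class with a user-provided constructor has its full type info deferred until that constructor is emitted, since any translation unit that constructs the class must emit the constructor too. A source-level illustration (the mode is selected with the cc1-level -debug-info-kind=constructor in this era; treat that spelling as an assumption):

    struct S {
      S();    // user-provided constructor: S's type info can be "homed" here
      int x;
    };

    // Emitting this definition pulls in the complete debug type for S
    // (the completeClass call in CreateCXXMemberFunction above).
    S::S() : x(0) {}

    int main() { S s; return s.x; }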
SmallVector<QualType, 16> ArgTypes; - if (FD) - for (const ParmVarDecl *Parm : FD->parameters()) - ArgTypes.push_back(Parm->getType()); + for (const ParmVarDecl *Parm : FD->parameters()) + ArgTypes.push_back(Parm->getType()); + CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); @@ -3426,6 +3500,42 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { return nullptr; } +llvm::DISubprogram *CGDebugInfo::getObjCMethodDeclaration( + const Decl *D, llvm::DISubroutineType *FnType, unsigned LineNo, + llvm::DINode::DIFlags Flags, llvm::DISubprogram::DISPFlags SPFlags) { + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) + return nullptr; + + const auto *OMD = dyn_cast<ObjCMethodDecl>(D); + if (!OMD) + return nullptr; + + if (CGM.getCodeGenOpts().DwarfVersion < 5 && !OMD->isDirectMethod()) + return nullptr; + + if (OMD->isDirectMethod()) + SPFlags |= llvm::DISubprogram::SPFlagObjCDirect; + + // Starting with DWARF V5 method declarations are emitted as children of + // the interface type. + auto *ID = dyn_cast_or_null<ObjCInterfaceDecl>(D->getDeclContext()); + if (!ID) + ID = OMD->getClassInterface(); + if (!ID) + return nullptr; + QualType QTy(ID->getTypeForDecl(), 0); + auto It = TypeCache.find(QTy.getAsOpaquePtr()); + if (It == TypeCache.end()) + return nullptr; + auto *InterfaceType = cast<llvm::DICompositeType>(It->second); + llvm::DISubprogram *FD = DBuilder.createFunction( + InterfaceType, getObjCMethodName(OMD), StringRef(), + InterfaceType->getFile(), LineNo, FnType, LineNo, Flags, SPFlags); + DBuilder.finalizeSubprogram(FD); + ObjCMethodCache[ID].push_back({FD, OMD->isDirectMethod()}); + return FD; +} + // getOrCreateFunctionType - Construct type. If it is a c++ method, include // implicit parameter "this". llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, @@ -3568,6 +3678,12 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = getLineNumber(ScopeLoc); + llvm::DISubroutineType *DIFnType = getOrCreateFunctionType(D, FnType, Unit); + llvm::DISubprogram *Decl = nullptr; + if (D) + Decl = isa<ObjCMethodDecl>(D) + ? getObjCMethodDeclaration(D, DIFnType, LineNo, Flags, SPFlags) + : getFunctionDeclaration(D); // FIXME: The function declaration we're constructing here is mostly reusing // declarations from CXXMethodDecl and not constructing new ones for arbitrary @@ -3575,9 +3691,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // all subprograms instead of the actual context since subprogram definitions // are emitted as CU level entities by the backend. llvm::DISubprogram *SP = DBuilder.createFunction( - FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef, - SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D)); + FDContext, Name, LinkageName, Unit, LineNo, DIFnType, ScopeLine, + FlagsForDef, SPFlagsForDef, TParamsArray.get(), Decl); Fn->setSubprogram(SP); // We might get here with a VarDecl in the case we're generating // code for the initialization of globals. 
Do not record these decls @@ -3585,35 +3700,6 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (HasDecl && isa<FunctionDecl>(D)) DeclCache[D->getCanonicalDecl()].reset(SP); - // We use the SPDefCache only in the case when the debug entry values option - // is set, in order to speed up parameters modification analysis. - // - // FIXME: Use AbstractCallee here to support ObjCMethodDecl. - if (CGM.getCodeGenOpts().EnableDebugEntryValues && HasDecl) - if (auto *FD = dyn_cast<FunctionDecl>(D)) - if (FD->hasBody() && !FD->param_empty()) - SPDefCache[FD].reset(SP); - - if (CGM.getCodeGenOpts().DwarfVersion >= 5) { - // Starting with DWARF V5 method declarations are emitted as children of - // the interface type. - if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) { - const ObjCInterfaceDecl *ID = OMD->getClassInterface(); - QualType QTy(ID->getTypeForDecl(), 0); - auto It = TypeCache.find(QTy.getAsOpaquePtr()); - if (It != TypeCache.end()) { - llvm::DICompositeType *InterfaceDecl = - cast<llvm::DICompositeType>(It->second); - llvm::DISubprogram *FD = DBuilder.createFunction( - InterfaceDecl, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, - TParamsArray.get()); - DBuilder.finalizeSubprogram(FD); - ObjCMethodCache[ID].push_back(FD); - } - } - } - // Push the function onto the lexical block stack. LexicalBlockStack.emplace_back(SP); @@ -3630,6 +3716,15 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, if (!D) return; + llvm::TimeTraceScope TimeScope("DebugFunction", [&]() { + std::string Name; + llvm::raw_string_ostream OS(Name); + if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) + ND->getNameForDiagnostic(OS, getPrintingPolicy(), + /*Qualified=*/true); + return Name; + }); + llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; llvm::DIFile *Unit = getOrCreateFile(Loc); bool IsDeclForCallSite = Fn ? true : false; @@ -3675,22 +3770,29 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, QualType CalleeType, const FunctionDecl *CalleeDecl) { - auto &CGOpts = CGM.getCodeGenOpts(); - if (!CGOpts.EnableDebugEntryValues || !CGM.getLangOpts().Optimize || - !CallOrInvoke || - CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + if (!CallOrInvoke) return; - auto *Func = CallOrInvoke->getCalledFunction(); if (!Func) return; + if (Func->getSubprogram()) + return; + + // Do not emit a declaration subprogram for a builtin or if call site info + // isn't required. Also, elide declarations for functions with reserved names, + // as call site-related features aren't interesting in this case (& also, the + // compiler may emit calls to these functions without debug locations, which + // makes the verifier complain). + if (CalleeDecl->getBuiltinID() != 0 || + getCallSiteRelatedAttrs() == llvm::DINode::FlagZero) + return; + if (const auto *Id = CalleeDecl->getIdentifier()) + if (Id->isReservedName()) + return; // If there is no DISubprogram attached to the function being called, // create the one describing the function in order to have complete // call site debug info. 
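The llvm::TimeTraceScope calls added throughout this file feed clang's -ftime-trace profiler; the detail string is produced by a callback so that the expensive name printing happens lazily. A self-contained imitation of the scope idea (unlike the real llvm::TimeTraceScope, this toy version always invokes the callback, whether or not tracing is enabled):

    #include <chrono>
    #include <functional>
    #include <iostream>
    #include <string>

    struct ToyTimeTraceScope {
      std::string Name;
      std::chrono::steady_clock::time_point Start;
      ToyTimeTraceScope(const char *name,
                        const std::function<std::string()> &detail)
          : Name(std::string(name) + ": " + detail()),
            Start(std::chrono::steady_clock::now()) {}
      ~ToyTimeTraceScope() { // report elapsed time when the scope closes
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - Start)
                      .count();
        std::cout << Name << " took " << us << "us\n";
      }
    };

    int main() {
      ToyTimeTraceScope scope("DebugType", [] { return std::string("MyClass"); });
      // ... the work being profiled would run here ...
    }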
- if (Func->getSubprogram()) - return; - if (!CalleeDecl->isStatic() && !CalleeDecl->isInlined()) EmitFunctionDecl(CalleeDecl, CalleeDecl->getLocation(), CalleeType, Func); } @@ -3844,8 +3946,8 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, if (NumPaddingBytes.isPositive()) { llvm::APInt pad(32, NumPaddingBytes.getQuantity()); - FType = CGM.getContext().getConstantArrayType(CGM.getContext().CharTy, - pad, ArrayType::Normal, 0); + FType = CGM.getContext().getConstantArrayType( + CGM.getContext().CharTy, pad, nullptr, ArrayType::Normal, 0); EltTys.push_back(CreateMemberType(Unit, FType, "", &FieldOffset)); } } @@ -3873,7 +3975,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder, const bool UsePointerValue) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (VD->hasAttr<NoDebugAttr>()) return nullptr; @@ -4000,11 +4102,6 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); - if (CGM.getCodeGenOpts().EnableDebugEntryValues && ArgNo) { - if (auto *PD = dyn_cast<ParmVarDecl>(VD)) - ParamCache[PD].reset(D); - } - return D; } @@ -4012,12 +4109,12 @@ llvm::DILocalVariable * CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, const bool UsePointerValue) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue); } void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (D->hasAttr<NoDebugAttr>()) @@ -4053,7 +4150,7 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (Builder.GetInsertBlock() == nullptr) @@ -4124,7 +4221,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); EmitDeclare(VD, AI, ArgNo, Builder); } @@ -4181,7 +4278,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, unsigned ArgNo, llvm::AllocaInst *Alloca, CGBuilderTy &Builder) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); ASTContext &C = CGM.getContext(); const BlockDecl *blockDecl = block.getBlockDecl(); @@ -4347,10 +4444,18 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls( void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const VarDecl *D) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); if (D->hasAttr<NoDebugAttr>()) 
return; + llvm::TimeTraceScope TimeScope("DebugGlobalVariable", [&]() { + std::string Name; + llvm::raw_string_ostream OS(Name); + D->getNameForDiagnostic(OS, getPrintingPolicy(), + /*Qualified=*/true); + return Name; + }); + // If we already created a DIGlobalVariable for this declaration, just attach // it to the llvm::GlobalVariable. auto Cached = DeclCache.find(D->getCanonicalDecl()); @@ -4398,7 +4503,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, GVE = DBuilder.createGlobalVariableExpression( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasLocalLinkage(), + Var->hasLocalLinkage(), true, Expr.empty() ? nullptr : DBuilder.createExpression(Expr), getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, Align); @@ -4408,28 +4513,44 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { - assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); if (VD->hasAttr<NoDebugAttr>()) return; + llvm::TimeTraceScope TimeScope("DebugConstGlobalVariable", [&]() { + std::string Name; + llvm::raw_string_ostream OS(Name); + VD->getNameForDiagnostic(OS, getPrintingPolicy(), + /*Qualified=*/true); + return Name; + }); + auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); // Create the descriptor for the variable. llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); - // Do not use global variables for enums, unless in CodeView. if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); - (void)ED; - - // If CodeView, emit enums as global variables, unless they are defined - // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for - // enums in classes, and because it is difficult to attach this scope - // information to the global variable. - if (!CGM.getCodeGenOpts().EmitCodeView || - isa<RecordDecl>(ED->getDeclContext())) + + if (CGM.getCodeGenOpts().EmitCodeView) { + // If CodeView, emit enums as global variables, unless they are defined + // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for + // enums in classes, and because it is difficult to attach this scope + // information to the global variable. + if (isa<RecordDecl>(ED->getDeclContext())) + return; + } else { + // If not CodeView, emit DW_TAG_enumeration_type if necessary. For + // example: for "enum { ZERO };", a DW_TAG_enumeration_type is created the + // first time `ZERO` is referenced in a function. 
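A source-level illustration of the CodeView/DWARF split described in the comment above (behavior as described there; the exact debug records depend on the target):

    enum { ZERO };                    // unnamed enum at namespace scope

    struct Holder { enum { ONE }; };  // enum defined inside a class

    int use() {
      // DWARF: referencing ZERO creates a DW_TAG_enumeration_type on first use.
      // CodeView: ZERO becomes a global constant, but Holder::ONE does not,
      // since MSVC emits no S_CONSTANT for enums nested in classes.
      return ZERO + Holder::ONE;
    }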
+ llvm::DIType *EDTy = + getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); + assert (EDTy->getTag() == llvm::dwarf::DW_TAG_enumeration_type); + (void)EDTy; return; + } } llvm::DIScope *DContext = nullptr; @@ -4485,10 +4606,29 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { GV.reset(DBuilder.createGlobalVariableExpression( DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, - true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), + true, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), TemplateParameters, Align)); } +void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var, + const VarDecl *D) { + assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); + if (D->hasAttr<NoDebugAttr>()) + return; + + auto Align = getDeclAlignIfRequired(D, CGM.getContext()); + llvm::DIFile *Unit = getOrCreateFile(D->getLocation()); + StringRef Name = D->getName(); + llvm::DIType *Ty = getOrCreateType(D->getType(), Unit); + + llvm::DIScope *DContext = getDeclContextDescriptor(D); + llvm::DIGlobalVariableExpression *GVE = + DBuilder.createGlobalVariableExpression( + DContext, Name, StringRef(), Unit, getLineNumber(D->getLocation()), + Ty, false, false, nullptr, nullptr, nullptr, Align); + Var->addDebugInfo(GVE); +} + llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { if (!LexicalBlockStack.empty()) return LexicalBlockStack.back(); @@ -4497,7 +4637,7 @@ llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { } void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + if (!CGM.getCodeGenOpts().hasReducedDebugInfo()) return; const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || @@ -4510,7 +4650,7 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { } void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + if (!CGM.getCodeGenOpts().hasReducedDebugInfo()) return; assert(UD.shadow_size() && "We shouldn't be codegening an invalid UsingDecl containing no decls"); @@ -4524,7 +4664,7 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { // return type in the definition) if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl())) if (const auto *AT = - FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType()) + FD->getType()->castAs<FunctionProtoType>()->getContainedAutoType()) if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = @@ -4551,7 +4691,7 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { llvm::DIImportedEntity * CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { - if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + if (!CGM.getCodeGenOpts().hasReducedDebugInfo()) return nullptr; auto &VH = NamespaceAliasCache[&NA]; if (VH) @@ -4596,29 +4736,6 @@ void CGDebugInfo::setDwoId(uint64_t Signature) { TheCU->setDWOId(Signature); } -/// Analyzes each function parameter to determine whether it is constant -/// throughout the function body. 
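The new EmitExternalVariable above covers declaration-only globals; note it passes false for both boolean arguments of createGlobalVariableExpression (their roles as is-local-to-unit and is-defined are assumed from the surrounding call sites in this diff). At the source level:

    extern int external_counter;   // declared here, defined in another TU

    int read_counter() {
      // This TU can now attach a DIGlobalVariableExpression describing
      // external_counter without claiming to define it.
      return external_counter;
    }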
-static void analyzeParametersModification( - ASTContext &Ctx, - llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> &SPDefCache, - llvm::DenseMap<const ParmVarDecl *, llvm::TrackingMDRef> &ParamCache) { - for (auto &SP : SPDefCache) { - auto *FD = SP.first; - assert(FD->hasBody() && "Functions must have body here"); - const Stmt *FuncBody = (*FD).getBody(); - for (auto Parm : FD->parameters()) { - ExprMutationAnalyzer FuncAnalyzer(*FuncBody, Ctx); - if (FuncAnalyzer.isMutated(Parm)) - continue; - - auto I = ParamCache.find(Parm); - assert(I != ParamCache.end() && "Parameters should be already cached"); - auto *DIParm = cast<llvm::DILocalVariable>(I->second); - DIParm->setIsNotModified(); - } - } -} - void CGDebugInfo::finalize() { // Creating types might create further types - invalidating the current // element and the size(), so don't cache/reference them. @@ -4630,27 +4747,28 @@ void CGDebugInfo::finalize() { DBuilder.replaceTemporary(llvm::TempDIType(E.Decl), Ty); } - if (CGM.getCodeGenOpts().DwarfVersion >= 5) { - // Add methods to interface. - for (const auto &P : ObjCMethodCache) { - if (P.second.empty()) - continue; + // Add methods to interface. + for (const auto &P : ObjCMethodCache) { + if (P.second.empty()) + continue; + + QualType QTy(P.first->getTypeForDecl(), 0); + auto It = TypeCache.find(QTy.getAsOpaquePtr()); + assert(It != TypeCache.end()); - QualType QTy(P.first->getTypeForDecl(), 0); - auto It = TypeCache.find(QTy.getAsOpaquePtr()); - assert(It != TypeCache.end()); + llvm::DICompositeType *InterfaceDecl = + cast<llvm::DICompositeType>(It->second); - llvm::DICompositeType *InterfaceDecl = - cast<llvm::DICompositeType>(It->second); + auto CurElts = InterfaceDecl->getElements(); + SmallVector<llvm::Metadata *, 16> EltTys(CurElts.begin(), CurElts.end()); - SmallVector<llvm::Metadata *, 16> EltTys; - auto CurrenetElts = InterfaceDecl->getElements(); - EltTys.append(CurrenetElts.begin(), CurrenetElts.end()); - for (auto &MD : P.second) - EltTys.push_back(MD); - llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys); - DBuilder.replaceArrays(InterfaceDecl, Elements); - } + // For DWARF v4 or earlier, only add objc_direct methods. + for (auto &SubprogramDirect : P.second) + if (CGM.getCodeGenOpts().DwarfVersion >= 5 || SubprogramDirect.getInt()) + EltTys.push_back(SubprogramDirect.getPointer()); + + llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys); + DBuilder.replaceArrays(InterfaceDecl, Elements); } for (const auto &P : ReplaceMap) { @@ -4691,15 +4809,11 @@ void CGDebugInfo::finalize() { if (auto MD = TypeCache[RT]) DBuilder.retainType(cast<llvm::DIType>(MD)); - if (CGM.getCodeGenOpts().EnableDebugEntryValues) - // This will be used to emit debug entry values. 
- analyzeParametersModification(CGM.getContext(), SPDefCache, ParamCache); - DBuilder.finalize(); } void CGDebugInfo::EmitExplicitCastType(QualType Ty) { - if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + if (!CGM.getCodeGenOpts().hasReducedDebugInfo()) return; if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile())) @@ -4728,10 +4842,10 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { bool SupportsDWARFv4Ext = CGM.getCodeGenOpts().DwarfVersion == 4 && (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB || - (CGM.getCodeGenOpts().EnableDebugEntryValues && - CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::GDB)); + CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::GDB); - if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) + if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5 && + !CGM.getCodeGenOpts().EnableDebugEntryValues) return llvm::DINode::FlagZero; return llvm::DINode::FlagAllCallsDescribed; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h index 7edbea86633a..90e9a61ebe96 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDebugInfo.h @@ -83,12 +83,25 @@ class CGDebugInfo { /// Cache of previously constructed Types. llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache; - llvm::SmallDenseMap<llvm::StringRef, llvm::StringRef> DebugPrefixMap; + std::map<llvm::StringRef, llvm::StringRef, std::greater<llvm::StringRef>> + DebugPrefixMap; /// Cache that maps VLA types to size expressions for that type, /// represented by instantiated Metadata nodes. llvm::SmallDenseMap<QualType, llvm::Metadata *> SizeExprCache; + /// Callbacks to use when printing names and types. + class PrintingCallbacks final : public clang::PrintingCallbacks { + const CGDebugInfo &Self; + + public: + PrintingCallbacks(const CGDebugInfo &Self) : Self(Self) {} + std::string remapPath(StringRef Path) const override { + return Self.remapDIPath(Path); + } + }; + PrintingCallbacks PrintCB = {*this}; + struct ObjCInterfaceCacheEntry { const ObjCInterfaceType *Type; llvm::DIType *Decl; @@ -102,7 +115,10 @@ class CGDebugInfo { llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache; /// Cache of forward declarations for methods belonging to the interface. - llvm::DenseMap<const ObjCInterfaceDecl *, std::vector<llvm::DISubprogram *>> + /// The extra bit on the DISubprogram specifies whether a method is + /// "objc_direct". + llvm::DenseMap<const ObjCInterfaceDecl *, + std::vector<llvm::PointerIntPair<llvm::DISubprogram *, 1>>> ObjCMethodCache; /// Cache of references to clang modules and precompiled headers. @@ -134,10 +150,6 @@ class CGDebugInfo { llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache; llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache; - /// Cache function definitions relevant to use for parameters mutation - /// analysis. - llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPDefCache; - llvm::DenseMap<const ParmVarDecl *, llvm::TrackingMDRef> ParamCache; /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations) that aren't covered by other more specific caches. llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache; @@ -466,6 +478,9 @@ public: /// Emit a constant global variable's debug info. 
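On the ObjCMethodCache change above: llvm::PointerIntPair stores a small integer in the unused low bits of an aligned pointer, so tagging each DISubprogram with an "is objc_direct" bit costs no extra memory. A minimal usage sketch:

    #include "llvm/ADT/PointerIntPair.h"
    #include "llvm/IR/DebugInfoMetadata.h"

    void cacheMethod(llvm::DISubprogram *SP, bool IsObjCDirect) {
      // One spare low bit of the aligned pointer holds the flag.
      llvm::PointerIntPair<llvm::DISubprogram *, 1> Entry(SP, IsObjCDirect);
      llvm::DISubprogram *P = Entry.getPointer(); // SP back out
      bool Direct = Entry.getInt();               // flag back out
      (void)P;
      (void)Direct;
    }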
void EmitGlobalVariable(const ValueDecl *VD, const APValue &Init); + /// Emit information about an external variable. + void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl); + /// Emit C++ using directive. void EmitUsingDirective(const UsingDirectiveDecl &UD); @@ -601,6 +616,17 @@ private: /// declaration for the given method definition. llvm::DISubprogram *getFunctionDeclaration(const Decl *D); + /// \return debug info descriptor to describe the method declaration + /// for the given method definition. + /// \param FnType For Objective-C methods, their type. + /// \param LineNo The declaration's line number. + /// \param Flags The DIFlags for the method declaration. + /// \param SPFlags The subprogram-specific flags for the method declaration. + llvm::DISubprogram * + getObjCMethodDeclaration(const Decl *D, llvm::DISubroutineType *FnType, + unsigned LineNo, llvm::DINode::DIFlags Flags, + llvm::DISubprogram::DISPFlags SPFlags); + /// \return debug info descriptor to describe in-class static data /// member declaration for the given out-of-class definition. If D /// is an out-of-class definition of a static data member of a @@ -725,6 +751,7 @@ public: ApplyDebugLocation(ApplyDebugLocation &&Other) : CGF(Other.CGF) { Other.CGF = nullptr; } + ApplyDebugLocation &operator=(ApplyDebugLocation &&) = default; ~ApplyDebugLocation(); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp index 6ad43cefc4d2..60f1dba7c768 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDecl.cpp @@ -22,6 +22,7 @@ #include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" @@ -109,6 +110,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::OMPRequires: case Decl::Empty: case Decl::Concept: + case Decl::LifetimeExtendedTemporary: + case Decl::RequiresExprBody: // None of these decls require codegen support. return; @@ -250,7 +253,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( llvm::GlobalVariable *GV = new llvm::GlobalVariable( getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name, nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); - GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); + GV->setAlignment(getContext().getDeclAlign(&D).getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); @@ -305,14 +308,6 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( return Addr; } -/// hasNontrivialDestruction - Determine whether a type's destruction is -/// non-trivial. If so, and the variable uses static initialization, we must -/// register its destructor to run on exit. -static bool hasNontrivialDestruction(QualType T) { - CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); - return RD && !RD->hasTrivialDestructor(); -} - /// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the /// global variable that has already been created for it.
If the initializer /// has a different type than GV does, this may free GV and return a different @@ -372,7 +367,8 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, emitter.finalize(GV); - if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) { + if (D.needsDestruction(getContext()) == QualType::DK_cxx_destructor && + HaveInsertPoint()) { // We have a constant initializer, but a nontrivial destructor. We still // need to perform a guarded "initialization" in order to register the // destructor. @@ -416,7 +412,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); - var->setAlignment(alignment.getQuantity()); + var->setAlignment(alignment.getAsAlign()); if (D.hasAttr<AnnotateAttr>()) CGM.AddGlobalAnnotations(&D, var); @@ -427,6 +423,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, var->addAttribute("data-section", SA->getName()); if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>()) var->addAttribute("rodata-section", SA->getName()); + if (auto *SA = D.getAttr<PragmaClangRelroSectionAttr>()) + var->addAttribute("relro-section", SA->getName()); if (const SectionAttr *SA = D.getAttr<SectionAttr>()) var->setSection(SA->getName()); @@ -449,8 +447,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, // Emit global variable debug descriptor for static vars. CGDebugInfo *DI = getDebugInfo(); - if (DI && - CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { + if (DI && CGM.getCodeGenOpts().hasReducedDebugInfo()) { DI->setLocation(D.getLocation()); DI->EmitGlobalVariable(var, &D); } @@ -575,7 +572,7 @@ namespace { Var.getType(), VK_LValue, SourceLocation()); // Compute the address of the local variable, in case it's a byref // or something. - llvm::Value *Addr = CGF.EmitDeclRefLValue(&DRE).getPointer(); + llvm::Value *Addr = CGF.EmitDeclRefLValue(&DRE).getPointer(CGF); // In some cases, the type of the function argument will be different from // the type of the pointer. An example of this is @@ -690,18 +687,18 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, LValue srcLV = CGF.EmitLValue(srcExpr); // Handle a formal type change to avoid asserting. - auto srcAddr = srcLV.getAddress(); + auto srcAddr = srcLV.getAddress(CGF); if (needsCast) { - srcAddr = CGF.Builder.CreateElementBitCast(srcAddr, - destLV.getAddress().getElementType()); + srcAddr = CGF.Builder.CreateElementBitCast( + srcAddr, destLV.getAddress(CGF).getElementType()); } // If it was an l-value, use objc_copyWeak. if (srcExpr->getValueKind() == VK_LValue) { - CGF.EmitARCCopyWeak(destLV.getAddress(), srcAddr); + CGF.EmitARCCopyWeak(destLV.getAddress(CGF), srcAddr); } else { assert(srcExpr->getValueKind() == VK_XValue); - CGF.EmitARCMoveWeak(destLV.getAddress(), srcAddr); + CGF.EmitARCMoveWeak(destLV.getAddress(CGF), srcAddr); } return true; } @@ -719,7 +716,7 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, static void drillIntoBlockVariable(CodeGenFunction &CGF, LValue &lvalue, const VarDecl *var) { - lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var)); + lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(CGF), var)); } void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, @@ -779,17 +776,18 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, if (capturedByInit) { // We can use a simple GEP for this because it can't have been // moved yet. 
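The needsDestruction check above preserves the case the deleted hasNontrivialDestruction helper handled: a static local whose initializer folds to a constant but whose destructor must still be registered. For example:

    struct Logger {
      int Fd = -1;     // initializer folds to a constant
      ~Logger();       // nontrivial: must run at program exit
    };

    void touch() {
      // The initializer is emitted as constant data, but Clang still emits a
      // guarded "initialization" whose only job is to register ~Logger
      // (via __cxa_atexit or equivalent) the first time touch() runs.
      static Logger L;
    }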
- tempLV.setAddress(emitBlockByrefAddress(tempLV.getAddress(), + tempLV.setAddress(emitBlockByrefAddress(tempLV.getAddress(*this), cast<VarDecl>(D), /*follow*/ false)); } - auto ty = cast<llvm::PointerType>(tempLV.getAddress().getElementType()); + auto ty = + cast<llvm::PointerType>(tempLV.getAddress(*this).getElementType()); llvm::Value *zero = CGM.getNullPointer(ty, tempLV.getType()); // If __weak, we want to use a barrier under certain conditions. if (lifetime == Qualifiers::OCL_Weak) - EmitARCInitWeak(tempLV.getAddress(), zero); + EmitARCInitWeak(tempLV.getAddress(*this), zero); // Otherwise just do a simple store. else @@ -832,9 +830,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); if (accessedByInit) - EmitARCStoreWeak(lvalue.getAddress(), value, /*ignored*/ true); + EmitARCStoreWeak(lvalue.getAddress(*this), value, /*ignored*/ true); else - EmitARCInitWeak(lvalue.getAddress(), value); + EmitARCInitWeak(lvalue.getAddress(*this), value); return; } @@ -1120,11 +1118,11 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D, llvm::GlobalVariable *GV = new llvm::GlobalVariable( getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, Constant, Name, InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CacheEntry = GV; } else if (CacheEntry->getAlignment() < Align.getQuantity()) { - CacheEntry->setAlignment(Align.getQuantity()); + CacheEntry->setAlignment(Align.getAsAlign()); } return Address(CacheEntry, Align); @@ -1396,8 +1394,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { EmitVariablyModifiedType(Ty); auto *DI = getDebugInfo(); - bool EmitDebugInfo = DI && CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo; + bool EmitDebugInfo = DI && CGM.getCodeGenOpts().hasReducedDebugInfo(); Address address = Address::invalid(); Address AllocaAddr = Address::invalid(); @@ -1902,11 +1899,10 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, else if (auto *FD = dyn_cast<FieldDecl>(D)) Overlap = getOverlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? - EmitAggExpr(init, AggValueSlot::forLValue(lvalue, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - Overlap)); + EmitAggExpr(init, AggValueSlot::forLValue( + lvalue, *this, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, Overlap)); } return; } @@ -1994,7 +1990,7 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; // Check the type for a cleanup. - if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) + if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext())) emitAutoVarTypeCleanup(emission, dtorKind); // In GC mode, honor objc_precise_lifetime. @@ -2403,8 +2399,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Don't push a cleanup in a thunk for a method that will also emit a // cleanup. 
if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk && - Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { - if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) { + Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { + if (QualType::DestructionKind DtorKind = + D.needsDestruction(getContext())) { assert((DtorKind == QualType::DK_cxx_destructor || DtorKind == QualType::DK_nontrivial_c_struct) && "unexpected destructor type"); @@ -2461,7 +2458,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // objc_storeStrong attempts to release its old value. llvm::Value *Null = CGM.EmitNullConstant(D.getType()); EmitStoreOfScalar(Null, lv, /* isInitialization */ true); - EmitARCStoreStrongCall(lv.getAddress(), ArgVal, true); + EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true); DoStore = false; } else @@ -2496,10 +2493,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, setAddrOfLocalVar(&D, DeclPtr); - // Emit debug info for param declaration. + // Emit debug info for param declarations in non-thunk functions. if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().hasReducedDebugInfo() && !CurFuncIsThunk) { DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder); } } @@ -2529,10 +2525,11 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, } void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, - CodeGenFunction *CGF) { - if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || LangOpts.OpenMPSimd || + (!LangOpts.EmitAllDecls && !D->isUsed())) return; - // FIXME: need to implement mapper code generation + getOpenMPRuntime().emitUserDefinedMapper(D, CGF); } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp index 7a0605b8450a..3baa0a080f5d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGDeclCXX.cpp @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" #include "TargetInfo.h" +#include "clang/AST/Attr.h" #include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" @@ -54,10 +55,11 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, CGF.EmitComplexExprIntoLValue(Init, lv, /*isInit*/ true); return; case TEK_Aggregate: - CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::DoesNotOverlap)); + CGF.EmitAggExpr(Init, + AggValueSlot::forLValue(lv, CGF, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); return; } llvm_unreachable("bad evaluation kind"); @@ -73,16 +75,10 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // that isn't balanced out by a destructor call as intended by the // attribute. This also checks for -fno-c++-static-destructors and // bails even if the attribute is not present. 
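On the EmitParmDecl hunk above: isParamDestroyedInCallee() covers ABIs (notably the Microsoft C++ ABI) where a by-value argument with a nontrivial destructor is destroyed inside the callee rather than by the caller, so the callee itself must push the destructor cleanup:

    struct Big {
      Big(const Big &);
      ~Big();            // nontrivial destructor
    };

    void callee(Big B);  // MS ABI: ~Big() for B runs inside callee

    void caller(Big &Arg) {
      callee(Arg);       // caller constructs the copy, callee destroys it
    }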
- if (D.isNoDestroy(CGF.getContext())) - return; - - CodeGenModule &CGM = CGF.CGM; + QualType::DestructionKind DtorKind = D.needsDestruction(CGF.getContext()); // FIXME: __attribute__((cleanup)) ? - QualType Type = D.getType(); - QualType::DestructionKind DtorKind = Type.isDestructedType(); - switch (DtorKind) { case QualType::DK_none: return; @@ -101,6 +97,9 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, llvm::FunctionCallee Func; llvm::Constant *Argument; + CodeGenModule &CGM = CGF.CGM; + QualType Type = D.getType(); + // Special-case non-array C++ destructors, if they have the right signature. // Under some ABIs, destructors return this instead of void, and cannot be // passed directly to __cxa_atexit if the target does not allow this @@ -251,8 +250,8 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. - if (llvm::Function *dtorFn = - dyn_cast<llvm::Function>(dtor.getCallee()->stripPointerCasts())) + if (auto *dtorFn = dyn_cast<llvm::Function>( + dtor.getCallee()->stripPointerCastsAndAliases())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); @@ -440,7 +439,7 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // that are of class type, cannot have a non-empty constructor. All // the checks have been done in Sema by now. Whatever initializers // are allowed are empty and we just need to ignore them here. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit && (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDASharedAttr>())) return; @@ -611,6 +610,11 @@ CodeGenModule::EmitCXXGlobalInitFunc() { Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); } + if (getLangOpts().HIP) { + Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + Fn->addFnAttr("device-init"); + } + CXXGlobalInits.clear(); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp index 3b7a88a0b769..53fafab3e0e6 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGException.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/Mangle.h" @@ -21,8 +21,9 @@ #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetBuiltins.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/Support/SaveAndRestore.h" using namespace clang; @@ -165,10 +166,7 @@ static const EHPersonality &getCXXPersonality(const TargetInfo &Target, return EHPersonality::GNU_CPlusPlus; if (L.SEHExceptions) return EHPersonality::GNU_CPlusPlus_SEH; - // Wasm EH is a non-MVP feature for now. 
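For reference, the atexit-stub machinery above targets the Itanium C++ ABI's registration entry point. Roughly what the synthesized stub amounts to (a hand-written approximation, not the generated code):

    extern "C" int __cxa_atexit(void (*Fn)(void *), void *Arg, void *Dso);
    extern "C" void *__dso_handle;

    struct Global { ~Global(); };
    Global G;

    extern "C" void G_dtor_stub(void *P) {   // void(void*) shape __cxa_atexit wants
      static_cast<Global *>(P)->~Global();
    }

    static void registerGlobalDtor() {
      __cxa_atexit(&G_dtor_stub, &G, &__dso_handle);
    }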
- if (Target.hasFeature("exception-handling") && - (T.getArch() == llvm::Triple::wasm32 || - T.getArch() == llvm::Triple::wasm64)) + if (L.WasmExceptions) return EHPersonality::GNU_Wasm_CPlusPlus; return EHPersonality::GNU_CPlusPlus; } @@ -1774,7 +1772,8 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, // EH registration is passed in as the EBP physical register. We can // recover that with llvm.frameaddress(1). EntryFP = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::frameaddress), {Builder.getInt32(1)}); + CGM.getIntrinsic(llvm::Intrinsic::frameaddress, AllocaInt8PtrTy), + {Builder.getInt32(1)}); } else { // Otherwise, for x64 and 32-bit finally functions, the parent FP is the // second parameter. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp index b6c2567bd578..8e0604181fb1 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp @@ -66,7 +66,7 @@ Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, const Twine &Name, llvm::Value *ArraySize) { auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); - Alloca->setAlignment(Align.getQuantity()); + Alloca->setAlignment(Align.getAsAlign()); return Address(Alloca, Align); } @@ -126,7 +126,7 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) { assert(isa<llvm::AllocaInst>(Var.getPointer())); auto *Store = new llvm::StoreInst(Init, Var.getPointer()); - Store->setAlignment(Var.getAlignment().getQuantity()); + Store->setAlignment(Var.getAlignment().getAsAlign()); llvm::BasicBlock *Block = AllocaInsertPt->getParent(); Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); } @@ -392,7 +392,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, llvm::GlobalValue::NotThreadLocal, CGF.getContext().getTargetAddressSpace(AS)); CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty); - GV->setAlignment(alignment.getQuantity()); + GV->setAlignment(alignment.getAsAlign()); llvm::Constant *C = GV; if (AS != LangAS::Default) C = TCG.performAddrSpaceCast( @@ -417,7 +417,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, LValue CodeGenFunction:: EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { - const Expr *E = M->GetTemporaryExpr(); + const Expr *E = M->getSubExpr(); assert((!M->getExtendingDecl() || !isa<VarDecl>(M->getExtendingDecl()) || !cast<VarDecl>(M->getExtendingDecl())->isARCPseudoStrong()) && @@ -516,13 +516,13 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Avoid creating a conditional cleanup just to hold an llvm.lifetime.end // marker. Instead, start the lifetime of a conditional temporary earlier - // so that it's unconditional. Don't do this in ASan's use-after-scope - // mode so that it gets the more precise lifetime marks. If the type has - // a non-trivial destructor, we'll have a cleanup block for it anyway, - // so this typically doesn't help; skip it in that case. + // so that it's unconditional. Don't do this with sanitizers which need + // more precise lifetime marks. 
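On the llvm.frameaddress change above: the intrinsic is now overloaded on its result pointer type, hence the extra AllocaInt8PtrTy argument to getIntrinsic. The source-level equivalent of the call Clang emits for the SEH case is:

    void *parentFramePointer() {
      // Level 1 = the immediate caller's frame; this is how the SEH filter
      // recovers the parent's EBP-based EH registration on x86. (Nonzero
      // levels are unreliable in general; fine here as an illustration.)
      return __builtin_frame_address(1);
    }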
ConditionalEvaluation *OldConditional = nullptr; CGBuilderTy::InsertPoint OldIP; if (isInConditionalBranch() && !E->getType().isDestructedType() && + !SanOpts.has(SanitizerKind::HWAddress) && + !SanOpts.has(SanitizerKind::Memory) && !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) { OldConditional = OutermostConditional; OutermostConditional = nullptr; @@ -573,7 +573,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { LV = EmitLValueForField(LV, Adjustment.Field); assert(LV.isSimple() && "materialized temporary field is not a simple lvalue"); - Object = LV.getAddress(); + Object = LV.getAddress(*this); break; } @@ -594,7 +594,7 @@ CodeGenFunction::EmitReferenceBindingToExpr(const Expr *E) { // Emit the expression as an lvalue. LValue LV = EmitLValue(E); assert(LV.isSimple()); - llvm::Value *Value = LV.getPointer(); + llvm::Value *Value = LV.getPointer(*this); if (sanitizePerformTypeCheck() && !E->getType()->isFunctionType()) { // C++11 [dcl.ref]p5 (as amended by core issue 453): @@ -677,8 +677,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // Quickly determine whether we have a pointer to an alloca. It's possible // to skip null checks, and some alignment checks, for these pointers. This // can reduce compile-time significantly. - auto PtrToAlloca = - dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases()); + auto PtrToAlloca = dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCasts()); llvm::Value *True = llvm::ConstantInt::getTrue(getLLVMContext()); llvm::Value *IsNonNull = nullptr; @@ -998,7 +997,7 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, // Add the inc/dec to the real part. NextVal = Builder.CreateAdd(InVal.first, NextVal, isInc ? "inc" : "dec"); } else { - QualType ElemTy = E->getType()->getAs<ComplexType>()->getElementType(); + QualType ElemTy = E->getType()->castAs<ComplexType>()->getElementType(); llvm::APFloat FVal(getContext().getFloatTypeSemantics(ElemTy), 1); if (!isInc) FVal.changeSign(); @@ -1012,6 +1011,9 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, // Store the updated result through the lvalue. EmitStoreOfComplex(IncVal, LV, /*init*/ false); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getSubExpr()); // If this is a postinc, return the value read from memory, otherwise use the // updated value. 
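EmitComplexPrePostIncDec above implements the GNU extension where ++ and -- on a _Complex value adjust only the real component:

    _Complex double bump(_Complex double Z) {
      ++Z;   // adds 1.0 to the real part; the imaginary part is untouched
      return Z;
    }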
@@ -1130,7 +1132,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, LValue LV = EmitLValue(UO->getSubExpr()); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); - return LV.getAddress(); + return LV.getAddress(*this); } } @@ -1220,8 +1222,8 @@ LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { if (IsBaseCXXThis || isa<DeclRefExpr>(ME->getBase())) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), - E->getType(), LV.getAlignment(), SkippedChecks); + EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(*this), E->getType(), + LV.getAlignment(), SkippedChecks); } return LV; } @@ -1268,6 +1270,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::CXXOperatorCallExprClass: case Expr::UserDefinedLiteralClass: return EmitCallExprLValue(cast<CallExpr>(E)); + case Expr::CXXRewrittenBinaryOperatorClass: + return EmitLValue(cast<CXXRewrittenBinaryOperator>(E)->getSemanticForm()); case Expr::VAArgExprClass: return EmitVAArgExprLValue(cast<VAArgExpr>(E)); case Expr::DeclRefExprClass: @@ -1306,7 +1310,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { if (LV.isSimple()) { // Defend against branches out of gnu statement expressions surrounded by // cleanups. - llvm::Value *V = LV.getPointer(); + llvm::Value *V = LV.getPointer(*this); Scope.ForceCleanup({&V}); return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), getContext(), LV.getBaseInfo(), LV.getTBAAInfo()); @@ -1522,7 +1526,7 @@ llvm::Value *CodeGenFunction::emitScalarConstant( llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { - return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), + return EmitLoadOfScalar(lvalue.getAddress(*this), lvalue.isVolatile(), lvalue.getType(), Loc, lvalue.getBaseInfo(), lvalue.getTBAAInfo(), lvalue.isNontemporal()); } @@ -1762,7 +1766,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit) { - EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), + EmitStoreOfScalar(value, lvalue.getAddress(*this), lvalue.isVolatile(), lvalue.getType(), lvalue.getBaseInfo(), lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal()); } @@ -1773,18 +1777,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { if (LV.isObjCWeak()) { // load of a __weak object. - Address AddrWeakObj = LV.getAddress(); + Address AddrWeakObj = LV.getAddress(*this); return RValue::get(CGM.getObjCRuntime().EmitObjCWeakRead(*this, AddrWeakObj)); } if (LV.getQuals().getObjCLifetime() == Qualifiers::OCL_Weak) { // In MRC mode, we do a load+autorelease. if (!getLangOpts().ObjCAutoRefCount) { - return RValue::get(EmitARCLoadWeak(LV.getAddress())); + return RValue::get(EmitARCLoadWeak(LV.getAddress(*this))); } // In ARC mode, we load retained and then consume the value. - llvm::Value *Object = EmitARCLoadWeakRetained(LV.getAddress()); + llvm::Value *Object = EmitARCLoadWeakRetained(LV.getAddress(*this)); Object = EmitObjCConsumeObject(LV.getType(), Object); return RValue::get(Object); } @@ -1879,8 +1883,7 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { /// Generates lvalue for partial ext_vector access. 
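The new CXXRewrittenBinaryOperatorClass case above handles C++20 operator rewriting: relational operators can be rewritten in terms of operator<=>, and CodeGen simply emits the stored semantic form. For example:

    #include <compare>

    struct Point {
      int X, Y;
      auto operator<=>(const Point &) const = default;
    };

    bool demo(Point A, Point B) {
      return A < B;   // parsed as a CXXRewrittenBinaryOperator whose semantic
                      // form is (A <=> B) < 0; that form is what CodeGen emits
    }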
Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { Address VectorAddress = LV.getExtVectorAddress(); - const VectorType *ExprVT = LV.getType()->getAs<VectorType>(); - QualType EQT = ExprVT->getElementType(); + QualType EQT = LV.getType()->castAs<VectorType>()->getElementType(); llvm::Type *VectorElementTy = CGM.getTypes().ConvertType(EQT); Address CastToPointerElement = @@ -1970,9 +1973,10 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, case Qualifiers::OCL_Weak: if (isInit) // Initialize and then skip the primitive store. - EmitARCInitWeak(Dst.getAddress(), Src.getScalarVal()); + EmitARCInitWeak(Dst.getAddress(*this), Src.getScalarVal()); else - EmitARCStoreWeak(Dst.getAddress(), Src.getScalarVal(), /*ignore*/ true); + EmitARCStoreWeak(Dst.getAddress(*this), Src.getScalarVal(), + /*ignore*/ true); return; case Qualifiers::OCL_Autoreleasing: @@ -1985,7 +1989,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, if (Dst.isObjCWeak() && !Dst.isNonGC()) { // load of a __weak object. - Address LvalueDst = Dst.getAddress(); + Address LvalueDst = Dst.getAddress(*this); llvm::Value *src = Src.getScalarVal(); CGM.getObjCRuntime().EmitObjCWeakAssign(*this, src, LvalueDst); return; @@ -1993,7 +1997,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, if (Dst.isObjCStrong() && !Dst.isNonGC()) { // load of a __strong object. - Address LvalueDst = Dst.getAddress(); + Address LvalueDst = Dst.getAddress(*this); llvm::Value *src = Src.getScalarVal(); if (Dst.isObjCIvar()) { assert(Dst.getBaseIvarExp() && "BaseIvarExp is NULL"); @@ -2195,7 +2199,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, // If ivar is a structure pointer, assigning to field of // this struct follows gcc's behavior and makes it a non-ivar // writer-barrier conservatively. - ExpTy = ExpTy->getAs<PointerType>()->getPointeeType(); + ExpTy = ExpTy->castAs<PointerType>()->getPointeeType(); if (ExpTy->isRecordType()) { LV.setObjCIvar(false); return; @@ -2231,7 +2235,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, // a non-ivar write-barrier. QualType ExpTy = E->getType(); if (ExpTy->isPointerType()) - ExpTy = ExpTy->getAs<PointerType>()->getPointeeType(); + ExpTy = ExpTy->castAs<PointerType>()->getPointeeType(); if (ExpTy->isRecordType()) LV.setObjCIvar(false); } @@ -2319,8 +2323,8 @@ Address CodeGenFunction::EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo, TBAAAccessInfo *PointeeTBAAInfo) { - llvm::LoadInst *Load = Builder.CreateLoad(RefLVal.getAddress(), - RefLVal.isVolatile()); + llvm::LoadInst *Load = + Builder.CreateLoad(RefLVal.getAddress(*this), RefLVal.isVolatile()); CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo()); CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(), @@ -2362,7 +2366,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, // If it's thread_local, emit a call to its wrapper function instead. if (VD->getTLSKind() == VarDecl::TLS_Dynamic && - CGF.CGM.getCXXABI().usesThreadWrapperFunction()) + CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD)) return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T); // Check if the variable is marked as declare target with link clause in // device codegen. 
@@ -2564,21 +2568,35 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { VD = VD->getCanonicalDecl(); if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); - else if (CapturedStmtInfo) { + if (CapturedStmtInfo) { auto I = LocalDeclMap.find(VD); if (I != LocalDeclMap.end()) { + LValue CapLVal; if (VD->getType()->isReferenceType()) - return EmitLoadOfReferenceLValue(I->second, VD->getType(), - AlignmentSource::Decl); - return MakeAddrLValue(I->second, T); + CapLVal = EmitLoadOfReferenceLValue(I->second, VD->getType(), + AlignmentSource::Decl); + else + CapLVal = MakeAddrLValue(I->second, T); + // Mark lvalue as nontemporal if the variable is marked as nontemporal + // in simd context. + if (getLangOpts().OpenMP && + CGM.getOpenMPRuntime().isNontemporalDecl(VD)) + CapLVal.setNontemporal(/*Value=*/true); + return CapLVal; } LValue CapLVal = EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), CapturedStmtInfo->getContextValue()); - return MakeAddrLValue( - Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)), + CapLVal = MakeAddrLValue( + Address(CapLVal.getPointer(*this), getContext().getDeclAlign(VD)), CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl), CapLVal.getTBAAInfo()); + // Mark lvalue as nontemporal if the variable is marked as nontemporal + // in simd context. + if (getLangOpts().OpenMP && + CGM.getOpenMPRuntime().isNontemporalDecl(VD)) + CapLVal.setNontemporal(/*Value=*/true); + return CapLVal; } assert(isa<BlockDecl>(CurCodeDecl)); @@ -2711,7 +2729,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { // __real is valid on scalars. This is a faster way of testing that. // __imag can only produce an rvalue on scalars. if (E->getOpcode() == UO_Real && - !LV.getAddress().getElementType()->isStructTy()) { + !LV.getAddress(*this).getElementType()->isStructTy()) { assert(E->getSubExpr()->getType()->isArithmeticType()); return LV; } @@ -2719,9 +2737,9 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { QualType T = ExprTy->castAs<ComplexType>()->getElementType(); Address Component = - (E->getOpcode() == UO_Real - ? emitAddrOfRealComponent(LV.getAddress(), LV.getType()) - : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); + (E->getOpcode() == UO_Real + ? emitAddrOfRealComponent(LV.getAddress(*this), LV.getType()) + : emitAddrOfImagComponent(LV.getAddress(*this), LV.getType())); LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, T)); ElemLV.getQuals().addQualifiers(LV.getQuals()); @@ -3199,6 +3217,9 @@ void CodeGenFunction::EmitCfiCheckFail() { llvm::Function *F = llvm::Function::Create( llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy}, false), llvm::GlobalValue::WeakODRLinkage, "__cfi_check_fail", &CGM.getModule()); + + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F); + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F); F->setVisibility(llvm::GlobalValue::HiddenVisibility); StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args, @@ -3318,7 +3339,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, // Expressions of array type can't be bitfields or vector elements. LValue LV = EmitLValue(E); - Address Addr = LV.getAddress(); + Address Addr = LV.getAddress(*this); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. 
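The setNontemporal markings above implement the OpenMP 5.0 nontemporal clause: within an omp simd region, accesses to listed variables may be emitted with streaming (nontemporal) hints. For example:

    void scale(float *A, float *B, int N) {
    #pragma omp simd nontemporal(A, B)
      for (int I = 0; I < N; ++I)
        B[I] = 2.0f * A[I];   // loads/stores may carry !nontemporal metadata
    }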
@@ -3401,10 +3422,48 @@ static QualType getFixedSizeElementType(const ASTContext &ctx, return eltType; } +/// Given an array base, check whether its member access belongs to a record +/// with preserve_access_index attribute or not. +static bool IsPreserveAIArrayBase(CodeGenFunction &CGF, const Expr *ArrayBase) { + if (!ArrayBase || !CGF.getDebugInfo()) + return false; + + // Only support base as either a MemberExpr or DeclRefExpr. + // DeclRefExpr to cover cases like: + // struct s { int a; int b[10]; }; + // struct s *p; + // p[1].a + // p[1] will generate a DeclRefExpr and p[1].a is a MemberExpr. + // p->b[5] is a MemberExpr example. + const Expr *E = ArrayBase->IgnoreImpCasts(); + if (const auto *ME = dyn_cast<MemberExpr>(E)) + return ME->getMemberDecl()->hasAttr<BPFPreserveAccessIndexAttr>(); + + if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) { + const auto *VarDef = dyn_cast<VarDecl>(DRE->getDecl()); + if (!VarDef) + return false; + + const auto *PtrT = VarDef->getType()->getAs<PointerType>(); + if (!PtrT) + return false; + + const auto *PointeeT = PtrT->getPointeeType() + ->getUnqualifiedDesugaredType(); + if (const auto *RecT = dyn_cast<RecordType>(PointeeT)) + return RecT->getDecl()->hasAttr<BPFPreserveAccessIndexAttr>(); + return false; + } + + return false; +} + static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, ArrayRef<llvm::Value *> indices, QualType eltType, bool inbounds, bool signedIndices, SourceLocation loc, + QualType *arrayType = nullptr, + const Expr *Base = nullptr, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. #ifndef NDEBUG @@ -3426,16 +3485,21 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, llvm::Value *eltPtr; auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back()); - if (!CGF.IsInPreservedAIRegion || !LastIndex) { + if (!LastIndex || + (!CGF.IsInPreservedAIRegion && !IsPreserveAIArrayBase(CGF, Base))) { eltPtr = emitArraySubscriptGEP( CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name); } else { // Remember the original array subscript for bpf target unsigned idx = LastIndex->getZExtValue(); - eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getPointer(), + llvm::DIType *DbgInfo = nullptr; + if (arrayType) + DbgInfo = CGF.getDebugInfo()->getOrCreateStandaloneType(*arrayType, loc); + eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getElementType(), + addr.getPointer(), indices.size() - 1, - idx); + idx, DbgInfo); } return Address(eltPtr, eltAlign); @@ -3478,8 +3542,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); - return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), - LHS.getBaseInfo(), TBAAAccessInfo()); + return LValue::MakeVectorElt(LHS.getAddress(*this), Idx, + E->getBase()->getType(), LHS.getBaseInfo(), + TBAAAccessInfo()); } // All the other cases basically behave like simple offsetting. @@ -3572,19 +3637,22 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, auto *Idx = EmitIdxAfterBase(/*Promote*/true); // Propagate the alignment from the array itself to the result. 
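IsPreserveAIArrayBase above serves BPF CO-RE ("compile once, run everywhere"): records carrying the preserve_access_index attribute get their field and array accesses emitted through relocatable access-index intrinsics instead of baked-in offsets. The two base shapes the comment lists look like:

    struct __attribute__((preserve_access_index)) S {
      int A;
      int B[10];
    };

    int probe(struct S *P) {
      // Both P[1].A (DeclRefExpr base) and P->B[5] (MemberExpr base) are
      // routed through llvm.preserve.*.access.index so the BPF loader can
      // relocate offsets against the running kernel's actual struct layout.
      return P[1].A + P->B[5];
    }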
+ QualType arrayType = Array->getType(); Addr = emitArraySubscriptGEP( - *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + *this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx}, E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, - E->getExprLoc()); + E->getExprLoc(), &arrayType, E->getBase()); EltBaseInfo = ArrayLV.getBaseInfo(); EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType()); } else { // The base must be a pointer; emit it with an estimate of its alignment. Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + QualType ptrType = E->getBase()->getType(); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), !getLangOpts().isSignedOverflowDefined(), - SignedIndices, E->getExprLoc()); + SignedIndices, E->getExprLoc(), &ptrType, + E->getBase()); } LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); @@ -3606,7 +3674,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, if (auto *ASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParenImpCasts())) { BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); if (BaseTy->isArrayType()) { - Address Addr = BaseLVal.getAddress(); + Address Addr = BaseLVal.getAddress(CGF); BaseInfo = BaseLVal.getBaseInfo(); // If the array type was an incomplete type, we need to make sure @@ -3631,7 +3699,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, &TypeTBAAInfo); BaseInfo.mergeForCast(TypeBaseInfo); TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo); - return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); + return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress(CGF)), Align); } return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } @@ -3772,7 +3840,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, // Propagate the alignment from the array itself to the result. 
EltPtr = emitArraySubscriptGEP( - *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + *this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx}, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), /*signedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); @@ -3801,7 +3869,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); - const PointerType *PT = E->getBase()->getType()->getAs<PointerType>(); + const auto *PT = E->getBase()->getType()->castAs<PointerType>(); Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo, TBAAInfo); Base.getQuals().removeObjCGCAttr(); } else if (E->getBase()->isGLValue()) { @@ -3832,7 +3900,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { if (Base.isSimple()) { llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - return LValue::MakeExtVectorElt(Base.getAddress(), CV, type, + return LValue::MakeExtVectorElt(Base.getAddress(*this), CV, type, Base.getBaseInfo(), TBAAAccessInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -3877,6 +3945,15 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { if (auto *Field = dyn_cast<FieldDecl>(ND)) { LValue LV = EmitLValueForField(BaseLV, Field); setObjCGCLValueClass(getContext(), E, LV); + if (getLangOpts().OpenMP) { + // If the member was explicitly marked as nontemporal, mark it as + // nontemporal. If the base lvalue is marked as nontemporal, mark access + // to children as nontemporal too. + if ((IsWrappedCXXThis(BaseExpr) && + CGM.getOpenMPRuntime().isNontemporalDecl(Field)) || + BaseLV.isNontemporal()) + LV.setNontemporal(/*Value=*/true); + } return LV; } @@ -3983,11 +4060,22 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); - Address Addr = base.getAddress(); + Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + const RecordDecl *rec = field->getParent(); + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, + getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); + } + // Get the access type. llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); @@ -4040,7 +4128,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, getContext().getTypeSizeInChars(FieldType).getQuantity(); } - Address addr = base.getAddress(); + Address addr = base.getAddress(*this); if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) { if (CGM.getCodeGenOpts().StrictVTablePointers && ClassDef->isDynamicClass()) { @@ -4056,7 +4144,6 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, unsigned RecordCVR = base.getVRQualifiers(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. 
- assert(!FieldType->isReferenceType() && "union has reference member"); if (CGM.getCodeGenOpts().StrictVTablePointers && hasAnyVptr(FieldType, getContext())) // Because unions can easily skip invariant.barriers, we need to add @@ -4064,7 +4151,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()), addr.getAlignment()); - if (IsInPreservedAIRegion) { + if (IsInPreservedAIRegion || + (getDebugInfo() && rec->hasAttr<BPFPreserveAccessIndexAttr>())) { // Remember the original union field index llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( getContext().getRecordType(rec), rec->getLocation()); @@ -4073,27 +4161,31 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), addr.getAlignment()); } - } else { - if (!IsInPreservedAIRegion) + if (FieldType->isReferenceType()) + addr = Builder.CreateElementBitCast( + addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); + } else { + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) // For structs, we GEP to the field that the record layout suggests. addr = emitAddrOfFieldStorage(*this, addr, field); else // Remember the original struct field index addr = emitPreserveStructAccess(*this, addr, field); + } - // If this is a reference field, load the reference right now. - if (FieldType->isReferenceType()) { - LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo, - FieldTBAAInfo); - if (RecordCVR & Qualifiers::Volatile) - RefLVal.getQuals().addVolatile(); - addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); - - // Qualifiers on the struct don't apply to the referencee. - RecordCVR = 0; - FieldType = FieldType->getPointeeType(); - } + // If this is a reference field, load the reference right now. + if (FieldType->isReferenceType()) { + LValue RefLVal = + MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); + if (RecordCVR & Qualifiers::Volatile) + RefLVal.getQuals().addVolatile(); + addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); + + // Qualifiers on the struct don't apply to the referencee. + RecordCVR = 0; + FieldType = FieldType->getPointeeType(); } // Make sure that the address is pointing to the right type. This is critical @@ -4124,7 +4216,7 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, if (!FieldType->isReferenceType()) return EmitLValueForField(Base, Field); - Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field); + Address V = emitAddrOfFieldStorage(*this, Base.getAddress(*this), Field); // Make sure that the address is pointing to the right type. 
llvm::Type *llvmType = ConvertTypeForMem(FieldType); @@ -4242,10 +4334,10 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { EmitBlock(contBlock); if (lhs && rhs) { - llvm::PHINode *phi = Builder.CreatePHI(lhs->getPointer()->getType(), - 2, "cond-lvalue"); - phi->addIncoming(lhs->getPointer(), lhsBlock); - phi->addIncoming(rhs->getPointer(), rhsBlock); + llvm::PHINode *phi = + Builder.CreatePHI(lhs->getPointer(*this)->getType(), 2, "cond-lvalue"); + phi->addIncoming(lhs->getPointer(*this), lhsBlock); + phi->addIncoming(rhs->getPointer(*this), rhsBlock); Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment())); AlignmentSource alignSource = std::max(lhs->getBaseInfo().getAlignmentSource(), @@ -4328,7 +4420,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_Dynamic: { LValue LV = EmitLValue(E->getSubExpr()); - Address V = LV.getAddress(); + Address V = LV.getAddress(*this); const auto *DCE = cast<CXXDynamicCastExpr>(E); return MakeNaturalAlignAddrLValue(EmitDynamicCast(V, DCE), E->getType()); } @@ -4343,12 +4435,12 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { - const RecordType *DerivedClassTy = - E->getSubExpr()->getType()->getAs<RecordType>(); + const auto *DerivedClassTy = + E->getSubExpr()->getType()->castAs<RecordType>(); auto *DerivedClassDecl = cast<CXXRecordDecl>(DerivedClassTy->getDecl()); LValue LV = EmitLValue(E->getSubExpr()); - Address This = LV.getAddress(); + Address This = LV.getAddress(*this); // Perform the derived-to-base conversion Address Base = GetAddressOfBaseClass( @@ -4364,16 +4456,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_ToUnion: return EmitAggExprToLValue(E); case CK_BaseToDerived: { - const RecordType *DerivedClassTy = E->getType()->getAs<RecordType>(); + const auto *DerivedClassTy = E->getType()->castAs<RecordType>(); auto *DerivedClassDecl = cast<CXXRecordDecl>(DerivedClassTy->getDecl()); LValue LV = EmitLValue(E->getSubExpr()); // Perform the base-to-derived conversion - Address Derived = - GetAddressOfDerivedClass(LV.getAddress(), DerivedClassDecl, - E->path_begin(), E->path_end(), - /*NullCheckValue=*/false); + Address Derived = GetAddressOfDerivedClass( + LV.getAddress(*this), DerivedClassDecl, E->path_begin(), E->path_end(), + /*NullCheckValue=*/false); // C++11 [expr.static.cast]p2: Behavior is undefined if a downcast is // performed and the object is not of the derived type. 
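The PHI over the two arms' pointers above exists because a C++ conditional expression over two lvalues of the same type is itself an lvalue, so the selected address is only known at run time:

    int pick(bool C, int &X, int &Y) {
      (C ? X : Y) = 7;    // lvalue conditional: stores through &X or &Y
      return C ? X : Y;   // both arms' addresses meet in a "cond-lvalue" PHI
    }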
@@ -4395,7 +4486,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { CGM.EmitExplicitCastExprType(CE, this); LValue LV = EmitLValue(E->getSubExpr()); - Address V = Builder.CreateBitCast(LV.getAddress(), + Address V = Builder.CreateBitCast(LV.getAddress(*this), ConvertType(CE->getTypeAsWritten())); if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) @@ -4410,14 +4501,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { LValue LV = EmitLValue(E->getSubExpr()); QualType DestTy = getContext().getPointerType(E->getType()); llvm::Value *V = getTargetHooks().performAddrSpaceCast( - *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(), + *this, LV.getPointer(*this), + E->getSubExpr()->getType().getAddressSpace(), E->getType().getAddressSpace(), ConvertType(DestTy)); - return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()), + return MakeAddrLValue(Address(V, LV.getAddress(*this).getAlignment()), E->getType(), LV.getBaseInfo(), LV.getTBAAInfo()); } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); - Address V = Builder.CreateElementBitCast(LV.getAddress(), + Address V = Builder.CreateElementBitCast(LV.getAddress(*this), ConvertType(E->getType())); return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); @@ -4471,13 +4563,17 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, case TEK_Complex: return RValue::getComplex(EmitLoadOfComplex(FieldLV, Loc)); case TEK_Aggregate: - return FieldLV.asAggregateRValue(); + return FieldLV.asAggregateRValue(*this); case TEK_Scalar: // This routine is used to load fields one-by-one to perform a copy, so // don't load reference fields. if (FD->getType()->isReferenceType()) - return RValue::get(FieldLV.getPointer()); - return EmitLoadOfLValue(FieldLV, Loc); + return RValue::get(FieldLV.getPointer(*this)); + // Call EmitLoadOfScalar except when the lvalue is a bitfield to emit a + // primitive load. + if (FieldLV.isBitField()) + return EmitLoadOfLValue(FieldLV, Loc); + return RValue::get(EmitLoadOfScalar(FieldLV, Loc)); } llvm_unreachable("bad evaluation kind"); } @@ -4525,8 +4621,15 @@ RValue CodeGenFunction::EmitSimpleCallExpr(const CallExpr *E, } static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) { + if (auto builtinID = FD->getBuiltinID()) { - return CGCallee::forBuiltin(builtinID, FD); + // Replaceable builtins provide their own implementation of a builtin. Unless + // we are in the builtin implementation itself, don't call the actual + // builtin. If we are in the builtin implementation, avoid trivial infinite + // recursion.
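The comment above describes the new handling for replaceable builtins: EmitDirectCallee only routes the call through CGCallee::forBuiltin when the callee is not an inline builtin definition, or when the current function is that definition itself (the name test that follows avoids the self-recursion). A hedged sketch of the user-facing pattern this enables, loosely following the C gnu_inline idiom; log_call is a hypothetical hook, and the extern "C" spelling is my assumption for a C++ TU so the unmangled name comparison applies:

    #include <stddef.h>

    extern "C" void log_call(size_t n);  // hypothetical instrumentation

    // A "replaceable builtin": ordinary callers of memcpy pick up this
    // body; the memcpy call inside it still lowers to the real builtin,
    // so the definition does not recurse into itself.
    extern "C" inline __attribute__((always_inline, gnu_inline))
    void *memcpy(void *dst, const void *src, size_t n) {
      log_call(n);
      return memcpy(dst, src, n);
    }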
+ if (!FD->isInlineBuiltinDeclaration() || + CGF.CurFn->getName() == FD->getName()) + return CGCallee::forBuiltin(builtinID, FD); } llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD); @@ -4571,7 +4674,7 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { functionType = ptrType->getPointeeType(); } else { functionType = E->getType(); - calleePtr = EmitLValue(E).getPointer(); + calleePtr = EmitLValue(E).getPointer(*this); } assert(functionType->isFunctionType()); @@ -4623,6 +4726,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); EmitStoreThroughLValue(RV, LV); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); return LV; } @@ -4731,7 +4837,7 @@ LValue CodeGenFunction::EmitObjCIvarRefLValue(const ObjCIvarRefExpr *E) { BaseQuals = ObjectTy.getQualifiers(); } else { LValue BaseLV = EmitLValue(BaseExpr); - BaseValue = BaseLV.getPointer(); + BaseValue = BaseLV.getPointer(*this); ObjectTy = BaseExpr->getType(); BaseQuals = ObjectTy.getQualifiers(); } @@ -4941,13 +5047,11 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { if (E->getOpcode() == BO_PtrMemI) { BaseAddr = EmitPointerWithAlignment(E->getLHS()); } else { - BaseAddr = EmitLValue(E->getLHS()).getAddress(); + BaseAddr = EmitLValue(E->getLHS()).getAddress(*this); } llvm::Value *OffsetV = EmitScalarExpr(E->getRHS()); - - const MemberPointerType *MPT - = E->getRHS()->getType()->getAs<MemberPointerType>(); + const auto *MPT = E->getRHS()->getType()->castAs<MemberPointerType>(); LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; @@ -4968,7 +5072,7 @@ RValue CodeGenFunction::convertTempToRValue(Address addr, case TEK_Complex: return RValue::getComplex(EmitLoadOfComplex(lvalue, loc)); case TEK_Aggregate: - return lvalue.asAggregateRValue(); + return lvalue.asAggregateRValue(*this); case TEK_Scalar: return RValue::get(EmitLoadOfScalar(lvalue, loc)); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp index 0a57870a7c58..8de609a2ccd9 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp @@ -10,20 +10,21 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/StmtVisitor.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" using namespace clang; using namespace CodeGen; @@ -150,6 +151,9 @@ public: void VisitBinAssign(const BinaryOperator *E); void VisitBinComma(const BinaryOperator *E); void VisitBinCmp(const BinaryOperator *E); + void VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + Visit(E->getSemanticForm()); + } void VisitObjCMessageExpr(ObjCMessageExpr *E); void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) { @@ -342,10 +346,9 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src, } } - AggValueSlot srcAgg = - AggValueSlot::forLValue(src, AggValueSlot::IsDestructed, - needsGC(type), 
AggValueSlot::IsAliased, - AggValueSlot::MayOverlap); + AggValueSlot srcAgg = AggValueSlot::forLValue( + src, CGF, AggValueSlot::IsDestructed, needsGC(type), + AggValueSlot::IsAliased, AggValueSlot::MayOverlap); EmitCopy(type, Dest, srcAgg); } @@ -383,7 +386,7 @@ AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) { ASTContext &Ctx = CGF.getContext(); LValue Array = CGF.EmitLValue(E->getSubExpr()); assert(Array.isSimple() && "initializer_list array not a simple lvalue"); - Address ArrayPtr = Array.getAddress(); + Address ArrayPtr = Array.getAddress(CGF); const ConstantArrayType *ArrayType = Ctx.getAsConstantArrayType(E->getSubExpr()->getType()); @@ -490,7 +493,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, if (NumInitElements * elementSize.getQuantity() > 16 && elementType.isTriviallyCopyableType(CGF.getContext())) { CodeGen::CodeGenModule &CGM = CGF.CGM; - ConstantEmitter Emitter(CGM); + ConstantEmitter Emitter(CGF); LangAS AS = ArrayQTy.getAddressSpace(); if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) { auto GV = new llvm::GlobalVariable( @@ -501,7 +504,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, CGM.getContext().getTargetAddressSpace(AS)); Emitter.finalize(GV); CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align)); return; } @@ -634,7 +637,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, //===----------------------------------------------------------------------===// void AggExprEmitter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E){ - Visit(E->GetTemporaryExpr()); + Visit(E->getSubExpr()); } void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) { @@ -685,7 +688,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { CodeGenFunction::TCK_Load); // FIXME: Do we also need to handle property references here? if (LV.isSimple()) - CGF.EmitDynamicCast(LV.getAddress(), cast<CXXDynamicCastExpr>(E)); + CGF.EmitDynamicCast(LV.getAddress(CGF), cast<CXXDynamicCastExpr>(E)); else CGF.CGM.ErrorUnsupported(E, "non-simple lvalue dynamic_cast"); @@ -720,7 +723,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { LValue SourceLV = CGF.EmitLValue(E->getSubExpr()); Address SourceAddress = - Builder.CreateElementBitCast(SourceLV.getAddress(), CGF.Int8Ty); + Builder.CreateElementBitCast(SourceLV.getAddress(CGF), CGF.Int8Ty); Address DestAddress = Builder.CreateElementBitCast(Dest.getAddress(), CGF.Int8Ty); llvm::Value *SizeVal = llvm::ConstantInt::get( @@ -978,10 +981,6 @@ void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) { QualType ArgTy = E->getLHS()->getType(); - // TODO: Handle comparing these types. 
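The VisitCXXRewrittenBinaryOperator visitors added in this patch (here, and in the complex and scalar emitters further down) simply emit the rewritten semantic form, while the VisitBinCmp changes around this hunk build the aggregate comparison-category result out of selects. A minimal C++20 illustration (mine):

    #include <compare>

    struct Point {
      int x, y;
      friend auto operator<=>(const Point &, const Point &) = default;
    };

    bool before(const Point &a, const Point &b) {
      // 'a < b' is a CXXRewrittenBinaryOperator whose semantic form is
      // '(a <=> b) < 0'; the <=> itself produces a std::strong_ordering,
      // an aggregate value handled by VisitBinCmp.
      return a < b;
    }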
- if (ArgTy->isVectorType()) - return CGF.ErrorUnsupported( - E, "aggregate three-way comparison with vector arguments"); if (!ArgTy->isIntegralOrEnumerationType() && !ArgTy->isRealFloatingType() && !ArgTy->isNullPtrType() && !ArgTy->isPointerType() && !ArgTy->isMemberPointerType() && !ArgTy->isAnyComplexType()) { @@ -1019,10 +1018,6 @@ void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) { Value *Select; if (ArgTy->isNullPtrType()) { Select = EmitCmpRes(CmpInfo.getEqualOrEquiv()); - } else if (CmpInfo.isEquality()) { - Select = Builder.CreateSelect( - EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()), - EmitCmpRes(CmpInfo.getNonequalOrNonequiv()), "sel.eq"); } else if (!CmpInfo.isPartial()) { Value *SelectOne = Builder.CreateSelect(EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()), @@ -1160,7 +1155,7 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { } EmitCopy(E->getLHS()->getType(), - AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, + AggValueSlot::forLValue(LHS, CGF, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), AggValueSlot::IsAliased, AggValueSlot::MayOverlap), @@ -1181,11 +1176,9 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { } // Codegen the RHS so that it stores directly into the LHS. - AggValueSlot LHSSlot = - AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, - needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased, - AggValueSlot::MayOverlap); + AggValueSlot LHSSlot = AggValueSlot::forLValue( + LHS, CGF, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), + AggValueSlot::IsAliased, AggValueSlot::MayOverlap); // A non-volatile aggregate destination might have volatile member. if (!LHSSlot.isVolatile() && CGF.hasVolatileMember(E->getLHS()->getType())) @@ -1317,7 +1310,7 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { llvm::Constant::getNullValue(CGF.Int8PtrTy), CharUnits::One()); // placeholder - CGF.pushDestroy(EHCleanup, LV.getAddress(), CurField->getType(), + CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), CurField->getType(), CGF.getDestroyer(DtorKind), false); Cleanups.push_back(CGF.EHStack.stable_begin()); } @@ -1405,12 +1398,11 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) { CGF.EmitComplexExprIntoLValue(E, LV, /*isInit*/ true); return; case TEK_Aggregate: - CGF.EmitAggExpr(E, AggValueSlot::forLValue(LV, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::MayOverlap, - Dest.isZeroed())); + CGF.EmitAggExpr( + E, AggValueSlot::forLValue(LV, CGF, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap, Dest.isZeroed())); return; case TEK_Scalar: if (LV.isSimple()) { @@ -1446,7 +1438,7 @@ void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) { // There's a potential optimization opportunity in combining // memsets; that would be easy for arrays, but relatively // difficult for structures with the current code. 
- CGF.EmitNullInitialization(lv.getAddress(), lv.getType()); + CGF.EmitNullInitialization(lv.getAddress(CGF), lv.getType()); } } @@ -1603,7 +1595,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { = field->getType().isDestructedType()) { assert(LV.isSimple()); if (CGF.needsEHCleanup(dtorKind)) { - CGF.pushDestroy(EHCleanup, LV.getAddress(), field->getType(), + CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), field->getType(), CGF.getDestroyer(dtorKind), false); addCleanup(CGF.EHStack.stable_begin()); pushedCleanup = true; @@ -1614,7 +1606,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // else, clean it up for -O0 builds and general tidiness. if (!pushedCleanup && LV.isSimple()) if (llvm::GetElementPtrInst *GEP = - dyn_cast<llvm::GetElementPtrInst>(LV.getPointer())) + dyn_cast<llvm::GetElementPtrInst>(LV.getPointer(CGF))) if (GEP->use_empty()) GEP->eraseFromParent(); } @@ -1696,9 +1688,8 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, if (InnerLoop) { // If the subexpression is an ArrayInitLoopExpr, share its cleanup. auto elementSlot = AggValueSlot::forLValue( - elementLV, AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, + elementLV, CGF, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap); AggExprEmitter(CGF, elementSlot, false) .VisitArrayInitLoopExpr(InnerLoop, outerBegin); @@ -1759,7 +1750,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { // referencee. InitListExprs for unions and arrays can't have references. if (const RecordType *RT = E->getType()->getAs<RecordType>()) { if (!RT->isUnionType()) { - RecordDecl *SD = E->getType()->getAs<RecordType>()->getDecl(); + RecordDecl *SD = RT->getDecl(); CharUnits NumNonZeroBytes = CharUnits::Zero(); unsigned ILEElement = 0; @@ -1861,10 +1852,10 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { assert(hasAggregateEvaluationKind(E->getType()) && "Invalid argument!"); Address Temp = CreateMemTemp(E->getType()); LValue LV = MakeAddrLValue(Temp, E->getType()); - EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::DoesNotOverlap)); + EmitAggExpr(E, AggValueSlot::forLValue( + LV, *this, AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap)); return LV; } @@ -1913,8 +1904,8 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, bool isVolatile) { assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); - Address DestPtr = Dest.getAddress(); - Address SrcPtr = Src.getAddress(); + Address DestPtr = Dest.getAddress(*this); + Address SrcPtr = Src.getAddress(*this); if (getLangOpts().CPlusPlus) { if (const RecordType *RT = Ty->getAs<RecordType>()) { diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp index 5476d13b7c46..42c1c34c57ad 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprCXX.cpp @@ -129,11 +129,11 @@ RValue CodeGenFunction::EmitCXXPseudoDestructorExpr( // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. 
if (E->isArrow()) { BaseValue = EmitPointerWithAlignment(BaseExpr); - const PointerType *PTy = BaseExpr->getType()->getAs<PointerType>(); + const auto *PTy = BaseExpr->getType()->castAs<PointerType>(); BaseQuals = PTy->getPointeeType().getQualifiers(); } else { LValue BaseLV = EmitLValue(BaseExpr); - BaseValue = BaseLV.getAddress(); + BaseValue = BaseLV.getAddress(*this); QualType BaseTy = BaseExpr->getType(); BaseQuals = BaseTy.getQualifiers(); } @@ -241,16 +241,28 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( } } + bool TrivialForCodegen = + MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion()); + bool TrivialAssignment = + TrivialForCodegen && + (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) && + !MD->getParent()->mayInsertExtraPadding(); + // C++17 demands that we evaluate the RHS of a (possibly-compound) assignment // operator before the LHS. CallArgList RtlArgStorage; CallArgList *RtlArgs = nullptr; + LValue TrivialAssignmentRHS; if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(CE)) { if (OCE->isAssignmentOp()) { - RtlArgs = &RtlArgStorage; - EmitCallArgs(*RtlArgs, MD->getType()->castAs<FunctionProtoType>(), - drop_begin(CE->arguments(), 1), CE->getDirectCallee(), - /*ParamsToSkip*/0, EvaluationOrder::ForceRightToLeft); + if (TrivialAssignment) { + TrivialAssignmentRHS = EmitLValue(CE->getArg(1)); + } else { + RtlArgs = &RtlArgStorage; + EmitCallArgs(*RtlArgs, MD->getType()->castAs<FunctionProtoType>(), + drop_begin(CE->arguments(), 1), CE->getDirectCallee(), + /*ParamsToSkip*/0, EvaluationOrder::ForceRightToLeft); + } } } @@ -271,32 +283,35 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); CallArgList Args; commonEmitCXXMemberOrOperatorCall( - *this, Ctor, This.getPointer(), /*ImplicitParam=*/nullptr, + *this, Ctor, This.getPointer(*this), /*ImplicitParam=*/nullptr, /*ImplicitParamTy=*/QualType(), CE, Args, nullptr); EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, This.getAddress(), Args, + /*Delegating=*/false, This.getAddress(*this), Args, AggValueSlot::DoesNotOverlap, CE->getExprLoc(), /*NewPointerIsChecked=*/false); return RValue::get(nullptr); } - if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) { - if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr); - if (!MD->getParent()->mayInsertExtraPadding()) { - if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) { - // We don't like to generate the trivial copy/move assignment operator - // when it isn't necessary; just produce the proper effect here. - LValue RHS = isa<CXXOperatorCallExpr>(CE) - ? MakeNaturalAlignAddrLValue( - (*RtlArgs)[0].getRValue(*this).getScalarVal(), - (*(CE->arg_begin() + 1))->getType()) - : EmitLValue(*CE->arg_begin()); - EmitAggregateAssign(This, RHS, CE->getType()); - return RValue::get(This.getPointer()); - } - llvm_unreachable("unknown trivial member function"); + if (TrivialForCodegen) { + if (isa<CXXDestructorDecl>(MD)) + return RValue::get(nullptr); + + if (TrivialAssignment) { + // We don't like to generate the trivial copy/move assignment operator + // when it isn't necessary; just produce the proper effect here. + // It's important that we use the result of EmitLValue here rather than + // emitting call arguments, in order to preserve TBAA information from + // the RHS. + LValue RHS = isa<CXXOperatorCallExpr>(CE) + ? 
TrivialAssignmentRHS + : EmitLValue(*CE->arg_begin()); + EmitAggregateAssign(This, RHS, CE->getType()); + return RValue::get(This.getPointer(*this)); } + + assert(MD->getParent()->mayInsertExtraPadding() && + "unknown trivial member function"); } // Compute the function type we're calling. @@ -328,7 +343,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, This.getPointer(), + EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, + This.getPointer(*this), C.getRecordType(CalleeDecl->getParent()), /*Alignment=*/CharUnits::Zero(), SkippedChecks); @@ -345,9 +361,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( "Destructor shouldn't have explicit parameters"); assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); if (UseVirtualCall) { - CGM.getCXXABI().EmitVirtualDestructorCall( - *this, Dtor, Dtor_Complete, This.getAddress(), - cast<CXXMemberCallExpr>(CE)); + CGM.getCXXABI().EmitVirtualDestructorCall(*this, Dtor, Dtor_Complete, + This.getAddress(*this), + cast<CXXMemberCallExpr>(CE)); } else { GlobalDecl GD(Dtor, Dtor_Complete); CGCallee Callee; @@ -362,7 +378,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( QualType ThisTy = IsArrow ? Base->getType()->getPointeeType() : Base->getType(); - EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, + EmitCXXDestructorCall(GD, Callee, This.getPointer(*this), ThisTy, /*ImplicitParam=*/nullptr, /*ImplicitParamTy=*/QualType(), nullptr); } @@ -374,15 +390,14 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CGCallee Callee; if (UseVirtualCall) { - Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); + Callee = CGCallee::forVirtual(CE, MD, This.getAddress(*this), Ty); } else { if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { llvm::Value *VTable; const CXXRecordDecl *RD; - std::tie(VTable, RD) = - CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(), - MD->getParent()); + std::tie(VTable, RD) = CGM.getCXXABI().LoadVTablePtr( + *this, This.getAddress(*this), CalleeDecl->getParent()); EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc()); } @@ -401,12 +416,12 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (MD->isVirtual()) { Address NewThisAddr = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( - *this, CalleeDecl, This.getAddress(), UseVirtualCall); + *this, CalleeDecl, This.getAddress(*this), UseVirtualCall); This.setAddress(NewThisAddr); } return EmitCXXMemberOrOperatorCall( - CalleeDecl, Callee, ReturnValue, This.getPointer(), + CalleeDecl, Callee, ReturnValue, This.getPointer(*this), /*ImplicitParam=*/nullptr, QualType(), CE, RtlArgs); } @@ -418,20 +433,17 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, const Expr *BaseExpr = BO->getLHS(); const Expr *MemFnExpr = BO->getRHS(); - const MemberPointerType *MPT = - MemFnExpr->getType()->castAs<MemberPointerType>(); - - const FunctionProtoType *FPT = - MPT->getPointeeType()->castAs<FunctionProtoType>(); - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); + const auto *MPT = MemFnExpr->getType()->castAs<MemberPointerType>(); + const auto *FPT = MPT->getPointeeType()->castAs<FunctionProtoType>(); + const auto *RD = + cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); // 
Emit the 'this' pointer. Address This = Address::invalid(); if (BO->getOpcode() == BO_PtrMemI) This = EmitPointerWithAlignment(BaseExpr); else - This = EmitLValue(BaseExpr).getAddress(); + This = EmitLValue(BaseExpr).getAddress(*this); EmitTypeCheck(TCK_MemberCall, E->getExprLoc(), This.getPointer(), QualType(MPT->getClass(), 0)); @@ -535,7 +547,7 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, CharUnits Align = std::max(Layout.getNonVirtualAlignment(), DestPtr.getAlignment()); - NullVariable->setAlignment(Align.getQuantity()); + NullVariable->setAlignment(Align.getAsAlign()); Address SrcPtr = Address(CGF.EmitCastToVoidPtr(NullVariable), Align); @@ -1415,8 +1427,7 @@ namespace { } void Emit(CodeGenFunction &CGF, Flags flags) override { - const FunctionProtoType *FPT = - OperatorDelete->getType()->getAs<FunctionProtoType>(); + const auto *FPT = OperatorDelete->getType()->castAs<FunctionProtoType>(); CallArgList DeleteArgs; // The first argument is always a void* (or C* for a destroying operator @@ -1758,9 +1769,7 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, assert((!NumElements && CookieSize.isZero()) || DeleteFD->getOverloadedOperator() == OO_Array_Delete); - const FunctionProtoType *DeleteFTy = - DeleteFD->getType()->getAs<FunctionProtoType>(); - + const auto *DeleteFTy = DeleteFD->getType()->castAs<FunctionProtoType>(); CallArgList DeleteArgs; auto Params = getUsualDeleteParams(DeleteFD); @@ -1882,9 +1891,33 @@ static void EmitObjectDelete(CodeGenFunction &CGF, Dtor = RD->getDestructor(); if (Dtor->isVirtual()) { - CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType, - Dtor); - return; + bool UseVirtualCall = true; + const Expr *Base = DE->getArgument(); + if (auto *DevirtualizedDtor = + dyn_cast_or_null<const CXXDestructorDecl>( + Dtor->getDevirtualizedMethod( + Base, CGF.CGM.getLangOpts().AppleKext))) { + UseVirtualCall = false; + const CXXRecordDecl *DevirtualizedClass = + DevirtualizedDtor->getParent(); + if (declaresSameEntity(getCXXRecord(Base), DevirtualizedClass)) { + // Devirtualized to the class of the base type (the type of the + // whole expression). + Dtor = DevirtualizedDtor; + } else { + // Devirtualized to some other type. Would need to cast the this + // pointer to that type but we don't have support for that yet, so + // do a virtual call. FIXME: handle the case where it is + // devirtualized to the derived type (the type of the inner + // expression) as in EmitCXXMemberOrOperatorMemberCallExpr. + UseVirtualCall = true; + } + } + if (UseVirtualCall) { + CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType, + Dtor); + return; + } } } } @@ -2082,7 +2115,7 @@ static bool isGLValueFromPointerDeref(const Expr *E) { static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, llvm::Type *StdTypeInfoPtrTy) { // Get the vtable pointer. 
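In the EmitObjectDelete hunk above, the destructor is now run through getDevirtualizedMethod before falling back to emitVirtualObjectDelete, mirroring what EmitCXXMemberOrOperatorMemberCallExpr already does. A sketch (mine) of a case the new path should catch:

    struct Base { virtual ~Base(); };
    struct Derived final : Base { ~Derived() override; };

    void destroy(Derived *d) {
      // The static type is final, so the destructor can be called
      // directly instead of through the vtable.
      delete d;
    }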
- Address ThisPtr = CGF.EmitLValue(E).getAddress(); + Address ThisPtr = CGF.EmitLValue(E).getAddress(CGF); QualType SrcRecordTy = E->getType(); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprComplex.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprComplex.cpp index 6a5fb45ba259..f7a4e9e94712 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprComplex.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprComplex.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/StmtVisitor.h" @@ -279,6 +280,10 @@ public: return EmitBinDiv(EmitBinOps(E)); } + ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + return Visit(E->getSemanticForm()); + } + // Compound assignments. ComplexPairTy VisitBinAddAssign(const CompoundAssignOperator *E) { return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinAdd); @@ -344,7 +349,7 @@ ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue, if (lvalue.getType()->isAtomicType()) return CGF.EmitAtomicLoad(lvalue, loc).getComplexVal(); - Address SrcPtr = lvalue.getAddress(); + Address SrcPtr = lvalue.getAddress(CGF); bool isVolatile = lvalue.isVolatileQualified(); llvm::Value *Real = nullptr, *Imag = nullptr; @@ -370,7 +375,7 @@ void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, LValue lvalue, (!isInit && CGF.LValueIsSuitableForInlineAtomic(lvalue))) return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit); - Address Ptr = lvalue.getAddress(); + Address Ptr = lvalue.getAddress(CGF); Address RealPtr = CGF.emitAddrOfRealComponent(Ptr, lvalue.getType()); Address ImagPtr = CGF.emitAddrOfImagComponent(Ptr, lvalue.getType()); @@ -459,14 +464,14 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_LValueBitCast: { LValue origLV = CGF.EmitLValue(Op); - Address V = origLV.getAddress(); + Address V = origLV.getAddress(CGF); V = Builder.CreateElementBitCast(V, CGF.ConvertType(DestTy)); return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy), Op->getExprLoc()); } case CK_LValueToRValueBitCast: { LValue SourceLVal = CGF.EmitLValue(Op); - Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(CGF), CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); @@ -1132,7 +1137,11 @@ ComplexPairTy CodeGenFunction::EmitLoadOfComplex(LValue src, LValue CodeGenFunction::EmitComplexAssignmentLValue(const BinaryOperator *E) { assert(E->getOpcode() == BO_Assign); ComplexPairTy Val; // ignored - return ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val); + LValue LVal = ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); + return LVal; } typedef ComplexPairTy (ComplexExprEmitter::*CompoundFunc)( diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp index 31cf2aef1ba0..46ed90a20264 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprConstant.cpp @@ -10,20 +10,21 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" 
#include "CGRecordLayout.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -659,7 +660,7 @@ static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter, } bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { - RecordDecl *RD = ILE->getType()->getAs<RecordType>()->getDecl(); + RecordDecl *RD = ILE->getType()->castAs<RecordType>()->getDecl(); const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); unsigned FieldNo = -1; @@ -839,7 +840,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, } llvm::Constant *ConstStructBuilder::Finalize(QualType Type) { - RecordDecl *RD = Type->getAs<RecordType>()->getDecl(); + RecordDecl *RD = Type->castAs<RecordType>()->getDecl(); llvm::Type *ValTy = CGM.getTypes().ConvertType(Type); return Builder.build(ValTy, RD->hasFlexibleArrayMember()); } @@ -907,7 +908,7 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(addressSpace)); emitter.finalize(GV); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); CGM.setAddrOfConstantCompoundLiteral(E, GV); return ConstantAddress(GV, Align); } @@ -1173,7 +1174,7 @@ public: llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E, QualType T) { - return Visit(E->GetTemporaryExpr(), T); + return Visit(E->getSubExpr(), T); } llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) { @@ -1269,8 +1270,8 @@ public: return nullptr; // FIXME: We should not have to call getBaseElementType here. 
- const RecordType *RT = - CGM.getContext().getBaseElementType(Ty)->getAs<RecordType>(); + const auto *RT = + CGM.getContext().getBaseElementType(Ty)->castAs<RecordType>(); const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); // If the class doesn't have a trivial destructor, we can't emit it as a @@ -1728,7 +1729,7 @@ struct ConstantLValue { /*implicit*/ ConstantLValue(llvm::Constant *value, bool hasOffsetApplied = false) - : Value(value), HasOffsetApplied(false) {} + : Value(value), HasOffsetApplied(hasOffsetApplied) {} /*implicit*/ ConstantLValue(ConstantAddress address) : ConstantLValue(address.getPointer()) {} @@ -2003,8 +2004,8 @@ ConstantLValueEmitter::VisitMaterializeTemporaryExpr( assert(E->getStorageDuration() == SD_Static); SmallVector<const Expr *, 2> CommaLHSs; SmallVector<SubobjectAdjustment, 2> Adjustments; - const Expr *Inner = E->GetTemporaryExpr() - ->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); + const Expr *Inner = + E->getSubExpr()->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); return CGM.GetAddrOfGlobalTemporary(E, Inner); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp index 3d082de2a14f..de5c3a03fb68 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprScalar.cpp @@ -14,11 +14,13 @@ #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" @@ -34,6 +36,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include <cstdarg> @@ -294,8 +297,7 @@ public: Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue); - CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), - AlignmentCI->getZExtValue()); + CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI); } /// EmitLoadOfLValue - Given an expression with complex type that represents a @@ -616,7 +618,7 @@ public: if (isa<MemberPointerType>(E->getType())) // never sugared return CGF.CGM.getMemberPointerConstant(E); - return EmitLValue(E->getSubExpr()).getPointer(); + return EmitLValue(E->getSubExpr()).getPointer(CGF); } Value *VisitUnaryDeref(const UnaryOperator *E) { if (E->getType()->isVoidType()) @@ -645,8 +647,8 @@ public: auto &Ctx = CGF.getContext(); APValue Evaluated = SLE->EvaluateInContext(Ctx, CGF.CurSourceLocExprScope.getDefaultExpr()); - return ConstantEmitter(CGF.CGM, &CGF) - .emitAbstract(SLE->getLocation(), Evaluated, SLE->getType()); + return ConstantEmitter(CGF).emitAbstract(SLE->getLocation(), Evaluated, + SLE->getType()); } Value *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { @@ -674,6 +676,14 @@ public: return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); } + Value *VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E) { + return Builder.getInt1(E->isSatisfied()); + } + + Value *VisitRequiresExpr(const RequiresExpr *E) { + return Builder.getInt1(E->isSatisfied()); + } + Value *VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) { return 
llvm::ConstantInt::get(Builder.getInt32Ty(), E->getValue()); } @@ -792,17 +802,17 @@ public: // Comparisons. Value *EmitCompare(const BinaryOperator *E, llvm::CmpInst::Predicate UICmpOpc, llvm::CmpInst::Predicate SICmpOpc, - llvm::CmpInst::Predicate FCmpOpc); -#define VISITCOMP(CODE, UI, SI, FP) \ + llvm::CmpInst::Predicate FCmpOpc, bool IsSignaling); +#define VISITCOMP(CODE, UI, SI, FP, SIG) \ Value *VisitBin##CODE(const BinaryOperator *E) { \ return EmitCompare(E, llvm::ICmpInst::UI, llvm::ICmpInst::SI, \ - llvm::FCmpInst::FP); } - VISITCOMP(LT, ICMP_ULT, ICMP_SLT, FCMP_OLT) - VISITCOMP(GT, ICMP_UGT, ICMP_SGT, FCMP_OGT) - VISITCOMP(LE, ICMP_ULE, ICMP_SLE, FCMP_OLE) - VISITCOMP(GE, ICMP_UGE, ICMP_SGE, FCMP_OGE) - VISITCOMP(EQ, ICMP_EQ , ICMP_EQ , FCMP_OEQ) - VISITCOMP(NE, ICMP_NE , ICMP_NE , FCMP_UNE) + llvm::FCmpInst::FP, SIG); } + VISITCOMP(LT, ICMP_ULT, ICMP_SLT, FCMP_OLT, true) + VISITCOMP(GT, ICMP_UGT, ICMP_SGT, FCMP_OGT, true) + VISITCOMP(LE, ICMP_ULE, ICMP_SLE, FCMP_OLE, true) + VISITCOMP(GE, ICMP_UGE, ICMP_SGE, FCMP_OGE, true) + VISITCOMP(EQ, ICMP_EQ , ICMP_EQ , FCMP_OEQ, false) + VISITCOMP(NE, ICMP_NE , ICMP_NE , FCMP_UNE, false) #undef VISITCOMP Value *VisitBinAssign (const BinaryOperator *E); @@ -814,6 +824,10 @@ public: Value *VisitBinPtrMemD(const Expr *E) { return EmitLoadOfLValue(E); } Value *VisitBinPtrMemI(const Expr *E) { return EmitLoadOfLValue(E); } + Value *VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + return Visit(E->getSemanticForm()); + } + // Other Operators. Value *VisitBlockExpr(const BlockExpr *BE); Value *VisitAbstractConditionalOperator(const AbstractConditionalOperator *); @@ -969,6 +983,11 @@ EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, return std::make_pair(Kind, std::make_pair(Check, Mask)); } +static bool PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + QualType SrcType, QualType DstType) { + return SrcType->isIntegerType() && DstType->isIntegerType(); +} + void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc) { @@ -977,7 +996,8 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. - if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) return; unsigned SrcBits = Src->getType()->getScalarSizeInBits(); @@ -1088,7 +1108,8 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. 
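EmitIntegerTruncationCheck (and the sign-change check just below) now share the int-to-int eligibility test through PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck. For reference, the kind of source these checks instrument (my example; built with -fsanitize=implicit-integer-truncation):

    // The implicit unsigned int -> unsigned char conversion is checked;
    // the handler fires at run time whenever 'x' does not fit.
    unsigned char narrow(unsigned int x) {
      return x;
    }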
- if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) return; bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); @@ -1657,8 +1678,8 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { if (SrcTy == DstTy) return Src; - QualType SrcEltType = SrcType->getAs<VectorType>()->getElementType(), - DstEltType = DstType->getAs<VectorType>()->getElementType(); + QualType SrcEltType = SrcType->castAs<VectorType>()->getElementType(), + DstEltType = DstType->castAs<VectorType>()->getElementType(); assert(SrcTy->isVectorTy() && "ConvertVector source IR type must be a vector"); @@ -1965,7 +1986,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_LValueBitCast: case CK_ObjCObjectLValueCast: { - Address Addr = EmitLValue(E).getAddress(); + Address Addr = EmitLValue(E).getAddress(CGF); Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy)); LValue LV = CGF.MakeAddrLValue(Addr, DestTy); return EmitLoadOfLValue(LV, CE->getExprLoc()); @@ -1973,7 +1994,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_LValueToRValueBitCast: { LValue SourceLVal = CGF.EmitLValue(E); - Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(CGF), CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); @@ -2091,7 +2112,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_ArrayToPointerDecay: return CGF.EmitArrayToPointerDecay(E).getPointer(); case CK_FunctionToPointerDecay: - return EmitLValue(E).getPointer(); + return EmitLValue(E).getPointer(CGF); case CK_NullToPointer: if (MustVisitNullValue(E)) @@ -2339,10 +2360,29 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( llvm_unreachable("Unknown SignedOverflowBehaviorTy"); } +namespace { +/// Handles check and update for lastprivate conditional variables. 
+class OMPLastprivateConditionalUpdateRAII { +private: + CodeGenFunction &CGF; + const UnaryOperator *E; + +public: + OMPLastprivateConditionalUpdateRAII(CodeGenFunction &CGF, + const UnaryOperator *E) + : CGF(CGF), E(E) {} + ~OMPLastprivateConditionalUpdateRAII() { + if (CGF.getLangOpts().OpenMP) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional( + CGF, E->getSubExpr()); + } +}; +} // namespace + llvm::Value * ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) { - + OMPLastprivateConditionalUpdateRAII OMPRegion(CGF, E); QualType type = E->getSubExpr()->getType(); llvm::PHINode *atomicPHI = nullptr; llvm::Value *value; @@ -2356,14 +2396,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (isInc && type->isBooleanType()) { llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); if (isPre) { - Builder.CreateStore(True, LV.getAddress(), LV.isVolatileQualified()) - ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); + Builder.CreateStore(True, LV.getAddress(CGF), LV.isVolatileQualified()) + ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); return Builder.getTrue(); } // For atomic bool increment, we just store true and return it for // preincrement, do an atomic swap with true for postincrement return Builder.CreateAtomicRMW( - llvm::AtomicRMWInst::Xchg, LV.getPointer(), True, + llvm::AtomicRMWInst::Xchg, LV.getPointer(CGF), True, llvm::AtomicOrdering::SequentiallyConsistent); } // Special case for atomic increment / decrement on integers, emit @@ -2380,8 +2420,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Instruction::Sub; llvm::Value *amt = CGF.EmitToMemory( llvm::ConstantInt::get(ConvertType(type), 1, true), type); - llvm::Value *old = Builder.CreateAtomicRMW(aop, - LV.getPointer(), amt, llvm::AtomicOrdering::SequentiallyConsistent); + llvm::Value *old = + Builder.CreateAtomicRMW(aop, LV.getPointer(CGF), amt, + llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } value = EmitLoadOfLValue(LV, E->getExprLoc()); @@ -2412,9 +2453,51 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Most common case by far: integer increment. } else if (type->isIntegerType()) { - // Note that signed integer inc/dec with width less than int can't - // overflow because of promotion rules; we're just eliding a few steps here. 
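The RAII helper above re-runs checkAndEmitLastprivateConditional after a unary increment or decrement has been emitted; this patch adds matching calls after plain, complex, and compound assignments as well. The OpenMP 5.0 construct being supported, sketched from the spec (my example):

    int last_positive(int n, const int *a) {
      int last = -1;
      // Each qualifying store to 'last' is tracked so the value from the
      // thread that executed the last updating iteration wins.
    #pragma omp parallel for lastprivate(conditional: last)
      for (int i = 0; i < n; ++i)
        if (a[i] > 0)
          last = a[i];
      return last;
    }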
- if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { + QualType promotedType; + bool canPerformLossyDemotionCheck = false; + if (type->isPromotableIntegerType()) { + promotedType = CGF.getContext().getPromotedIntegerType(type); + assert(promotedType != type && "Shouldn't promote to the same type."); + canPerformLossyDemotionCheck = true; + canPerformLossyDemotionCheck &= + CGF.getContext().getCanonicalType(type) != + CGF.getContext().getCanonicalType(promotedType); + canPerformLossyDemotionCheck &= + PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + type, promotedType); + assert((!canPerformLossyDemotionCheck || + type->isSignedIntegerOrEnumerationType() || + promotedType->isSignedIntegerOrEnumerationType() || + ConvertType(type)->getScalarSizeInBits() == + ConvertType(promotedType)->getScalarSizeInBits()) && + "The following check expects that if we do promotion to different " + "underlying canonical type, at least one of the types (either " + "base or promoted) will be signed, or the bitwidths will match."); + } + if (CGF.SanOpts.hasOneOf( + SanitizerKind::ImplicitIntegerArithmeticValueChange) && + canPerformLossyDemotionCheck) { + // While `x += 1` (for `x` with width less than int) is modeled as + // promotion+arithmetics+demotion, and we can catch lossy demotion with + // ease; inc/dec with width less than int can't overflow because of + // promotion rules, so we omit promotion+demotion, which means that we can + // not catch lossy "demotion". Because we still want to catch these cases + // when the sanitizer is enabled, we perform the promotion, then perform + // the increment/decrement in the wider type, and finally + // perform the demotion. This will catch lossy demotions. + + value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); + Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); + value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); + // Do pass non-default ScalarConversionOpts so that sanitizer check is + // emitted. + value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), + ScalarConversionOpts(CGF.SanOpts)); + + // Note that signed integer inc/dec with width less than int can't + // overflow because of promotion rules; we're just eliding a few steps + // here. + } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { @@ -2577,14 +2660,16 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { TestAndClearIgnoreResultAssign(); + Value *Op = Visit(E->getSubExpr()); + + // Generate a unary FNeg for FP ops. + if (Op->getType()->isFPOrFPVectorTy()) + return Builder.CreateFNeg(Op, "fneg"); + // Emit unary minus with EmitSub so we handle overflow cases etc. BinOpInfo BinOp; - BinOp.RHS = Visit(E->getSubExpr()); - - if (BinOp.RHS->getType()->isFPOrFPVectorTy()) - BinOp.LHS = llvm::ConstantFP::getZeroValueForNegation(BinOp.RHS->getType()); - else - BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); + BinOp.RHS = Op; + BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); BinOp.Ty = E->getType(); BinOp.Opcode = BO_Sub; // FIXME: once UnaryOperator carries FPFeatures, copy it here. 
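With the VisitUnaryMinus change above, floating-point negation is now emitted as a single fneg instruction instead of a subtraction from negative zero (buildFMulAdd below gets the same treatment); fneg is a pure sign-bit flip. Illustration, using the value names the builder picks:

    // C++ source:
    float negate(float x) { return -x; }

    // before: %sub  = fsub float -0.000000e+00, %x
    // now:    %fneg = fneg float %x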
@@ -2662,7 +2747,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { case OffsetOfNode::Field: { FieldDecl *MemberDecl = ON.getField(); - RecordDecl *RD = CurrentType->getAs<RecordType>()->getDecl(); + RecordDecl *RD = CurrentType->castAs<RecordType>()->getDecl(); const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD); // Compute the index of the field in its parent. @@ -2695,7 +2780,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { continue; } - RecordDecl *RD = CurrentType->getAs<RecordType>()->getDecl(); + RecordDecl *RD = CurrentType->castAs<RecordType>()->getDecl(); const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD); // Save the element type. @@ -2840,7 +2925,8 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) && CGF.getLangOpts().getSignedOverflowBehavior() != LangOptions::SOB_Trapping) { - llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP; + llvm::AtomicRMWInst::BinOp AtomicOp = llvm::AtomicRMWInst::BAD_BINOP; + llvm::Instruction::BinaryOps Op; switch (OpInfo.Opcode) { // We don't have atomicrmw operands for *, %, /, <<, >> case BO_MulAssign: case BO_DivAssign: @@ -2849,30 +2935,40 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( case BO_ShrAssign: break; case BO_AddAssign: - aop = llvm::AtomicRMWInst::Add; + AtomicOp = llvm::AtomicRMWInst::Add; + Op = llvm::Instruction::Add; break; case BO_SubAssign: - aop = llvm::AtomicRMWInst::Sub; + AtomicOp = llvm::AtomicRMWInst::Sub; + Op = llvm::Instruction::Sub; break; case BO_AndAssign: - aop = llvm::AtomicRMWInst::And; + AtomicOp = llvm::AtomicRMWInst::And; + Op = llvm::Instruction::And; break; case BO_XorAssign: - aop = llvm::AtomicRMWInst::Xor; + AtomicOp = llvm::AtomicRMWInst::Xor; + Op = llvm::Instruction::Xor; break; case BO_OrAssign: - aop = llvm::AtomicRMWInst::Or; + AtomicOp = llvm::AtomicRMWInst::Or; + Op = llvm::Instruction::Or; break; default: llvm_unreachable("Invalid compound assignment type"); } - if (aop != llvm::AtomicRMWInst::BAD_BINOP) { - llvm::Value *amt = CGF.EmitToMemory( + if (AtomicOp != llvm::AtomicRMWInst::BAD_BINOP) { + llvm::Value *Amt = CGF.EmitToMemory( EmitScalarConversion(OpInfo.RHS, E->getRHS()->getType(), LHSTy, E->getExprLoc()), LHSTy); - Builder.CreateAtomicRMW(aop, LHSLV.getPointer(), amt, + Value *OldVal = Builder.CreateAtomicRMW( + AtomicOp, LHSLV.getPointer(CGF), Amt, llvm::AtomicOrdering::SequentiallyConsistent); + + // Since operation is atomic, the result type is guaranteed to be the + // same as the input in LLVM terms. + Result = Builder.CreateBinOp(Op, OldVal, Amt); return LHSLV; } } @@ -2925,6 +3021,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); + if (CGF.getLangOpts().OpenMP) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, + E->getLHS()); return LHSLV; } @@ -3192,10 +3291,10 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, expr->getRHS())) return CGF.Builder.CreateIntToPtr(index, pointer->getType()); - if (width != DL.getTypeSizeInBits(PtrTy)) { + if (width != DL.getIndexTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. 
- index = CGF.Builder.CreateIntCast(index, DL.getIntPtrType(PtrTy), isSigned, + index = CGF.Builder.CreateIntCast(index, DL.getIndexType(PtrTy), isSigned, "idx.ext"); } @@ -3249,7 +3348,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // GNU void* casts amount to no-ops since our void* type is i8*, but this is // future proof. if (elementType->isVoidType() || elementType->isFunctionType()) { - Value *result = CGF.Builder.CreateBitCast(pointer, CGF.VoidPtrTy); + Value *result = CGF.EmitCastToVoidPtr(pointer); result = CGF.Builder.CreateGEP(result, index, "add.ptr"); return CGF.Builder.CreateBitCast(result, pointer->getType()); } @@ -3273,17 +3372,10 @@ static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend, Value *MulOp0 = MulOp->getOperand(0); Value *MulOp1 = MulOp->getOperand(1); - if (negMul) { - MulOp0 = - Builder.CreateFSub( - llvm::ConstantFP::getZeroValueForNegation(MulOp0->getType()), MulOp0, - "neg"); - } else if (negAdd) { - Addend = - Builder.CreateFSub( - llvm::ConstantFP::getZeroValueForNegation(Addend->getType()), Addend, - "neg"); - } + if (negMul) + MulOp0 = Builder.CreateFNeg(MulOp0, "neg"); + if (negAdd) + Addend = Builder.CreateFNeg(Addend, "neg"); Value *FMulAdd = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()), @@ -3716,7 +3808,8 @@ static llvm::Intrinsic::ID GetIntrinsic(IntrinsicType IT, Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, llvm::CmpInst::Predicate UICmpOpc, llvm::CmpInst::Predicate SICmpOpc, - llvm::CmpInst::Predicate FCmpOpc) { + llvm::CmpInst::Predicate FCmpOpc, + bool IsSignaling) { TestAndClearIgnoreResultAssign(); Value *Result; QualType LHSTy = E->getLHS()->getType(); @@ -3745,9 +3838,8 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Value *FirstVecArg = LHS, *SecondVecArg = RHS; - QualType ElTy = LHSTy->getAs<VectorType>()->getElementType(); - const BuiltinType *BTy = ElTy->getAs<BuiltinType>(); - BuiltinType::Kind ElementKind = BTy->getKind(); + QualType ElTy = LHSTy->castAs<VectorType>()->getElementType(); + BuiltinType::Kind ElementKind = ElTy->castAs<BuiltinType>()->getKind(); switch(E->getOpcode()) { default: llvm_unreachable("is not a comparison operation"); @@ -3812,7 +3904,10 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, if (BOInfo.isFixedPointBinOp()) { Result = EmitFixedPointBinOp(BOInfo); } else if (LHS->getType()->isFPOrFPVectorTy()) { - Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp"); + if (!IsSignaling) + Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp"); + else + Result = Builder.CreateFCmpS(FCmpOpc, LHS, RHS, "cmp"); } else if (LHSTy->hasSignedIntegerRepresentation()) { Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp"); } else { @@ -3869,6 +3964,8 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Value *ResultR, *ResultI; if (CETy->isRealFloatingType()) { + // As complex comparisons can only be equality comparisons, they + // are never signaling comparisons. 
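The extra flag threaded through VISITCOMP above marks the IEEE 754 ordering comparisons (<, <=, >, >=) as signaling while keeping == and != quiet, so under strict exception semantics the former are emitted via Builder.CreateFCmpS and raise the invalid flag even for quiet NaN operands (outside constrained-FP regions this should still fold to an ordinary fcmp, as far as I can tell). For example:

    bool lt(double a, double b) { return a < b; }   // signaling: a quiet NaN
                                                    // operand raises invalid
    bool eq(double a, double b) { return a == b; }  // quiet: NaN simply
                                                    // compares unequal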
ResultR = Builder.CreateFCmp(FCmpOpc, LHS.first, RHS.first, "cmp.r"); ResultI = Builder.CreateFCmp(FCmpOpc, LHS.second, RHS.second, "cmp.i"); } else { @@ -3913,7 +4010,7 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_Weak: RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); - RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore); + RHS = CGF.EmitARCStoreWeak(LHS.getAddress(CGF), RHS, Ignore); break; case Qualifiers::OCL_None: @@ -4218,6 +4315,21 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { return tmp5; } + if (condExpr->getType()->isVectorType()) { + CGF.incrementProfileCounter(E); + + llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); + llvm::Value *LHS = Visit(lhsExpr); + llvm::Value *RHS = Visit(rhsExpr); + + llvm::Type *CondType = ConvertType(condExpr->getType()); + auto *VecTy = cast<llvm::VectorType>(CondType); + llvm::Value *ZeroVec = llvm::Constant::getNullValue(VecTy); + + CondV = Builder.CreateICmpNE(CondV, ZeroVec, "vector_cond"); + return Builder.CreateSelect(CondV, LHS, RHS, "vector_select"); + } + // If this is a really simple expression (like x ? 4 : 5), emit this as a // select instead of as control flow. We can only do this if it is cheap and // safe to evaluate the LHS and RHS unconditionally. @@ -4414,8 +4526,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { return Src; } - return Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), - Src, DstTy, "astype"); + return createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), - Src, DstTy, "astype"); + Src, DstTy, "astype"); } Value *ScalarExprEmitter::VisitAtomicExpr(AtomicExpr *E) { @@ -4474,7 +4586,7 @@ LValue CodeGenFunction::EmitObjCIsaExpr(const ObjCIsaExpr *E) { if (BaseExpr->isRValue()) { Addr = Address(EmitScalarExpr(BaseExpr), getPointerAlign()); } else { - Addr = EmitLValue(BaseExpr).getAddress(); + Addr = EmitLValue(BaseExpr).getAddress(*this); } // Cast the address to Class*. @@ -4533,32 +4645,43 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( llvm_unreachable("Unhandled compound assignment operator"); } -Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, - ArrayRef<Value *> IdxList, - bool SignedIndices, - bool IsSubtraction, - SourceLocation Loc, - const Twine &Name) { - Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); +struct GEPOffsetAndOverflow { + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset; + // The offset overflow flag - true if the total offset overflows. + llvm::Value *OffsetOverflows; +}; - // If the pointer overflow sanitizer isn't enabled, do nothing. - if (!SanOpts.has(SanitizerKind::PointerOverflow)) - return GEPVal; +/// Evaluate given GEPVal, which is either an inbounds GEP, or a constant, +/// and compute the total offset it applies from its base pointer BasePtr. +/// Returns offset in bytes and a boolean flag whether an overflow happened +/// during evaluation. +static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, + llvm::LLVMContext &VMContext, + CodeGenModule &CGM, + CGBuilderTy Builder) { + const auto &DL = CGM.getDataLayout(); - // If the GEP has already been reduced to a constant, leave it be. - if (isa<llvm::Constant>(GEPVal)) - return GEPVal; + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset = nullptr; - // Only check for overflows in the default address space
- if (GEPVal->getType()->getPointerAddressSpace()) - return GEPVal; + // Was the GEP already reduced to a constant? + if (isa<llvm::Constant>(GEPVal)) { + // Compute the offset by casting both pointers to integers and subtracting: + // GEPVal = BasePtr + ptr(Offset) <--> Offset = int(GEPVal) - int(BasePtr) + Value *BasePtr_int = + Builder.CreatePtrToInt(BasePtr, DL.getIntPtrType(BasePtr->getType())); + Value *GEPVal_int = + Builder.CreatePtrToInt(GEPVal, DL.getIntPtrType(GEPVal->getType())); + TotalOffset = Builder.CreateSub(GEPVal_int, BasePtr_int); + return {TotalOffset, /*OffsetOverflows=*/Builder.getFalse()}; + } auto *GEP = cast<llvm::GEPOperator>(GEPVal); + assert(GEP->getPointerOperand() == BasePtr && + "BasePtr must be the base of the GEP."); assert(GEP->isInBounds() && "Expected inbounds GEP"); - SanitizerScope SanScope(this); - auto &VMContext = getLLVMContext(); - const auto &DL = CGM.getDataLayout(); auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); // Grab references to the signed add/mul overflow intrinsics for intptr_t. @@ -4568,8 +4691,6 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, auto *SMulIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy); - // The total (signed) byte offset for the GEP. - llvm::Value *TotalOffset = nullptr; // The offset overflow flag - true if the total offset overflows. llvm::Value *OffsetOverflows = Builder.getFalse(); @@ -4627,41 +4748,122 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, TotalOffset = eval(BO_Add, TotalOffset, LocalOffset); } - // Common case: if the total offset is zero, don't emit a check. - if (TotalOffset == Zero) + return {TotalOffset, OffsetOverflows}; +} + +Value * +CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, + bool SignedIndices, bool IsSubtraction, + SourceLocation Loc, const Twine &Name) { + Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); + + // If the pointer overflow sanitizer isn't enabled, do nothing. + if (!SanOpts.has(SanitizerKind::PointerOverflow)) + return GEPVal; + + llvm::Type *PtrTy = Ptr->getType(); + + // Perform nullptr-and-offset check unless the nullptr is defined. + bool PerformNullCheck = !NullPointerIsDefined( + Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace()); + // Check for overflows unless the GEP got constant-folded, + // and only in the default address space + bool PerformOverflowCheck = + !isa<llvm::Constant>(GEPVal) && PtrTy->getPointerAddressSpace() == 0; + + if (!(PerformNullCheck || PerformOverflowCheck)) + return GEPVal; + + const auto &DL = CGM.getDataLayout(); + + SanitizerScope SanScope(this); + llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy); + + GEPOffsetAndOverflow EvaluatedGEP = + EmitGEPOffsetInBytes(Ptr, GEPVal, getLLVMContext(), CGM, Builder); + + assert((!isa<llvm::Constant>(EvaluatedGEP.TotalOffset) || + EvaluatedGEP.OffsetOverflows == Builder.getFalse()) && + "If the offset got constant-folded, we don't expect that there was an " + "overflow." ); + + auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); + + // Common case: if the total offset is zero, and we are using C++ semantics, + // where nullptr+0 is defined, don't emit a check. + if (EvaluatedGEP.TotalOffset == Zero && CGM.getLangOpts().CPlusPlus) + return GEPVal; + + // Now that we've computed the total offset, add it to the base pointer (with // wrapping semantics).
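The rewritten EmitCheckedInBoundsGEP (continued below) splits -fsanitize=pointer-overflow into two checks: a null-pointer-plus-offset check, skipped when null is a defined address for the function's address space, and the pre-existing offset-overflow check, skipped for constant-folded GEPs and non-default address spaces. The language split described in the comments, as a sketch (mine):

    // With -fsanitize=pointer-overflow:
    int *at(int *p, long i) {
      return p + i;  // C++: accepted when p == nullptr && i == 0, since
                     // both base and result are null; C: a null base is
                     // flagged even for i == 0, as nullptr + 0 is
                     // undefined there.
    }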
- auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy); - auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset); - - // The GEP is valid if: - // 1) The total offset doesn't overflow, and - // 2) The sign of the difference between the computed address and the base - // pointer matches the sign of the total offset. - llvm::Value *ValidGEP; - auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); - if (SignedIndices) { - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); - llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateAnd( - Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), - NoOffsetOverflow); - } else if (!SignedIndices && !IsSubtraction) { - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); - } else { - auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); + auto *IntPtr = Builder.CreatePtrToInt(Ptr, IntPtrTy); + auto *ComputedGEP = Builder.CreateAdd(IntPtr, EvaluatedGEP.TotalOffset); + + llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks; + + if (PerformNullCheck) { + // In C++, if the base pointer evaluates to a null pointer value, + // the only valid pointer this inbounds GEP can produce is also + // a null pointer, so the offset must also evaluate to zero. + // Likewise, if we have non-zero base pointer, we can not get null pointer + // as a result, so the offset can not be -intptr_t(BasePtr). + // In other words, both pointers are either null, or both are non-null, + // or the behaviour is undefined. + // + // C, however, is more strict in this regard, and gives more + // optimization opportunities: in C, additionally, nullptr+0 is undefined. + // So both the input to the 'gep inbounds' AND the output must not be null. + auto *BaseIsNotNullptr = Builder.CreateIsNotNull(Ptr); + auto *ResultIsNotNullptr = Builder.CreateIsNotNull(ComputedGEP); + auto *Valid = + CGM.getLangOpts().CPlusPlus + ? Builder.CreateICmpEQ(BaseIsNotNullptr, ResultIsNotNullptr) + : Builder.CreateAnd(BaseIsNotNullptr, ResultIsNotNullptr); + Checks.emplace_back(Valid, SanitizerKind::PointerOverflow); + } + + if (PerformOverflowCheck) { + // The GEP is valid if: + // 1) The total offset doesn't overflow, and + // 2) The sign of the difference between the computed address and the base + // pointer matches the sign of the total offset. + llvm::Value *ValidGEP; + auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows); + if (SignedIndices) { + // GEP is computed as `unsigned base + signed offset`, therefore: + // * If offset was positive, then the computed pointer can not be + // [unsigned] less than the base pointer, unless it overflowed. + // * If offset was negative, then the computed pointer can not be + // [unsigned] greater than the bas pointere, unless it overflowed. 
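// [Editor's note] The checks being assembled around this point, condensed
// into one predicate. A sketch only: Base and Offset stand for IntPtr and
// EvaluatedGEP.TotalOffset above, and the null rule follows the C/C++
// comment above (in C, even nullptr + 0 is undefined).
#include <cstdint>

static bool valid_inbounds_gep(uint64_t Base, int64_t Offset, bool Overflowed,
                               bool SignedIndices, bool IsSubtraction,
                               bool CPlusPlus) {
  uint64_t Computed = Base + (uint64_t)Offset; // wrapping, like CreateAdd
  bool NullOK = CPlusPlus ? (Base != 0) == (Computed != 0)
                          : (Base != 0) && (Computed != 0);
  bool SignOK;
  if (SignedIndices)
    SignOK = Offset >= 0 ? Computed >= Base : Computed < Base;
  else if (!IsSubtraction)
    SignOK = Computed >= Base; // ~ @llvm.uadd.with.overflow
  else
    SignOK = Computed <= Base; // ~ @llvm.usub.with.overflow
  return NullOK && SignOK && !Overflowed;
}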
+ auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + auto *PosOrZeroOffset = + Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + ValidGEP = + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid); + } else if (!IsSubtraction) { + // GEP is computed as `unsigned base + unsigned offset`, therefore the + // computed pointer cannot be [unsigned] less than the base pointer, + // unless there was an overflow. + // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`. + ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + } else { + // GEP is computed as `unsigned base - unsigned offset`, therefore the + // computed pointer cannot be [unsigned] greater than the base pointer, + // unless there was an overflow. + // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`. + ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr); + } + ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow); + Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow); } + assert(!Checks.empty() && "Should have produced some checks."); + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; // Pass the computed GEP to the runtime to avoid emitting poisoned arguments. llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; - EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow), - SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); + EmitCheck(Checks, SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); return GEPVal; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.cpp index b2bc42bfa013..e4b184eb8798 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.cpp @@ -218,6 +218,7 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, if (Attrs.VectorizeEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified || Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) Enabled = true; @@ -251,6 +252,22 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, Args.push_back(TempNode.get()); Args.append(LoopProperties.begin(), LoopProperties.end()); + // Setting vectorize.predicate + bool IsVectorPredicateEnabled = false; + if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified && + Attrs.VectorizeEnable != LoopAttributes::Disable && + Attrs.VectorizeWidth < 1) { + + IsVectorPredicateEnabled = + (Attrs.VectorizePredicateEnable == LoopAttributes::Enable); + + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.vectorize.predicate.enable"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt1Ty(Ctx), + IsVectorPredicateEnabled))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { Metadata *Vals[] = { @@ -269,14 +286,18 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting vectorize.enable - if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.vectorize.enable"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt1Ty(Ctx), - (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; - Args.push_back(MDNode::get(Ctx, Vals)); + // vectorize.enable is set if: + // 1) loop hint 
vectorize.enable is set, or + // 2) it is implied when vectorize.predicate is set, or + // 3) it is implied when vectorize.width is set. + if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + IsVectorPredicateEnabled || + Attrs.VectorizeWidth > 1 ) { + bool AttrVal = Attrs.VectorizeEnable != LoopAttributes::Disable; + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), AttrVal))})); } if (FollowupHasTransforms) @@ -411,7 +432,8 @@ MDNode *LoopInfo::createMetadata( LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), - UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0), + UnrollAndJamEnable(LoopAttributes::Unspecified), + VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0) {} @@ -425,6 +447,7 @@ void LoopAttributes::clear() { VectorizeEnable = LoopAttributes::Unspecified; UnrollEnable = LoopAttributes::Unspecified; UnrollAndJamEnable = LoopAttributes::Unspecified; + VectorizePredicateEnable = LoopAttributes::Unspecified; DistributeEnable = LoopAttributes::Unspecified; PipelineDisabled = false; PipelineInitiationInterval = 0; @@ -446,6 +469,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && + Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && @@ -480,6 +504,7 @@ void LoopInfo::finish() { BeforeJam.InterleaveCount = Attrs.InterleaveCount; BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; BeforeJam.DistributeEnable = Attrs.DistributeEnable; + BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; switch (Attrs.UnrollEnable) { case LoopAttributes::Unspecified: @@ -495,6 +520,7 @@ void LoopInfo::finish() { break; } + AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; AfterJam.UnrollCount = Attrs.UnrollCount; AfterJam.PipelineDisabled = Attrs.PipelineDisabled; AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; @@ -516,6 +542,7 @@ void LoopInfo::finish() { // add it manually. SmallVector<Metadata *, 1> BeforeLoopProperties; if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || + BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified || BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) BeforeLoopProperties.push_back( MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); @@ -537,8 +564,9 @@ void LoopInfo::finish() { void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) { - Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, - Active.empty() ? nullptr : &Active.back())); + Active.emplace_back( + new LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, + Active.empty() ? nullptr : Active.back().get())); // Clear the attributes so nested loops do not inherit them. 
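// [Editor's note] Source-level trigger for the metadata above, using Clang's
// existing loop-hint pragma. With this change the predicate hint alone also
// implies llvm.loop.vectorize.enable (as does an explicit vectorize_width > 1):
static void saxpy(float *x, const float *y, float a, int n) {
#pragma clang loop vectorize_predicate(enable)
  for (int i = 0; i < n; ++i) // emits llvm.loop.vectorize.predicate.enable=true
    x[i] += a * y[i];         // and, implied, llvm.loop.vectorize.enable=true
}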
StagedAttrs.clear(); } @@ -603,6 +631,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollAndJam: setUnrollAndJamState(LoopAttributes::Disable); break; + case LoopHintAttr::VectorizePredicate: + setVectorizePredicateState(LoopAttributes::Disable); + break; case LoopHintAttr::Distribute: setDistributeState(false); break; @@ -630,6 +661,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollAndJam: setUnrollAndJamState(LoopAttributes::Enable); break; + case LoopHintAttr::VectorizePredicate: + setVectorizePredicateState(LoopAttributes::Enable); + break; case LoopHintAttr::Distribute: setDistributeState(true); break; @@ -653,6 +687,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: + case LoopHintAttr::VectorizePredicate: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -681,6 +716,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::VectorizePredicate: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -704,6 +740,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: + case LoopHintAttr::VectorizePredicate: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: @@ -721,16 +758,16 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, void LoopInfoStack::pop() { assert(!Active.empty() && "No active loops to pop"); - Active.back().finish(); + Active.back()->finish(); Active.pop_back(); } void LoopInfoStack::InsertHelper(Instruction *I) const { if (I->mayReadOrWriteMemory()) { SmallVector<Metadata *, 4> AccessGroups; - for (const LoopInfo &AL : Active) { + for (const auto &AL : Active) { // Here we assume that every loop that has an access group is parallel. - if (MDNode *Group = AL.getAccessGroup()) + if (MDNode *Group = AL->getAccessGroup()) AccessGroups.push_back(Group); } MDNode *UnionMD = nullptr; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.h b/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.h index 35d0e00527b9..5abcf37c5433 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGLoopInfo.h @@ -51,6 +51,9 @@ struct LoopAttributes { /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full). LVEnableState UnrollAndJamEnable; + /// Value for llvm.loop.vectorize.predicate metadata + LVEnableState VectorizePredicateEnable; + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; @@ -237,6 +240,11 @@ public: StagedAttrs.UnrollEnable = State; } + /// Set the next pushed vectorize predicate state. + void setVectorizePredicateState(const LoopAttributes::LVEnableState &State) { + StagedAttrs.VectorizePredicateEnable = State; + } + /// Set the next pushed loop unroll_and_jam state. void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollAndJamEnable = State; @@ -267,11 +275,11 @@ private: bool hasInfo() const { return !Active.empty(); } /// Return the LoopInfo for the current loop. HasInfo should be called /// first to ensure LoopInfo is present. 
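// [Editor's note] Why the declarations just below switch Active to
// unique_ptr: each LoopInfo holds a pointer to its parent entry, and growing
// a SmallVector of LoopInfo by value could reallocate and invalidate that
// pointer. A minimal sketch of the pattern (Node and Stack are hypothetical):
#include <memory>
#include <vector>

struct Node {
  Node *Parent; // must stay valid while nested entries are pushed
  explicit Node(Node *P) : Parent(P) {}
};

struct Stack {
  std::vector<std::unique_ptr<Node>> Active; // elements keep stable addresses
  void push() {
    Active.emplace_back(
        std::make_unique<Node>(Active.empty() ? nullptr : Active.back().get()));
  }
};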
- const LoopInfo &getInfo() const { return Active.back(); } + const LoopInfo &getInfo() const { return *Active.back(); } /// The set of attributes that will be applied to the next pushed loop. LoopAttributes StagedAttrs; /// Stack of active loops. - llvm::SmallVector<LoopInfo, 4> Active; + llvm::SmallVector<std::unique_ptr<LoopInfo>, 4> Active; }; } // end namespace CodeGen diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGNonTrivialStruct.cpp index caf62d2ac93a..d5f378c52232 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -707,7 +707,7 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); - CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV); + CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress(*CGF)), SrcLV); CGF->EmitStoreOfScalar(SrcVal, CGF->MakeAddrLValue(Addrs[DstIdx], QT), /* isInitialization */ true); } @@ -770,7 +770,7 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); - CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV); + CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress(*CGF)), SrcLV); LValue DstLV = CGF->MakeAddrLValue(Addrs[DstIdx], QT); llvm::Value *DstVal = CGF->EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); @@ -806,7 +806,8 @@ void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF, // such structure. void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) { GenDefaultInitialize Gen(getContext()); - Address DstPtr = Builder.CreateBitCast(Dst.getAddress(), CGM.Int8PtrPtrTy); + Address DstPtr = + Builder.CreateBitCast(Dst.getAddress(*this), CGM.Int8PtrPtrTy); Gen.setCGF(this); QualType QT = Dst.getType(); QT = Dst.isVolatile() ? QT.withVolatile() : QT; @@ -817,13 +818,14 @@ template <class G, size_t N> static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, CodeGenFunction &CGF, std::array<Address, N> Addrs) { + auto SetArtificialLoc = ApplyDebugLocation::CreateArtificial(CGF); for (unsigned I = 0; I < N; ++I) Addrs[I] = CGF.Builder.CreateBitCast(Addrs[I], CGF.CGM.Int8PtrPtrTy); QT = IsVolatile ? QT.withVolatile() : QT; Gen.callFunc(FuncName, QT, Addrs, CGF); } -template <size_t N> std::array<Address, N> createNullAddressArray(); +template <size_t N> static std::array<Address, N> createNullAddressArray(); template <> std::array<Address, 1> createNullAddressArray() { return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}}); @@ -849,7 +851,7 @@ getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, // Functions to emit calls to the special functions of a non-trivial C struct. 
void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); - Address DstPtr = Dst.getAddress(); + Address DstPtr = Dst.getAddress(*this); QualType QT = Dst.getType(); GenDefaultInitializeFuncName GenName(DstPtr.getAlignment(), getContext()); std::string FuncName = GenName.getName(QT, IsVolatile); @@ -873,7 +875,7 @@ std::string CodeGenFunction::getNonTrivialDestructorStr(QualType QT, void CodeGenFunction::callCStructDestructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); - Address DstPtr = Dst.getAddress(); + Address DstPtr = Dst.getAddress(*this); QualType QT = Dst.getType(); GenDestructorFuncName GenName("__destructor_", DstPtr.getAlignment(), getContext()); @@ -884,7 +886,7 @@ void CodeGenFunction::callCStructDestructor(LValue Dst) { void CodeGenFunction::callCStructCopyConstructor(LValue Dst, LValue Src) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName<false> GenName("__copy_constructor_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); @@ -898,7 +900,7 @@ void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst, LValue Src ) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName<false> GenName("__copy_assignment_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); @@ -909,7 +911,7 @@ void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst, LValue Src void CodeGenFunction::callCStructMoveConstructor(LValue Dst, LValue Src) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName<true> GenName("__move_constructor_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); @@ -923,7 +925,7 @@ void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src ) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName<true> GenName("__move_assignment_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp index 1dd7ec52230e..90fca2836d99 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjC.cpp @@ -17,6 +17,7 @@ #include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/StmtObjC.h" #include "clang/Basic/Diagnostic.h" @@ -143,7 +144,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, NumElements); QualType ElementType = Context.getObjCIdType().withConst(); QualType ElementArrayType - = Context.getConstantArrayType(ElementType, APNumElements, + = Context.getConstantArrayType(ElementType, APNumElements, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // Allocate the temporary array(s). 
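// [Editor's note] Context for the next hunk: the specialized-send helpers
// turn common message sends into direct Objective-C runtime calls, so
// '[[Cls alloc] init]' becomes a single objc_alloc_init() call on the
// caller side. The runtime entry points, declared here only for illustration:
extern "C" {
typedef struct objc_object *id;
typedef struct objc_class *Class;
id objc_alloc(Class cls);      // emitted for [cls alloc]
id objc_alloc_init(Class cls); // emitted for [[cls alloc] init]
}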
@@ -430,6 +431,20 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, return None; } +CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend( + CodeGenFunction &CGF, ReturnValueSlot Return, QualType ResultType, + Selector Sel, llvm::Value *Receiver, const CallArgList &Args, + const ObjCInterfaceDecl *OID, const ObjCMethodDecl *Method, + bool isClassMessage) { + if (Optional<llvm::Value *> SpecializedResult = + tryGenerateSpecializedMessageSend(CGF, ResultType, Receiver, Args, + Sel, Method, isClassMessage)) { + return RValue::get(SpecializedResult.getValue()); + } + return GenerateMessageSend(CGF, Return, ResultType, Sel, Receiver, Args, OID, + Method); +} + /// Instead of '[[MyClass alloc] init]', try to generate /// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the /// caller side, as well as the optimized objc_alloc. @@ -446,38 +461,39 @@ tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { Sel.getNameForSlot(0) != "init") return None; - // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' or - // we are in an ObjC class method and 'receiver' is '[self alloc]'. + // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' + // with 'cls' a Class. auto *SubOME = dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParenCasts()); if (!SubOME) return None; Selector SubSel = SubOME->getSelector(); - // Check if we are in an ObjC class method and the receiver expression is - // 'self'. - const Expr *SelfInClassMethod = nullptr; - if (const auto *CurMD = dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl)) - if (CurMD->isClassMethod()) - if ((SelfInClassMethod = SubOME->getInstanceReceiver())) - if (!SelfInClassMethod->isObjCSelfExpr()) - SelfInClassMethod = nullptr; - - if ((SubOME->getReceiverKind() != ObjCMessageExpr::Class && - !SelfInClassMethod) || !SubOME->getType()->isObjCObjectPointerType() || + if (!SubOME->getType()->isObjCObjectPointerType() || !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc") return None; - llvm::Value *Receiver; - if (SelfInClassMethod) { - Receiver = CGF.EmitScalarExpr(SelfInClassMethod); - } else { + llvm::Value *Receiver = nullptr; + switch (SubOME->getReceiverKind()) { + case ObjCMessageExpr::Instance: + if (!SubOME->getInstanceReceiver()->getType()->isObjCClassType()) + return None; + Receiver = CGF.EmitScalarExpr(SubOME->getInstanceReceiver()); + break; + + case ObjCMessageExpr::Class: { QualType ReceiverType = SubOME->getClassReceiver(); - const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>(); + const ObjCObjectType *ObjTy = ReceiverType->castAs<ObjCObjectType>(); const ObjCInterfaceDecl *ID = ObjTy->getInterface(); assert(ID && "null interface should be impossible here"); Receiver = CGF.CGM.getObjCRuntime().GetClass(CGF, ID); + break; + } + case ObjCMessageExpr::SuperInstance: + case ObjCMessageExpr::SuperClass: + return None; } + return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType())); } @@ -497,7 +513,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, method->getMethodFamily() == OMF_retain) { if (auto lvalueExpr = findWeakLValue(E->getInstanceReceiver())) { LValue lvalue = EmitLValue(lvalueExpr); - llvm::Value *result = EmitARCLoadWeakRetained(lvalue.getAddress()); + llvm::Value *result = EmitARCLoadWeakRetained(lvalue.getAddress(*this)); return AdjustObjCObjectType(*this, E->getType(), RValue::get(result)); } } @@ -525,10 +541,7 @@ RValue 
CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, switch (E->getReceiverKind()) { case ObjCMessageExpr::Instance: ReceiverType = E->getInstanceReceiver()->getType(); - if (auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(CurFuncDecl)) - if (OMD->isClassMethod()) - if (E->getInstanceReceiver()->isObjCSelfExpr()) - isClassMessage = true; + isClassMessage = ReceiverType->isObjCClassType(); if (retainSelf) { TryEmitResult ter = tryEmitARCRetainScalarExpr(*this, E->getInstanceReceiver()); @@ -540,9 +553,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, case ObjCMessageExpr::Class: { ReceiverType = E->getClassReceiver(); - const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>(); - assert(ObjTy && "Invalid Objective-C class message send"); - OID = ObjTy->getInterface(); + OID = ReceiverType->castAs<ObjCObjectType>()->getInterface(); assert(OID && "Invalid Objective-C class message send"); Receiver = Runtime.GetClass(*this, OID); isClassMessage = true; @@ -611,16 +622,9 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, method); } else { // Call runtime methods directly if we can. - if (Optional<llvm::Value *> SpecializedResult = - tryGenerateSpecializedMessageSend(*this, ResultType, Receiver, Args, - E->getSelector(), method, - isClassMessage)) { - result = RValue::get(SpecializedResult.getValue()); - } else { - result = Runtime.GenerateMessageSend(*this, Return, ResultType, - E->getSelector(), Receiver, Args, - OID, method); - } + result = Runtime.GeneratePossiblySpecializedMessageSend( + *this, Return, ResultType, E->getSelector(), Receiver, Args, OID, + method, isClassMessage); } // For delegate init calls in ARC, implicitly store the result of @@ -683,7 +687,13 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, llvm::Function *Fn = CGM.getObjCRuntime().GenerateMethod(OMD, CD); const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD); - CGM.SetInternalFunctionAttributes(OMD, Fn, FI); + if (OMD->isDirectMethod()) { + Fn->setVisibility(llvm::Function::HiddenVisibility); + CGM.SetLLVMFunctionAttributes(OMD, FI, Fn); + CGM.SetLLVMFunctionAttributesForDefinition(OMD, Fn); + } else { + CGM.SetInternalFunctionAttributes(OMD, Fn, FI); + } args.push_back(OMD->getSelfDecl()); args.push_back(OMD->getCmdDecl()); @@ -696,6 +706,14 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, StartFunction(OMD, OMD->getReturnType(), Fn, FI, args, OMD->getLocation(), StartLoc); + if (OMD->isDirectMethod()) { + // This function is a direct call; it has to implement a nil check + // on entry. + // + // TODO: possibly have several entry points to elide the check + CGM.getObjCRuntime().GenerateDirectMethodPrologue(*this, Fn, OMD, CD); + } + // In ARC, certain methods get an extra cleanup. 
if (CGM.getLangOpts().ObjCAutoRefCount && OMD->isInstanceMethod() && @@ -728,8 +746,8 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, ASTContext &Context = CGF.getContext(); Address src = - CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) - .getAddress(); + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getAddress(CGF); // objc_copyStruct (ReturnValue, &structIvar, // sizeof (Type of Ivar), isAtomic, false); @@ -954,14 +972,13 @@ void CodeGenFunction::GenerateObjCGetter(ObjCImplementationDecl *IMP, const ObjCPropertyImplDecl *PID) { llvm::Constant *AtomicHelperFn = CodeGenFunction(CGM).GenerateObjCAtomicGetterCopyHelperFunction(PID); - const ObjCPropertyDecl *PD = PID->getPropertyDecl(); - ObjCMethodDecl *OMD = PD->getGetterMethodDecl(); + ObjCMethodDecl *OMD = PID->getGetterMethodDecl(); assert(OMD && "Invalid call to generate getter (empty method)"); StartObjCMethod(OMD, IMP->getClassInterface()); generateObjCGetterBody(IMP, PID, OMD, AtomicHelperFn); - FinishFunction(); + FinishFunction(OMD->getEndLoc()); } static bool hasTrivialGetExpr(const ObjCPropertyImplDecl *propImpl) { @@ -1002,8 +1019,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, // The 2nd argument is the address of the ivar. llvm::Value *ivarAddr = - CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), - CGF.LoadObjCSelf(), ivar, 0).getPointer(); + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getPointer(CGF); ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); @@ -1041,7 +1058,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyDecl *prop = propImpl->getPropertyDecl(); QualType propType = prop->getType(); - ObjCMethodDecl *getterMethod = prop->getGetterMethodDecl(); + ObjCMethodDecl *getterMethod = propImpl->getGetterMethodDecl(); ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); @@ -1062,7 +1079,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, bitcastType = bitcastType->getPointerTo(); // addrspace 0 okay // Perform an atomic load. This does not impose ordering constraints. - Address ivarAddr = LV.getAddress(); + Address ivarAddr = LV.getAddress(*this); ivarAddr = Builder.CreateBitCast(ivarAddr, bitcastType); llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load"); load->setAtomic(llvm::AtomicOrdering::Unordered); @@ -1163,14 +1180,14 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, case TEK_Scalar: { llvm::Value *value; if (propType->isReferenceType()) { - value = LV.getAddress().getPointer(); + value = LV.getAddress(*this).getPointer(); } else { // We want to load and autoreleaseReturnValue ARC __weak ivars. if (LV.getQuals().getObjCLifetime() == Qualifiers::OCL_Weak) { if (getLangOpts().ObjCAutoRefCount) { value = emitARCRetainLoadOfScalar(*this, LV, ivarType); } else { - value = EmitARCLoadWeak(LV.getAddress()); + value = EmitARCLoadWeak(LV.getAddress(*this)); } // Otherwise we want to do a simple load, suppressing the @@ -1204,9 +1221,9 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, CallArgList args; // The first argument is the address of the ivar. 
- llvm::Value *ivarAddr = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), - CGF.LoadObjCSelf(), ivar, 0) - .getPointer(); + llvm::Value *ivarAddr = + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getPointer(CGF); ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); @@ -1215,7 +1232,7 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, DeclRefExpr argRef(CGF.getContext(), argVar, false, argVar->getType().getNonReferenceType(), VK_LValue, SourceLocation()); - llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(); + llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF); argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); @@ -1251,8 +1268,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, // The first argument is the address of the ivar. llvm::Value *ivarAddr = - CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), - CGF.LoadObjCSelf(), ivar, 0).getPointer(); + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getPointer(CGF); ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); @@ -1261,7 +1278,7 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, DeclRefExpr argRef(CGF.getContext(), argVar, false, argVar->getType().getNonReferenceType(), VK_LValue, SourceLocation()); - llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(); + llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF); argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); @@ -1311,9 +1328,8 @@ void CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyImplDecl *propImpl, llvm::Constant *AtomicHelperFn) { - const ObjCPropertyDecl *prop = propImpl->getPropertyDecl(); ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); - ObjCMethodDecl *setterMethod = prop->getSetterMethodDecl(); + ObjCMethodDecl *setterMethod = propImpl->getSetterMethodDecl(); // Just use the setter expression if Sema gave us one and it's // non-trivial. @@ -1339,7 +1355,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, LValue ivarLValue = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar, /*quals*/ 0); - Address ivarAddr = ivarLValue.getAddress(); + Address ivarAddr = ivarLValue.getAddress(*this); // Currently, all atomic accesses have to be through integer // types, so there's no point in trying to pick a prettier type. 
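// [Editor's note] The atomic getter/setter paths above access the ivar
// through an integer type of the same size, with no ordering constraints.
// A rough standalone analogue (LLVM's "unordered" is weaker than C++'s
// relaxed ordering; this is a sketch, not the emitted IR):
#include <atomic>
#include <cstdint>
#include <cstring>

struct Point { float x, y; }; // assume sizeof(Point) == sizeof(uint64_t)

static Point atomic_get(const std::atomic<uint64_t> *ivar) {
  uint64_t bits = ivar->load(std::memory_order_relaxed);
  Point p;
  std::memcpy(&p, &bits, sizeof p); // "bitcast" back to the property type
  return p;
}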
@@ -1490,14 +1506,13 @@ void CodeGenFunction::GenerateObjCSetter(ObjCImplementationDecl *IMP, const ObjCPropertyImplDecl *PID) { llvm::Constant *AtomicHelperFn = CodeGenFunction(CGM).GenerateObjCAtomicSetterCopyHelperFunction(PID); - const ObjCPropertyDecl *PD = PID->getPropertyDecl(); - ObjCMethodDecl *OMD = PD->getSetterMethodDecl(); + ObjCMethodDecl *OMD = PID->getSetterMethodDecl(); assert(OMD && "Invalid call to generate setter (empty method)"); StartObjCMethod(OMD, IMP->getClassInterface()); generateObjCSetterBody(IMP, PID, AtomicHelperFn); - FinishFunction(); + FinishFunction(OMD->getEndLoc()); } namespace { @@ -1517,7 +1532,7 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { LValue lvalue = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), addr, ivar, /*CVR*/ 0); - CGF.emitDestroy(lvalue.getAddress(), ivar->getType(), destroyer, + CGF.emitDestroy(lvalue.getAddress(CGF), ivar->getType(), destroyer, flags.isForNormalCleanup() && useEHCleanupForArray); } }; @@ -1584,7 +1599,7 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, LValue LV = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), Ivar, 0); EmitAggExpr(IvarInit->getInit(), - AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, + AggValueSlot::forLValue(LV, *this, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap)); @@ -1661,7 +1676,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ QualType ItemsTy = getContext().getConstantArrayType(getContext().getObjCIdType(), - llvm::APInt(32, NumItems), + llvm::APInt(32, NumItems), nullptr, ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); @@ -2309,7 +2324,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, !isBlock && (dst.getAlignment().isZero() || dst.getAlignment() >= CharUnits::fromQuantity(PointerAlignInBytes))) { - return EmitARCStoreStrongCall(dst.getAddress(), newValue, ignored); + return EmitARCStoreStrongCall(dst.getAddress(*this), newValue, ignored); } // Otherwise, split it out. @@ -2708,7 +2723,7 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal(); } else { assert(type.getObjCLifetime() == Qualifiers::OCL_Weak); - result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress()); + result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress(CGF)); } return TryEmitResult(result, !shouldRetain); } @@ -2732,7 +2747,7 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, SourceLocation()).getScalarVal(); // Set the source pointer to NULL. 
- CGF.EmitStoreOfScalar(getNullForVariable(lv.getAddress()), lv); + CGF.EmitStoreOfScalar(getNullForVariable(lv.getAddress(CGF)), lv); return TryEmitResult(result, true); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp index ee5c12aa35bd..a27b6d4ed637 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCGNU.cpp @@ -13,19 +13,20 @@ // //===----------------------------------------------------------------------===// -#include "CGObjCRuntime.h" +#include "CGCXXABI.h" #include "CGCleanup.h" +#include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "CGCXXABI.h" -#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/DataLayout.h" @@ -606,6 +607,9 @@ public: llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) override; + void GenerateDirectMethodPrologue(CodeGenFunction &CGF, llvm::Function *Fn, + const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) override; void GenerateCategory(const ObjCCategoryImplDecl *CMD) override; void GenerateClass(const ObjCImplementationDecl *ClassDecl) override; void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override; @@ -1232,6 +1236,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // The first Interface we find may be a @class, // which should only be treated as the source of // truth in the absence of a true declaration. + assert(OID && "Failed to find ObjCInterfaceDecl"); const ObjCInterfaceDecl *OIDDef = OID->getDefinition(); if (OIDDef != nullptr) OID = OIDDef; @@ -1294,7 +1299,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Emit a placeholder symbol. GV = new llvm::GlobalVariable(TheModule, ProtocolTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, Name); - GV->setAlignment(CGM.getPointerAlign().getQuantity()); + GV->setAlignment(CGM.getPointerAlign().getAsAlign()); } return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy); } @@ -1318,7 +1323,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName); GV->setComdat(TheModule.getOrInsertComdat(RefName)); GV->setSection(sectionName<ProtocolReferenceSection>()); - GV->setAlignment(CGM.getPointerAlign().getQuantity()); + GV->setAlignment(CGM.getPointerAlign().getAsAlign()); Ref = GV; } EmittedProtocolRef = true; @@ -1497,7 +1502,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Sym->setSection((Section + SecSuffix).str()); Sym->setComdat(TheModule.getOrInsertComdat((Prefix + Section).str())); - Sym->setAlignment(CGM.getPointerAlign().getQuantity()); + Sym->setAlignment(CGM.getPointerAlign().getAsAlign()); return Sym; }; return { Sym("__start_", "$a"), Sym("__stop", "$z") }; @@ -1854,7 +1859,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ivarBuilder.addInt(Int32Ty, CGM.getContext().getTypeSizeInChars(ivarTy).getQuantity()); // Alignment will be stored as a base-2 log of the alignment. 
- int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); + unsigned align = + llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); // Objects that require more than 2^64-byte alignment should be impossible! assert(align < 64); // uint32_t flags; @@ -1879,13 +1885,12 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { for (auto *propImpl : OID->property_impls()) if (propImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { - ObjCPropertyDecl *prop = propImpl->getPropertyDecl(); - auto addIfExists = [&](const ObjCMethodDecl* OMD) { - if (OMD) + auto addIfExists = [&](const ObjCMethodDecl *OMD) { + if (OMD && OMD->hasBody()) InstanceMethods.push_back(OMD); }; - addIfExists(prop->getGetterMethodDecl()); - addIfExists(prop->getSetterMethodDecl()); + addIfExists(propImpl->getGetterMethodDecl()); + addIfExists(propImpl->getSetterMethodDecl()); } if (InstanceMethods.size() == 0) @@ -3032,6 +3037,7 @@ llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()]; if (!protocol) GenerateProtocol(PD); + assert(protocol && "Unknown protocol"); llvm::Type *T = CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); @@ -3493,13 +3499,12 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { for (auto *propertyImpl : OID->property_impls()) if (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { - ObjCPropertyDecl *property = propertyImpl->getPropertyDecl(); auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { if (accessor) InstanceMethods.push_back(accessor); }; - addPropertyMethod(property->getGetterMethodDecl()); - addPropertyMethod(property->getSetterMethodDecl()); + addPropertyMethod(propertyImpl->getGetterMethodDecl()); + addPropertyMethod(propertyImpl->getSetterMethodDecl()); } llvm::Constant *Properties = GeneratePropertyList(OID, ClassDecl); @@ -3872,6 +3877,13 @@ llvm::Function *CGObjCGNU::GenerateMethod(const ObjCMethodDecl *OMD, return Method; } +void CGObjCGNU::GenerateDirectMethodPrologue(CodeGenFunction &CGF, + llvm::Function *Fn, + const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) { + // GNU runtime doesn't support direct calls at this time +} + llvm::FunctionCallee CGObjCGNU::GetPropertyGetFunction() { return GetPropertyFn; } @@ -4039,7 +4051,7 @@ LValue CGObjCGNU::EmitObjCValueForIvar(CodeGenFunction &CGF, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { const ObjCInterfaceDecl *ID = - ObjectTy->getAs<ObjCObjectType>()->getInterface(); + ObjectTy->castAs<ObjCObjectType>()->getInterface(); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, EmitIvarOffset(CGF, ID, Ivar)); } @@ -4086,7 +4098,7 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, auto GV = new llvm::GlobalVariable(TheModule, IntTy, false, llvm::GlobalValue::LinkOnceAnyLinkage, llvm::Constant::getNullValue(IntTy), name); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); Offset = GV; } Offset = CGF.Builder.CreateAlignedLoad(Offset, Align); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp index 12880fecbadf..f36c28a85a68 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCMac.cpp @@ -16,8 +16,8 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" 
#include "CodeGenModule.h" -#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" @@ -25,6 +25,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LangOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" @@ -874,6 +875,10 @@ protected: /// this translation unit. llvm::DenseMap<const ObjCMethodDecl*, llvm::Function*> MethodDefinitions; + /// DirectMethodDefinitions - map of direct methods which have been defined in + /// this translation unit. + llvm::DenseMap<const ObjCMethodDecl*, llvm::Function*> DirectMethodDefinitions; + /// PropertyNames - uniqued method variable names. llvm::DenseMap<IdentifierInfo*, llvm::GlobalVariable*> PropertyNames; @@ -923,7 +928,8 @@ protected: /// \param[out] NameOut - The return value. void GetNameForMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD, - SmallVectorImpl<char> &NameOut); + SmallVectorImpl<char> &NameOut, + bool ignoreCategoryNamespace = false); /// GetMethodVarName - Return a unique constant for the given /// selector's name. The return value has type char *. @@ -1065,7 +1071,7 @@ protected: CodeGen::RValue EmitMessageSend(CodeGen::CodeGenFunction &CGF, ReturnValueSlot Return, QualType ResultType, - llvm::Value *Sel, + Selector Sel, llvm::Value *Arg0, QualType Arg0Ty, bool IsSuper, @@ -1092,6 +1098,13 @@ public: llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD=nullptr) override; + llvm::Function *GenerateDirectMethod(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD); + + void GenerateDirectMethodPrologue(CodeGenFunction &CGF, llvm::Function *Fn, + const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) override; + void GenerateProtocol(const ObjCProtocolDecl *PD) override; /// GetOrEmitProtocol - Get the protocol object for the given @@ -1303,7 +1316,7 @@ private: /// EmitSelector - Return a Value*, of type ObjCTypes.SelectorPtrTy, /// for the given selector. llvm::Value *EmitSelector(CodeGenFunction &CGF, Selector Sel); - Address EmitSelectorAddr(CodeGenFunction &CGF, Selector Sel); + Address EmitSelectorAddr(Selector Sel); public: CGObjCMac(CodeGen::CodeGenModule &cgm); @@ -1531,7 +1544,7 @@ private: /// EmitSelector - Return a Value*, of type ObjCTypes.SelectorPtrTy, /// for the given selector. llvm::Value *EmitSelector(CodeGenFunction &CGF, Selector Sel); - Address EmitSelectorAddr(CodeGenFunction &CGF, Selector Sel); + Address EmitSelectorAddr(Selector Sel); /// GetInterfaceEHType - Get the cached ehtype for the given Objective-C /// interface. The return value has type EHTypePtrTy. @@ -1573,9 +1586,13 @@ private: // base of the ivar access is a parameter to an Objective C method. // However, because the parameters are not available in the current // interface, we cannot perform this check. + // + // Note that for direct methods, because objc_msgSend is skipped, + // and that the method may be inlined, this optimization actually + // can't be performed. 
if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl)) - if (MD->isInstanceMethod()) + if (MD->isInstanceMethod() && !MD->isDirectMethod()) if (const ObjCInterfaceDecl *ID = MD->getClassInterface()) return IV->getContainingInterface()->isSuperClassOf(ID); return false; @@ -1619,7 +1636,7 @@ public: llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel) override { return EmitSelector(CGF, Sel); } Address GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) override - { return EmitSelectorAddr(CGF, Sel); } + { return EmitSelectorAddr(Sel); } /// The NeXT/Apple runtimes do not support typed selectors; just emit an /// untyped one. @@ -1887,7 +1904,7 @@ llvm::Value *CGObjCMac::GetSelector(CodeGenFunction &CGF, Selector Sel) { return EmitSelector(CGF, Sel); } Address CGObjCMac::GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) { - return EmitSelectorAddr(CGF, Sel); + return EmitSelectorAddr(Sel); } llvm::Value *CGObjCMac::GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method) { @@ -2018,7 +2035,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) { GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - GV->setAlignment(1); + GV->setAlignment(llvm::Align::None()); Fields.addBitCast(GV, CGM.Int8PtrTy); // String length. @@ -2103,10 +2120,9 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType()); Target = CGF.Builder.CreateBitCast(Target, ClassTy); CGF.Builder.CreateStore(Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1)); - return EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF, Sel), - ObjCSuper.getPointer(), ObjCTypes.SuperPtrCTy, - true, CallArgs, Method, Class, ObjCTypes); + return EmitMessageSend(CGF, Return, ResultType, Sel, ObjCSuper.getPointer(), + ObjCTypes.SuperPtrCTy, true, CallArgs, Method, Class, + ObjCTypes); } /// Generate code for a message send expression. @@ -2118,10 +2134,9 @@ CodeGen::RValue CGObjCMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, const CallArgList &CallArgs, const ObjCInterfaceDecl *Class, const ObjCMethodDecl *Method) { - return EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF, Sel), - Receiver, CGF.getContext().getObjCIdType(), - false, CallArgs, Method, Class, ObjCTypes); + return EmitMessageSend(CGF, Return, ResultType, Sel, Receiver, + CGF.getContext().getObjCIdType(), false, CallArgs, + Method, Class, ObjCTypes); } static bool isWeakLinkedClass(const ObjCInterfaceDecl *ID) { @@ -2137,7 +2152,7 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, ReturnValueSlot Return, QualType ResultType, - llvm::Value *Sel, + Selector Sel, llvm::Value *Arg0, QualType Arg0Ty, bool IsSuper, @@ -2145,11 +2160,24 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, const ObjCMethodDecl *Method, const ObjCInterfaceDecl *ClassReceiver, const ObjCCommonTypesHelper &ObjCTypes) { + CodeGenTypes &Types = CGM.getTypes(); + auto selTy = CGF.getContext().getObjCSelType(); + llvm::Value *SelValue; + + if (Method && Method->isDirectMethod()) { + // Direct methods will synthesize the proper `_cmd` internally, + // so just don't bother with setting the `_cmd` argument. 
+ assert(!IsSuper); + SelValue = llvm::UndefValue::get(Types.ConvertType(selTy)); + } else { + SelValue = GetSelector(CGF, Sel); + } + CallArgList ActualArgs; if (!IsSuper) Arg0 = CGF.Builder.CreateBitCast(Arg0, ObjCTypes.ObjectPtrTy); ActualArgs.add(RValue::get(Arg0), Arg0Ty); - ActualArgs.add(RValue::get(Sel), CGF.getContext().getObjCSelType()); + ActualArgs.add(RValue::get(SelValue), selTy); ActualArgs.addFrom(CallArgs); // If we're calling a method, use the formal signature. @@ -2190,7 +2218,9 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, bool RequiresNullCheck = false; llvm::FunctionCallee Fn = nullptr; - if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { + if (Method && Method->isDirectMethod()) { + Fn = GenerateDirectMethod(Method, Method->getClassInterface()); + } else if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (ReceiverCanBeNull) RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper) : ObjCTypes.getSendStretFn(IsSuper); @@ -2517,14 +2547,12 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, } if (const ArrayType *Array = CGM.getContext().getAsArrayType(FQT)) { - const ConstantArrayType *CArray = - dyn_cast_or_null<ConstantArrayType>(Array); + auto *CArray = cast<ConstantArrayType>(Array); uint64_t ElCount = CArray->getSize().getZExtValue(); assert(CArray && "only array with known element size is supported"); FQT = CArray->getElementType(); while (const ArrayType *Array = CGM.getContext().getAsArrayType(FQT)) { - const ConstantArrayType *CArray = - dyn_cast_or_null<ConstantArrayType>(Array); + auto *CArray = cast<ConstantArrayType>(Array); ElCount *= CArray->getSize().getZExtValue(); FQT = CArray->getElementType(); } @@ -3103,7 +3131,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) { nullptr, "OBJC_PROTOCOL_" + PD->getName()); Entry->setSection("__OBJC,__protocol,regular,no_dead_strip"); // FIXME: Is this necessary? Why only for protocol? 
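// [Editor's note] The change just below (setAlignment(4) becoming
// setAlignment(llvm::Align(4))) recurs throughout this diff: LLVM now
// expresses alignments as llvm::Align, a log2-encoded, always-nonzero type,
// instead of a raw unsigned. A sketch that compiles against LLVM headers of
// this vintage; llvm::Align::None() (i.e. Align(1)) and
// CharUnits::getAsAlign() are the other spellings used in this diff.
#include "llvm/Support/Alignment.h"

static llvm::Align four_bytes() {
  return llvm::Align(4); // was: plain unsigned 4
}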
- Entry->setAlignment(4); + Entry->setAlignment(llvm::Align(4)); } return Entry; @@ -3217,9 +3245,6 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, SmallVectorImpl<const ObjCPropertyDecl *> &Properties, const ObjCProtocolDecl *Proto, bool IsClassProperty) { - for (const auto *P : Proto->protocols()) - PushProtocolProperties(PropertySet, Properties, P, IsClassProperty); - for (const auto *PD : Proto->properties()) { if (IsClassProperty != PD->isClassProperty()) continue; @@ -3227,6 +3252,9 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, continue; Properties.push_back(PD); } + + for (const auto *P : Proto->protocols()) + PushProtocolProperties(PropertySet, Properties, P, IsClassProperty); } /* @@ -3299,6 +3327,8 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, values.addInt(ObjCTypes.IntTy, Properties.size()); auto propertiesArray = values.beginArray(ObjCTypes.PropertyTy); for (auto PD : Properties) { + if (PD->isDirectProperty()) + continue; auto property = propertiesArray.beginStruct(ObjCTypes.PropertyTy); property.add(GetPropertyName(PD->getIdentifier())); property.add(GetPropertyTypeString(PD, Container)); @@ -3374,7 +3404,8 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { }; SmallVector<const ObjCMethodDecl *, 16> Methods[NumMethodLists]; for (const auto *MD : OCD->methods()) { - Methods[unsigned(MD->isClassMethod())].push_back(MD); + if (!MD->isDirectMethod()) + Methods[unsigned(MD->isClassMethod())].push_back(MD); } Values.add(GetClassName(OCD->getName())); @@ -3556,17 +3587,18 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { }; SmallVector<const ObjCMethodDecl *, 16> Methods[NumMethodLists]; for (const auto *MD : ID->methods()) { - Methods[unsigned(MD->isClassMethod())].push_back(MD); + if (!MD->isDirectMethod()) + Methods[unsigned(MD->isClassMethod())].push_back(MD); } for (const auto *PID : ID->property_impls()) { if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { - ObjCPropertyDecl *PD = PID->getPropertyDecl(); - - if (ObjCMethodDecl *MD = PD->getGetterMethodDecl()) + if (PID->getPropertyDecl()->isDirectProperty()) + continue; + if (ObjCMethodDecl *MD = PID->getGetterMethodDecl()) if (GetMethodDefinition(MD)) Methods[InstanceMethods].push_back(MD); - if (ObjCMethodDecl *MD = PD->getSetterMethodDecl()) + if (ObjCMethodDecl *MD = PID->getSetterMethodDecl()) if (GetMethodDefinition(MD)) Methods[InstanceMethods].push_back(MD); } @@ -3609,7 +3641,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { "Forward metaclass reference has incorrect type."); values.finishAndSetAsInitializer(GV); GV->setSection(Section); - GV->setAlignment(CGM.getPointerAlign().getQuantity()); + GV->setAlignment(CGM.getPointerAlign().getAsAlign()); CGM.addCompilerUsedGlobal(GV); } else GV = CreateMetadataVar(Name, values, Section, CGM.getPointerAlign(), true); @@ -3961,7 +3993,8 @@ llvm::Constant *CGObjCMac::emitMethodList(Twine name, MethodListType MLT, values.addInt(ObjCTypes.IntTy, methods.size()); auto methodArray = values.beginArray(ObjCTypes.MethodTy); for (auto MD : methods) { - emitMethodConstant(methodArray, MD); + if (!MD->isDirectMethod()) + emitMethodConstant(methodArray, MD); } methodArray.finishAndAddTo(values); @@ -3972,22 +4005,133 @@ llvm::Constant *CGObjCMac::emitMethodList(Twine name, MethodListType MLT, llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) { + 
llvm::Function *Method; + + if (OMD->isDirectMethod()) { + Method = GenerateDirectMethod(OMD, CD); + } else { + SmallString<256> Name; + GetNameForMethod(OMD, CD, Name); + + CodeGenTypes &Types = CGM.getTypes(); + llvm::FunctionType *MethodTy = + Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); + Method = + llvm::Function::Create(MethodTy, llvm::GlobalValue::InternalLinkage, + Name.str(), &CGM.getModule()); + } + + MethodDefinitions.insert(std::make_pair(OMD, Method)); + + return Method; +} + +llvm::Function * +CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) { + auto I = DirectMethodDefinitions.find(OMD->getCanonicalDecl()); + if (I != DirectMethodDefinitions.end()) + return I->second; + SmallString<256> Name; - GetNameForMethod(OMD, CD, Name); + GetNameForMethod(OMD, CD, Name, /*ignoreCategoryNamespace*/true); CodeGenTypes &Types = CGM.getTypes(); llvm::FunctionType *MethodTy = Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); llvm::Function *Method = - llvm::Function::Create(MethodTy, - llvm::GlobalValue::InternalLinkage, - Name.str(), - &CGM.getModule()); - MethodDefinitions.insert(std::make_pair(OMD, Method)); + llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, + Name.str(), &CGM.getModule()); + DirectMethodDefinitions.insert(std::make_pair(OMD->getCanonicalDecl(), Method)); return Method; } +void CGObjCCommonMac::GenerateDirectMethodPrologue( + CodeGenFunction &CGF, llvm::Function *Fn, const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) { + auto &Builder = CGF.Builder; + bool ReceiverCanBeNull = true; + auto selfAddr = CGF.GetAddrOfLocalVar(OMD->getSelfDecl()); + auto selfValue = Builder.CreateLoad(selfAddr); + + // Generate: + // + // /* for class methods only to force class lazy initialization */ + // self = [self self]; + // + // /* unless the receiver is never NULL */ + // if (self == nil) { + // return (ReturnType){ }; + // } + // + // _cmd = @selector(...) + // ... + + if (OMD->isClassMethod()) { + const ObjCInterfaceDecl *OID = cast<ObjCInterfaceDecl>(CD); + assert(OID && + "GenerateDirectMethod() should be called with the Class Interface"); + Selector SelfSel = GetNullarySelector("self", CGM.getContext()); + auto ResultType = CGF.getContext().getObjCIdType(); + RValue result; + CallArgList Args; + + // TODO: If this method is inlined, the caller might know that `self` is + // already initialized; for example, it might be an ordinary Objective-C + // method which always receives an initialized `self`, or it might have just + // forced initialization on its own. + // + // We should find a way to eliminate this unnecessary initialization in such + // cases in LLVM. + result = GeneratePossiblySpecializedMessageSend( + CGF, ReturnValueSlot(), ResultType, SelfSel, selfValue, Args, OID, + nullptr, true); + Builder.CreateStore(result.getScalarVal(), selfAddr); + + // Nullable `Class` expressions cannot be messaged with a direct method, + // so the only reason the receiver can be null is + // weak linking. 
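// [Editor's note] Net effect of the prologue built in this function, as a
// minimal C++ analogue with hypothetical names: a direct method is a plain
// function that re-implements objc_msgSend's nil-receiver behaviour by
// returning a zero-initialized value when self is null.
#include <cstdio>

struct Size { double w, h; };

struct Widget {
  Size size{640, 480};
  // "Direct method": receiver passed explicitly, callee known statically.
  static Size sizeDirect(const Widget *self) {
    if (!self)
      return Size{}; // return (ReturnType){ };
    return self->size;
  }
};

int main() {
  Widget w;
  Size a = Widget::sizeDirect(&w);      // {640, 480}
  Size b = Widget::sizeDirect(nullptr); // {0, 0}: nil check instead of a crash
  std::printf("%g x %g / %g x %g\n", a.w, a.h, b.w, b.h);
}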
+ ReceiverCanBeNull = isWeakLinkedClass(OID); + } + + if (ReceiverCanBeNull) { + llvm::BasicBlock *SelfIsNilBlock = + CGF.createBasicBlock("objc_direct_method.self_is_nil"); + llvm::BasicBlock *ContBlock = + CGF.createBasicBlock("objc_direct_method.cont"); + + // if (self == nil) { + auto selfTy = cast<llvm::PointerType>(selfValue->getType()); + auto Zero = llvm::ConstantPointerNull::get(selfTy); + + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + Builder.CreateCondBr(Builder.CreateICmpEQ(selfValue, Zero), SelfIsNilBlock, + ContBlock, MDHelper.createBranchWeights(1, 1 << 20)); + + CGF.EmitBlock(SelfIsNilBlock); + + // return (ReturnType){ }; + auto retTy = OMD->getReturnType(); + Builder.SetInsertPoint(SelfIsNilBlock); + if (!retTy->isVoidType()) { + CGF.EmitNullInitialization(CGF.ReturnValue, retTy); + } + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + // } + + // rest of the body + CGF.EmitBlock(ContBlock); + Builder.SetInsertPoint(ContBlock); + } + + // only synthesize _cmd if it's referenced + if (OMD->getCmdDecl()->isUsed()) { + Builder.CreateStore(GetSelector(CGF, OMD), + CGF.GetAddrOfLocalVar(OMD->getCmdDecl())); + } +} + llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name, ConstantStructBuilder &Init, StringRef Section, @@ -4016,7 +4160,7 @@ llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name, new llvm::GlobalVariable(CGM.getModule(), Ty, false, LT, Init, Name); if (!Section.empty()) GV->setSection(Section); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); if (AddToUsed) CGM.addCompilerUsedGlobal(GV); return GV; @@ -4064,7 +4208,7 @@ CGObjCCommonMac::CreateCStringLiteral(StringRef Name, ObjCLabelType Type, if (CGM.getTriple().isOSBinFormatMachO()) GV->setSection(Section); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - GV->setAlignment(CharUnits::One().getQuantity()); + GV->setAlignment(CharUnits::One().getAsAlign()); CGM.addCompilerUsedGlobal(GV); return GV; @@ -4902,7 +5046,7 @@ LValue CGObjCMac::EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { const ObjCInterfaceDecl *ID = - ObjectTy->getAs<ObjCObjectType>()->getInterface(); + ObjectTy->castAs<ObjCObjectType>()->getInterface(); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, EmitIvarOffset(CGF, ID, Ivar)); } @@ -5120,11 +5264,11 @@ llvm::Value *CGObjCMac::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { } llvm::Value *CGObjCMac::EmitSelector(CodeGenFunction &CGF, Selector Sel) { - return CGF.Builder.CreateLoad(EmitSelectorAddr(CGF, Sel)); + return CGF.Builder.CreateLoad(EmitSelectorAddr(Sel)); } -Address CGObjCMac::EmitSelectorAddr(CodeGenFunction &CGF, Selector Sel) { - CharUnits Align = CGF.getPointerAlign(); +Address CGObjCMac::EmitSelectorAddr(Selector Sel) { + CharUnits Align = CGM.getPointerAlign(); llvm::GlobalVariable *&Entry = SelectorReferences[Sel]; if (!Entry) { @@ -5544,14 +5688,16 @@ CGObjCCommonMac::GetPropertyTypeString(const ObjCPropertyDecl *PD, void CGObjCCommonMac::GetNameForMethod(const ObjCMethodDecl *D, const ObjCContainerDecl *CD, - SmallVectorImpl<char> &Name) { + SmallVectorImpl<char> &Name, + bool ignoreCategoryNamespace) { llvm::raw_svector_ostream OS(Name); assert (CD && "Missing container decl in GetNameForMethod"); OS << '\01' << (D->isInstanceMethod() ? 
'-' : '+') << '[' << CD->getName(); - if (const ObjCCategoryImplDecl *CID = - dyn_cast<ObjCCategoryImplDecl>(D->getDeclContext())) - OS << '(' << *CID << ')'; + if (!ignoreCategoryNamespace) + if (const ObjCCategoryImplDecl *CID = + dyn_cast<ObjCCategoryImplDecl>(D->getDeclContext())) + OS << '(' << *CID << ')'; OS << ' ' << D->getSelector().getAsString() << ']'; } @@ -6076,7 +6222,8 @@ void CGObjCNonFragileABIMac::AddModuleClassList( llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, LT, Init, SymbolName); - GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType())); + GV->setAlignment( + llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType()))); GV->setSection(SectionName); CGM.addCompilerUsedGlobal(GV); } @@ -6229,23 +6376,12 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( SmallVector<const ObjCMethodDecl*, 16> methods; if (flags & NonFragileABI_Class_Meta) { for (const auto *MD : ID->class_methods()) - methods.push_back(MD); + if (!MD->isDirectMethod()) + methods.push_back(MD); } else { for (const auto *MD : ID->instance_methods()) - methods.push_back(MD); - - for (const auto *PID : ID->property_impls()) { - if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize){ - ObjCPropertyDecl *PD = PID->getPropertyDecl(); - - if (auto MD = PD->getGetterMethodDecl()) - if (GetMethodDefinition(MD)) - methods.push_back(MD); - if (auto MD = PD->getSetterMethodDecl()) - if (GetMethodDefinition(MD)) - methods.push_back(MD); - } - } + if (!MD->isDirectMethod()) + methods.push_back(MD); } values.add(emitMethodList(ID->getObjCRuntimeNameAsString(), @@ -6319,8 +6455,8 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI, if (CGM.getTriple().isOSBinFormatMachO()) GV->setSection("__DATA, __objc_data"); - GV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)); + GV->setAlignment(llvm::Align( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy))); if (!CGM.getTriple().isOSBinFormatCOFF()) if (HiddenVisibility) GV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -6527,7 +6663,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, PTGV->setSection(GetSectionName("__objc_protorefs", "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - PTGV->setAlignment(Align.getQuantity()); + PTGV->setAlignment(Align.getAsAlign()); if (!CGM.getTriple().isOSBinFormatMachO()) PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName)); CGM.addUsedGlobal(PTGV); @@ -6566,6 +6702,8 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { SmallVector<const ObjCMethodDecl *, 16> instanceMethods; SmallVector<const ObjCMethodDecl *, 8> classMethods; for (const auto *MD : OCD->methods()) { + if (MD->isDirectMethod()) + continue; if (MD->isInstanceMethod()) { instanceMethods.push_back(MD); } else { @@ -6708,9 +6846,8 @@ CGObjCNonFragileABIMac::emitMethodList(Twine name, MethodListType kind, // method_count values.addInt(ObjCTypes.IntTy, methods.size()); auto methodArray = values.beginArray(ObjCTypes.MethodTy); - for (auto MD : methods) { + for (auto MD : methods) emitMethodConstant(methodArray, MD, forProtocol); - } methodArray.finishAndAddTo(values); llvm::GlobalVariable *GV = finishAndCreateGlobal(values, prefix + name, CGM); @@ -6759,8 +6896,8 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, llvm::GlobalVariable *IvarOffsetGV = 
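
The new ignoreCategoryNamespace argument to GetNameForMethod (used by GenerateDirectMethod above) matters because a direct method's symbol name is its ABI: callers bind to it directly instead of going through objc_msgSend, so moving an implementation between a class and one of its categories must not rename the symbol. For a hypothetical -foo in category Extras of MyClass, the two spellings would be:

    // Ordinary method: internal linkage, category recorded in the name:
    //   "\01-[MyClass(Extras) foo]"
    // Direct method: external linkage, category namespace dropped:
    //   "\01-[MyClass foo]"
    GetNameForMethod(OMD, CD, Name, /*ignoreCategoryNamespace*/ true);
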
ObjCIvarOffsetVariable(ID, Ivar); IvarOffsetGV->setInitializer( llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset)); - IvarOffsetGV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)); + IvarOffsetGV->setAlignment(llvm::Align( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy))); if (!CGM.getTriple().isOSBinFormatCOFF()) { // FIXME: This matches gcc, but shouldn't the visibility be set on the use @@ -6986,8 +7123,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( ProtocolRef); if (!CGM.getTriple().isOSBinFormatMachO()) PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef)); - PTGV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)); + PTGV->setAlignment(llvm::Align( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy))); PTGV->setSection(GetSectionName("__objc_protolist", "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -7053,7 +7190,7 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar( llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { - ObjCInterfaceDecl *ID = ObjectTy->getAs<ObjCObjectType>()->getInterface(); + ObjCInterfaceDecl *ID = ObjectTy->castAs<ObjCObjectType>()->getInterface(); llvm::Value *Offset = EmitIvarOffset(CGF, ID, Ivar); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, Offset); @@ -7235,8 +7372,7 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, ? EmitVTableMessageSend(CGF, Return, ResultType, Sel, Receiver, CGF.getContext().getObjCIdType(), false, CallArgs, Method) - : EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF, Sel), + : EmitMessageSend(CGF, Return, ResultType, Sel, Receiver, CGF.getContext().getObjCIdType(), false, CallArgs, Method, Class, ObjCTypes); } @@ -7338,7 +7474,7 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, CGM.getModule(), ClassGV->getType(), false, getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, "OBJC_CLASSLIST_REFERENCES_$_"); - Entry->setAlignment(CGF.getPointerAlign().getQuantity()); + Entry->setAlignment(CGF.getPointerAlign().getAsAlign()); if (!ID || !ID->hasAttr<ObjCClassStubAttr>()) Entry->setSection(SectionName); @@ -7377,7 +7513,7 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, CGM.getModule(), ClassGV->getType(), false, getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); - Entry->setAlignment(CGF.getPointerAlign().getQuantity()); + Entry->setAlignment(CGF.getPointerAlign().getAsAlign()); Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); } @@ -7401,7 +7537,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, getLinkageTypeForObjCMetadata(CGM, SectionName), MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); - Entry->setAlignment(Align.getQuantity()); + Entry->setAlignment(Align.getAsAlign()); Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); } @@ -7467,15 +7603,14 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, ? 
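
Several hunks in this diff swap getAs<> for castAs<>. getAs returns null when the type is not of the requested class, while castAs asserts that it is; the change documents an invariant instead of leaving a null dereference to chance. A sketch with a hypothetical QualType QT:

    // Tolerant query: result may be null and must be checked.
    if (const auto *OT = QT->getAs<ObjCObjectType>())
      (void)OT->getInterface();
    // Checked cast: asserts QT really is an ObjCObjectType; never returns null.
    const auto *OT2 = QT->castAs<ObjCObjectType>();
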
EmitVTableMessageSend(CGF, Return, ResultType, Sel, ObjCSuper.getPointer(), ObjCTypes.SuperPtrCTy, true, CallArgs, Method) - : EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF, Sel), + : EmitMessageSend(CGF, Return, ResultType, Sel, ObjCSuper.getPointer(), ObjCTypes.SuperPtrCTy, true, CallArgs, Method, Class, ObjCTypes); } llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF, Selector Sel) { - Address Addr = EmitSelectorAddr(CGF, Sel); + Address Addr = EmitSelectorAddr(Sel); llvm::LoadInst* LI = CGF.Builder.CreateLoad(Addr); LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), @@ -7483,11 +7618,9 @@ llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF, return LI; } -Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF, - Selector Sel) { +Address CGObjCNonFragileABIMac::EmitSelectorAddr(Selector Sel) { llvm::GlobalVariable *&Entry = SelectorReferences[Sel]; - - CharUnits Align = CGF.getPointerAlign(); + CharUnits Align = CGM.getPointerAlign(); if (!Entry) { llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(GetMethodVarName(Sel), @@ -7500,7 +7633,7 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF, "OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); Entry->setSection(SectionName); - Entry->setAlignment(Align.getQuantity()); + Entry->setAlignment(Align.getAsAlign()); CGM.addCompilerUsedGlobal(Entry); } @@ -7733,7 +7866,7 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, : llvm::GlobalValue::WeakAnyLinkage; if (Entry) { values.finishAndSetAsInitializer(Entry); - Entry->setAlignment(CGM.getPointerAlign().getQuantity()); + Entry->setAlignment(CGM.getPointerAlign().getAsAlign()); } else { Entry = values.finishAndCreateGlobal("OBJC_EHTYPE_$_" + ClassName, CGM.getPointerAlign(), diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h b/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h index 471816cb5988..f0b3525cfde2 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGObjCRuntime.h @@ -169,6 +169,21 @@ public: const ObjCInterfaceDecl *Class = nullptr, const ObjCMethodDecl *Method = nullptr) = 0; + /// Generate an Objective-C message send operation. + /// + /// This variant allows for the call to be substituted with an optimized + /// variant. + CodeGen::RValue + GeneratePossiblySpecializedMessageSend(CodeGenFunction &CGF, + ReturnValueSlot Return, + QualType ResultType, + Selector Sel, + llvm::Value *Receiver, + const CallArgList& Args, + const ObjCInterfaceDecl *OID, + const ObjCMethodDecl *Method, + bool isClassMessage); + /// Generate an Objective-C message send operation to the super /// class initiated in a method for Class and with the given Self /// object. @@ -205,6 +220,12 @@ public: virtual llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) = 0; + /// Generates prologue for direct Objective-C Methods. + virtual void GenerateDirectMethodPrologue(CodeGenFunction &CGF, + llvm::Function *Fn, + const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) = 0; + /// Return the runtime function for getting properties. 
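
The selector load above keeps its !invariant.load annotation: a selector reference is initialized by the runtime before any user code can observe it and never changes afterwards, so LLVM is free to hoist and combine such loads. The underlying pattern, assuming a builder B, a module M, and its context Ctx:

    llvm::LoadInst *LI = B.CreateLoad(Addr);
    LI->setMetadata(M.getMDKindID("invariant.load"),
                    llvm::MDNode::get(Ctx, llvm::None)); // empty operand list
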
virtual llvm::FunctionCallee GetPropertyGetFunction() = 0; diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 191a95c62992..dbe375294d17 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -96,7 +96,7 @@ llvm::PointerType *CGOpenCLRuntime::getSamplerType(const Type *T) { } llvm::Value *CGOpenCLRuntime::getPipeElemSize(const Expr *PipeArg) { - const PipeType *PipeTy = PipeArg->getType()->getAs<PipeType>(); + const PipeType *PipeTy = PipeArg->getType()->castAs<PipeType>(); // The type of the last (implicit) argument to be passed. llvm::Type *Int32Ty = llvm::IntegerType::getInt32Ty(CGM.getLLVMContext()); unsigned TypeSize = CGM.getContext() @@ -106,7 +106,7 @@ llvm::Value *CGOpenCLRuntime::getPipeElemSize(const Expr *PipeArg) { } llvm::Value *CGOpenCLRuntime::getPipeElemAlign(const Expr *PipeArg) { - const PipeType *PipeTy = PipeArg->getType()->getAs<PipeType>(); + const PipeType *PipeTy = PipeArg->getType()->castAs<PipeType>(); // The type of the last (implicit) argument to be passed. llvm::Type *Int32Ty = llvm::IntegerType::getInt32Ty(CGM.getLLVMContext()); unsigned TypeSize = CGM.getContext() diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 27e7175da841..97b17799a03e 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10,17 +10,22 @@ // //===----------------------------------------------------------------------===// +#include "CGOpenMPRuntime.h" #include "CGCXXABI.h" #include "CGCleanup.h" -#include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -30,6 +35,7 @@ using namespace clang; using namespace CodeGen; +using namespace llvm::omp; namespace { /// Base class for handling code generation inside OpenMP regions. 
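
Much of the CGOpenMPRuntime churn in the hunks that follow is mechanical: LValue::getAddress() and LValue::getPointer() now take the CodeGenFunction, so lvalue kinds whose address may require emitting code (rather than returning a value already in hand) can do so at the point of use. The migration pattern, for a hypothetical LValue LV:

    Address Addr = LV.getAddress(CGF);     // was: LV.getAddress()
    llvm::Value *Ptr = LV.getPointer(CGF); // was: LV.getPointer()
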
@@ -356,7 +362,7 @@ public: VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); PrivScope.addPrivate( - VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); + VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); } (void)PrivScope.Privatize(); } @@ -727,10 +733,6 @@ enum OpenMPRTLFunction { OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_requires(int64_t flags); OMPRTL__tgt_register_requires, - // Call to void __tgt_register_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_register_lib, - // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_unregister_lib, // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, @@ -752,6 +754,11 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_update_nowait, + // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + OMPRTL__tgt_mapper_num_components, + // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + OMPRTL__tgt_push_mapper_component, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -836,7 +843,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); + InitRVal = RValue::getAggregate(LV.getAddress(CGF)); break; } OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); @@ -960,7 +967,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress()); + DRD, SharedLVal.getAddress(CGF)); } ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, @@ -1001,13 +1008,13 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = - cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) - ->getElementType(); + auto *ElemType = cast<llvm::PointerType>( + SharedAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), - SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), + SharedAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); @@ -1057,7 +1064,7 @@ void ReductionCodeGen::emitInitialization( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); QualType SharedType = SharedAddresses[N].first.getType(); SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), CGF.ConvertTypeForMem(SharedType)), SharedType, SharedAddresses[N].first.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); @@ -1065,7 +1072,7 @@ void ReductionCodeGen::emitInitialization( emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(), + PrivateAddr, SharedLVal.getAddress(CGF), SharedLVal.getType()); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { @@ -1102,15 +1109,15 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); } else { - LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), CGF.ConvertTypeForMem(ElTy)), BaseLV.getType(), BaseLV.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); @@ -1174,15 +1181,15 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( - BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); llvm::Value *PrivatePointer = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivateAddr.getPointer(), - SharedAddresses[N].first.getAddress().getType()); + SharedAddresses[N].first.getAddress(CGF).getType()); llvm::Value 
*Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getAddress().getType(), + OriginalBaseLValue.getAddress(CGF).getType(), OriginalBaseLValue.getAlignment(), Ptr); } BaseDecls.emplace_back( @@ -1259,6 +1266,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, loadOffloadInfoMetadata(); } +bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit at least a definition for the aliasee if the address of the + // original function is requested. + if (IsForDefinition || OrigAddr) + (void)CGM.GetAddrOfGlobal(NewGD); + StringRef NewMangledName = CGM.getMangledName(NewGD); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); + if (Addr && !Addr->isDeclaration()) { + const auto *D = cast<FunctionDecl>(OldGD.getDecl()); + const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); + llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); + + // Create a reference to the named value. This ensures that it is emitted + // if a deferred decl. + llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); + + // Create the new alias itself, but don't set a name yet. + auto *GA = + llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); + + if (OrigAddr) { + assert(OrigAddr->isDeclaration() && "Expected declaration"); + + GA->takeName(OrigAddr); + OrigAddr->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); + OrigAddr->eraseFromParent(); + } else { + GA->setName(CGM.getMangledName(OldGD)); + } + + // Set attributes which are particular to an alias; this is a + // specialization of the attributes which may be set on a global function. + if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || + D->isWeakImported()) + GA->setLinkage(llvm::Function::WeakAnyLinkage); + + CGM.SetCommonAttributes(OldGD, GA); + return true; + } + return false; +} + void CGOpenMPRuntime::clear() { InternalVars.clear(); // Clean non-target variable declarations possibly used only in debug info. @@ -1272,6 +1325,14 @@ void CGOpenMPRuntime::clear() { continue; GV->eraseFromParent(); } + // Emit aliases for the deferred aliasees. + for (const auto &Pair : DeferredVariantFunction) { + StringRef MangledName = CGM.getMangledName(Pair.second.second); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); + // If not able to emit alias, just emit original declaration.
+ (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, + /*IsForDefinition=*/false); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1321,12 +1382,12 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(CGF); }); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(CGF); }); (void)Scope.Privatize(); if (!IsCombiner && Out->hasInit() && @@ -1377,6 +1438,52 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } +namespace { +// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR +// Builder if one is present. +struct PushAndPopStackRAII { + PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, + bool HasCancel) + : OMPBuilder(OMPBuilder) { + if (!OMPBuilder) + return; + + // The following callback is the crucial part of clang's cleanup process. + // + // NOTE: + // Once the OpenMPIRBuilder is used to create parallel regions (and + // similar), the cancellation destination (Dest below) is determined via + // IP. That means if we have variables to finalize we split the block at IP, + // use the new block (=BB) as destination to build a JumpDest (via + // getJumpDestInCurrentScope(BB)) which then is fed to + // EmitBranchThroughCleanup. Furthermore, there will not be the need + // to push & pop a FinalizationInfo object. + // The FiniCB will still be needed but at the point where the + // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. + auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { + assert(IP.getBlock()->end() == IP.getPoint() && + "Clang CG should cause non-terminated block!"); + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.restoreIP(IP); + CodeGenFunction::JumpDest Dest = + CGF.getOMPCancelDestination(OMPD_parallel); + CGF.EmitBranchThroughCleanup(Dest); + }; + + // TODO: Remove this once we emit parallel regions through the + // OpenMPIRBuilder as it can do this setup internally. + llvm::OpenMPIRBuilder::FinalizationInfo FI( + {FiniCB, OMPD_parallel, HasCancel}); + OMPBuilder->pushFinalizationCB(std::move(FI)); + } + ~PushAndPopStackRAII() { + if (OMPBuilder) + OMPBuilder->popFinalizationCB(); + } + llvm::OpenMPIRBuilder *OMPBuilder; +}; +} // namespace + static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1401,6 +1508,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( else if (const auto *OPFD = dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); + + // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new + // parallel region to make cancellation barriers work properly.
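
tryEmitDeclareVariant above implements `#pragma omp declare variant` by aliasing: once the selected variant function has a definition, the base function's mangled name is bound to it through a GlobalAlias, so existing references resolve to the variant without rewriting call sites. The aliasing step in reduced form (VariantFn, FnTy and the name are placeholders):

    auto *GA = llvm::GlobalAlias::create(FnTy, /*AddressSpace=*/0,
                                         llvm::GlobalValue::ExternalLinkage,
                                         "base_function", VariantFn, &M);
    // A pre-existing declaration of the base function is instead RAUW'ed
    // with the alias and erased, as the code above does.
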
+ llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); + PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -1436,7 +1548,7 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), TaskTVar->getType()->castAs<PointerType>()) - .getPointer()}; + .getPointer(CGF)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); }; CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, @@ -1638,18 +1750,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, return ThreadID; } // If exceptions are enabled, do not use parameter to avoid possible crash. - if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || - !CGF.getLangOpts().CXXExceptions || - CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. - LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + if (auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo->getThreadIDVariable()) { + // Check if this an outlined function with thread id passed as argument. + LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); + if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || + !CGF.getLangOpts().CXXExceptions || + CGF.Builder.GetInsertBlock() == TopBlock || + !isa<llvm::Instruction>(LVal.getPointer(CGF)) || + cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == + TopBlock || + cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == + CGF.Builder.GetInsertBlock()) { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in // function. 
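
PushAndPopStackRAII keeps clang's cleanup machinery and the shared OpenMPIRBuilder in sync: for the lifetime of an outlined parallel region, the builder holds a FinalizationInfo whose callback funnels cancellation exits back through EmitBranchThroughCleanup. The push/pop shape it wraps, assuming an initialized llvm::OpenMPIRBuilder OMPBuilder and a FiniCB as defined above:

    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder.pushFinalizationCB(std::move(FI));
    // ... emit the region body; a cancellation barrier created in here (see the
    // CreateBarrier path in emitBarrierCall further below) can reach FiniCB ...
    OMPBuilder.popFinalizationCB();
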
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + if (CGF.Builder.GetInsertBlock() == TopBlock) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } @@ -1686,6 +1804,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { UDRMap.erase(D); FunctionUDRMap.erase(CGF.CurFn); } + auto I = FunctionUDMMap.find(CGF.CurFn); + if (I != FunctionUDMMap.end()) { + for(auto *D : I->second) + UDMMap.erase(D); + FunctionUDMMap.erase(I); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -2352,26 +2476,6 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); break; } - case OMPRTL__tgt_register_lib: { - // Build void __tgt_register_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); - break; - } - case OMPRTL__tgt_unregister_lib: { - // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); - break; - } case OMPRTL__tgt_target_data_begin: { // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); @@ -2459,6 +2563,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); break; } + case OMPRTL__tgt_mapper_num_components: { + // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); + break; + } + case OMPRTL__tgt_push_mapper_component: { + // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, + CGM.Int64Ty, CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -2552,6 +2674,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { return CGM.CreateRuntimeFunction(FnTy, Name); } +/// Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. 
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum) { + SourceManager &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); +} + Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); @@ -2563,19 +2711,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; + OS << CGM.getMangledName(GlobalDecl(VD)); + if (!VD->isExternallyVisible()) { + unsigned DeviceID, FileID, Line; + getTargetEntryUniqueInfo(CGM.getContext(), + VD->getCanonicalDecl()->getBeginLoc(), + DeviceID, FileID, Line); + OS << llvm::format("_%x", FileID); + } + OS << "_decl_tgt_ref_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), PtrName); - if (!CGM.getLangOpts().OpenMPIsDevice) { - auto *GV = cast<llvm::GlobalVariable>(Ptr); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + + auto *GV = cast<llvm::GlobalVariable>(Ptr); + GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + + if (!CGM.getLangOpts().OpenMPIsDevice) GV->setInitializer(CGM.GetAddrOfGlobal(VD)); - } - CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); } return Address(Ptr, CGM.getContext().getDeclAlign(VD)); @@ -2749,35 +2905,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -/// Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. 
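
getTargetEntryUniqueInfo is hoisted above getAddrOfDeclareTargetVar (its old position below is deleted) because declare-target reference pointers for internal-linkage variables now fold the file's unique ID into their name (the _%x suffix above), so identically named statics from different translation units cannot collide. The filesystem part of that key, sketched for a hypothetical path:

    llvm::sys::fs::UniqueID ID;
    if (std::error_code EC = llvm::sys::fs::getUniqueID("/tmp/foo.c", ID))
      llvm::errs() << "cannot stat: " << EC.message() << '\n';
    // The device/inode pair identifies the file regardless of path spelling.
    uint64_t Dev = ID.getDevice(), File = ID.getFile();
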
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - SourceManager &SM = C.getSourceManager(); - - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) - - assert(Loc.isValid() && "Source location is expected to be always valid."); - - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - SM.getDiagnostics().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); -} - bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return false; Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || @@ -2889,10 +3022,15 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { std::string Suffix = getName({"artificial", ""}); - std::string CacheSuffix = getName({"cache", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && + CGM.getTarget().isTLSSupported()) { + cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); + return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); + } + std::string CacheSuffix = getName({"cache", ""}); llvm::Value *Args[] = { emitUpdateLocation(CGF, SourceLocation()), getThreadID(CGF, SourceLocation()), @@ -2906,12 +3044,12 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), VarLVType->getPointerTo(/*AddrSpace=*/0)), - CGM.getPointerAlign()); + CGM.getContext().getTypeAlignInChars(VarType)); } -void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, - const RegionCodeGenTy &ThenGen, - const RegionCodeGenTy &ElseGen) { +void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, + const RegionCodeGenTy &ThenGen, + const RegionCodeGenTy &ElseGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting @@ -2981,14 +3119,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); - // OutlinedFn(>id, &zero, CapturedStruct); - Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, - /*Name*/ ".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + // OutlinedFn(>id, &zero_bound, CapturedStruct); + Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); + Address ZeroAddrBound = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; // ThreadId for serialized parallels is 0. 
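
The getAddrOfArtificialThreadPrivate change above adds a fast path whenever the target supports TLS: the internal global is simply marked thread_local and returned, and the __kmpc_threadprivate_cached runtime call survives only as the fallback. Roughly (a sketch; GV stands for the internal global created above):

    if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && Target.isTLSSupported()) {
      GV->setThreadLocal(true); // plain TLS storage, no runtime bookkeeping
      return Address(GV, Ctx.getTypeAlignInChars(VarType));
    }
    // otherwise: fetch per-thread storage via __kmpc_threadprivate_cached(...)
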
- OutlinedFnArgs.push_back(ZeroAddr.getPointer()); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2999,7 +3139,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, EndArgs); }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + emitIfClause(CGF, IfCond, ThenGen, ElseGen); } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); @@ -3017,7 +3157,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) - return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); + return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); llvm::Value *ThreadID = getThreadID(CGF, Loc); QualType Int32Ty = @@ -3283,9 +3423,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // <copy_func>, did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); - QualType CopyprivateArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType CopyprivateArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); @@ -3293,7 +3433,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), + CGF.VoidPtrTy), Elem); } // Build function that copies private values from single region to all other @@ -3375,6 +3516,16 @@ void CGOpenMPRuntime::getDefaultScheduleAndChunk( void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { + // Check if we should use the OMPBuilder + auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); + llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); + if (OMPBuilder) { + CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( + CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); + return; + } + if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); @@ -3384,8 +3535,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); @@ -3472,7 +3622,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } -static int addMonoNonMonoModifier(OpenMPSchedType Schedule, +static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, 
OpenMPScheduleClauseModifier M2) { int Modifier = 0; @@ -3506,6 +3656,20 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, case OMPC_SCHEDULE_MODIFIER_unknown: break; } + // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. + // If the static schedule kind is specified or if the ordered clause is + // specified, and if the nonmonotonic modifier is not specified, the effect is + // as if the monotonic modifier is specified. Otherwise, unless the monotonic + // modifier is specified, the effect is as if the nonmonotonic modifier is + // specified. + if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { + if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static_chunked || + Schedule == OMP_dist_sch_static)) + Modifier = OMP_sch_modifier_nonmonotonic; + } return Schedule | Modifier; } @@ -3530,13 +3694,14 @@ void CGOpenMPRuntime::emitForDispatchInit( llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - DispatchValues.LB, // Lower - DispatchValues.UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } @@ -3578,7 +3743,7 @@ static void emitForStaticInitCall( llvm::Value *Args[] = { UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, M2)), // Schedule type Values.IL.getPointer(), // &isLastIter Values.LB.getPointer(), // &LB @@ -3693,37 +3858,15 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, - OpenMPProcBindClauseKind ProcBind, + ProcBindKind ProcBind, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; - // Constants for proc bind value accepted by the runtime.
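
The OpenMP 5.0 block added above flips the default ordering claim for worksharing loops: without an explicit modifier, static and ordered schedules stay monotonic, while everything else is now treated as nonmonotonic, which permits work stealing in the runtime. In source terms:

    #pragma omp for schedule(dynamic)            // 5.0: as if nonmonotonic:dynamic
    #pragma omp for schedule(monotonic: dynamic) // explicit opt-out, stays monotonic
    #pragma omp for schedule(static)             // unchanged: effectively monotonic
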
- enum ProcBindTy { - ProcBindFalse = 0, - ProcBindTrue, - ProcBindMaster, - ProcBindClose, - ProcBindSpread, - ProcBindIntel, - ProcBindDefault - } RuntimeProcBind; - switch (ProcBind) { - case OMPC_PROC_BIND_master: - RuntimeProcBind = ProcBindMaster; - break; - case OMPC_PROC_BIND_close: - RuntimeProcBind = ProcBindClose; - break; - case OMPC_PROC_BIND_spread: - RuntimeProcBind = ProcBindSpread; - break; - case OMPC_PROC_BIND_unknown: - llvm_unreachable("Unsupported proc_bind value."); - } + assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; + llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); } @@ -3899,157 +4042,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: Action(E.getKey(), E.getValue()); } -llvm::Function * -CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { - // If we don't have entries or if we are emitting code for the device, we - // don't need to do anything. - if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) - return nullptr; - - llvm::Module &M = CGM.getModule(); - ASTContext &C = CGM.getContext(); - - // Get list of devices we care about - const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; - - // We should be creating an offloading descriptor only if there are devices - // specified. - assert(!Devices.empty() && "No OpenMP offloading devices??"); - - // Create the external variables that will point to the begin and end of the - // host entries section. These will be defined by the linker. - llvm::Type *OffloadEntryTy = - CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); - std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); - auto *HostEntriesBegin = new llvm::GlobalVariable( - M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - EntriesBeginName); - std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); - auto *HostEntriesEnd = - new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, EntriesEndName); - - // Create all device images - auto *DeviceImageTy = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); - ConstantInitBuilder DeviceImagesBuilder(CGM); - ConstantArrayBuilder DeviceImagesEntries = - DeviceImagesBuilder.beginArray(DeviceImageTy); - - for (const llvm::Triple &Device : Devices) { - StringRef T = Device.getTriple(); - std::string BeginName = getName({"omp_offloading", "img_start", ""}); - auto *ImgBegin = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(BeginName).concat(T)); - std::string EndName = getName({"omp_offloading", "img_end", ""}); - auto *ImgEnd = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(EndName).concat(T)); - - llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, - HostEntriesEnd}; - createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, - DeviceImagesEntries); - } - - // Create device images global array. 
- std::string ImagesName = getName({"omp_offloading", "device_images"}); - llvm::GlobalVariable *DeviceImages = - DeviceImagesEntries.finishAndCreateGlobal(ImagesName, - CGM.getPointerAlign(), - /*isConstant=*/true); - DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - // This is a Zero array to be used in the creation of the constant expressions - llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), - llvm::Constant::getNullValue(CGM.Int32Ty)}; - - // Create the target region descriptor. - llvm::Constant *Data[] = { - llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), - llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), - DeviceImages, Index), - HostEntriesBegin, HostEntriesEnd}; - std::string Descriptor = getName({"omp_offloading", "descriptor"}); - llvm::GlobalVariable *Desc = createGlobalStruct( - CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); - - // Emit code to register or unregister the descriptor at execution - // startup or closing, respectively. - - llvm::Function *UnRegFn; - { - FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); - Args.push_back(&DummyPtr); - - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. - CGF.disableDebugInfo(); - const auto &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); - UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); - CGF.FinishFunction(); - } - llvm::Function *RegFn; - { - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. - CGF.disableDebugInfo(); - const auto &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - - // Encode offload target triples into the registration function name. It - // will serve as a comdat key for the registration/unregistration code for - // this particular combination of offloading targets. - SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); - RegFnNameParts[0] = "omp_offloading"; - RegFnNameParts[1] = "descriptor_reg"; - llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), - [](const llvm::Triple &T) -> const std::string& { - return T.getTriple(); - }); - llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); - std::string Descriptor = getName(RegFnNameParts); - RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); - // Create a variable to drive the registration and unregistration of the - // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), - SourceLocation(), nullptr, C.CharTy, - ImplicitParamDecl::Other); - CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); - CGF.FinishFunction(); - } - if (CGM.supportsCOMDAT()) { - // It is sufficient to call registration function only once, so create a - // COMDAT group for registration/unregistration functions and associated - // data. That would reduce startup time and code size. Registration - // function serves as a COMDAT group key. - llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); - RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); - RegFn->setComdat(ComdatKey); - UnRegFn->setComdat(ComdatKey); - DeviceImages->setComdat(ComdatKey); - Desc->setComdat(ComdatKey); - } - return RegFn; -} - void CGOpenMPRuntime::createOffloadEntry( llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage) { @@ -4077,8 +4069,7 @@ void CGOpenMPRuntime::createOffloadEntry( Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. - std::string Section = getName({"omp_offloading", "entries"}); - Entry->setSection(Section); + Entry->setSection("omp_offloading_entries"); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -4091,13 +4082,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Right now we only generate metadata for function that contain target // regions. - // If we do not have entries, we don't need to do anything. - if (OffloadEntriesInfoManager.empty()) + // If we are in simd mode or there are no entries, we don't need to do + // anything. + if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); - SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, + SourceLocation, StringRef>, + 16> OrderedEntries(OffloadEntriesInfoManager.size()); llvm::SmallVector<StringRef, 16> ParentFunctions( OffloadEntriesInfoManager.size()); @@ -4115,7 +4109,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( + [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, + &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -4133,8 +4128,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(FileID), GetMDString(ParentName), GetMDInt(Line), GetMDInt(E.getOrder())}; + SourceLocation Loc; + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == DeviceID && + I->getFirst()->getUniqueID().getFile() == FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), Line, 1); + break; + } + } // Save this entry in the right position of the ordered entries array. 
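
With createOffloadingBinaryDescriptorRegistration deleted, CodeGen no longer fabricates a __tgt_bin_desc plus ctor/dtor registration calls; each entry is simply placed in the well-known omp_offloading_entries section (see the createOffloadEntry hunk above), and descriptor registration is left to later toolchain stages, presumably an offload wrapper that walks the section between its linker-provided start/stop symbols. The emission shape, with Ty, Init, and the entry name as placeholders:

    auto *Entry = new llvm::GlobalVariable(M, Ty, /*isConstant=*/true,
                                           llvm::GlobalValue::WeakAnyLinkage,
                                           Init, ".omp_offloading.entry.foo");
    Entry->setSection("omp_offloading_entries"); // gathered at link time
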
- OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. @@ -4162,7 +4168,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = + std::make_tuple(&E, SourceLocation(), MangledName); // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -4171,11 +4178,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( DeviceGlobalVarMetadataEmitter); - for (const auto *E : OrderedEntries) { - assert(E && "All ordered entries must exist!"); + for (const auto &E : OrderedEntries) { + assert(std::get<0>(E) && "All ordered entries must exist!"); if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( - E)) { + std::get<0>(E))) { if (!CE->getID() || !CE->getAddress()) { // Do not blame the entry if the parent function is not emitted. StringRef FnName = ParentFunctions[CE->getOrder()]; if (!CGM.GetGlobalValue(FnName)) continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, - "Offloading entry for target region is incorrect: either the " + "Offloading entry for target region in %0 is incorrect: either the " "address or the ID is invalid."); - CGM.getDiags().Report(DiagID); + CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; continue; } createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = - dyn_cast<OffloadEntriesInfoManagerTy:: - OffloadEntryInfoDeviceGlobalVar>(E)) { + } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: + OffloadEntryInfoDeviceGlobalVar>( + std::get<0>(E))) { OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( CE->getFlags()); @@ -4203,10 +4210,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); continue; } // The variable has no definition - no need to add the entry. @@ -4349,57 +4356,6 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { return TgtOffloadEntryQTy; } -QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { - // These are the types we need to build: - // struct __tgt_device_image{ - // void *ImageStart; // Pointer to the target code start. - // void *ImageEnd; // Pointer to the target code end. - // // We also add the host entries to the device image, as it may be useful - // // for the target runtime to have access to that information. - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all - // // the entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive).
- // }; - if (TgtDeviceImageQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtDeviceImageQTy = C.getRecordType(RD); - } - return TgtDeviceImageQTy; -} - -QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { - // struct __tgt_bin_desc{ - // int32_t NumDevices; // Number of devices supported. - // __tgt_device_image *DeviceImages; // Arrays of device images - // // (one per device). - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the - // // entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). - // }; - if (TgtBinaryDescriptorQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); - RD->startDefinition(); - addFieldToRecordDecl( - C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtBinaryDescriptorQTy = C.getRecordType(RD); - } - return TgtBinaryDescriptorQTy; -} - namespace { struct PrivateHelpersTy { PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, @@ -4559,7 +4515,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - llvm::Value *PartidParam = PartIdLVal.getPointer(); + llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -4572,7 +4528,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivatesLVal.getPointer(), CGF.VoidPtrTy); + PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -4581,7 +4537,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMap, CGF.Builder .CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) + TDBase.getAddress(CGF), CGF.VoidPtrTy) .getPointer()}; SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); @@ -4659,7 +4615,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, if (QualType::DestructionKind DtorKind = Field->getType().isDestructedType()) { LValue FieldLValue = CGF.EmitLValueForField(Base, Field); - CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); + CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); } } CGF.FinishFunction(); @@ -4757,8 +4713,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, LValue RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); LValue 
RefLoadLVal = CGF.EmitLoadOfPointerLValue( - RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); - CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); + RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); ++Counter; } CGF.FinishFunction(); @@ -4823,7 +4779,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } else { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + Address(SharedRefLValue.getPointer(CGF), + C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), SharedRefLValue.getTBAAInfo()); } @@ -4836,7 +4793,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array using element-by-element // initialization. CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), + Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. @@ -4854,8 +4812,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); + InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { + return SharedRefLValue.getAddress(CGF); }); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); @@ -5242,7 +5200,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), - ArrayType::Normal, /*IndexTypeQuals=*/0); + nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); @@ -5255,10 +5213,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = - CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); + llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( + UpAddrLVal.getPointer(CGF), /*Idx0=*/1); llvm::Value *LowIntPtr = - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else { @@ -5271,7 +5229,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(<Dependences[i].second>); LValue LenLVal = CGF.EmitLValueForField( @@ -5388,7 +5346,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); + emitIfClause(CGF, IfCond, 
ThenCodeGen, ElseCodeGen); } else { RegionCodeGenTy ThenRCG(ThenCodeGen); ThenRCG(CGF); @@ -5425,21 +5383,24 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); const auto *LBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), + LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); const auto *UBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), + UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); const auto *StVar = cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); - CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), + StLVal.getQuals(), /*IsInitializer=*/true); // Store reductions address. LValue RedLVal = CGF.EmitLValueForField( @@ -5448,7 +5409,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); } else { - CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.EmitNullInitialization(RedLVal.getAddress(CGF), CGF.getContext().VoidPtrTy); } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; @@ -5457,11 +5418,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ThreadID, Result.NewTask, IfVal, - LBLVal.getPointer(), - UBLVal.getPointer(), + LBLVal.getPointer(CGF), + UBLVal.getPointer(CGF), CGF.EmitLoadOfScalar(StLVal, Loc), llvm::ConstantInt::getSigned( - CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -5763,7 +5724,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); @@ -5773,7 +5734,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
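For context, a minimal sketch of the handshake this reduction list feeds. The call below is libomp's public reduce entry point; it sits outside this hunk, and the variable names are invented for illustration:

  // .omp.reduction.red_list is a void*[N]; slot I holds the address of the
  // I-th shared reduction item (variably-sized items also store a length).
  void *RedList[2] = {&Item0, &Item1};
  // The runtime arbitrates the combine step and returns which reduction
  // path (serialized, critical-section, or atomic) the caller must take:
  kmp_int32 Res = __kmpc_reduce(Loc, GTid, /*num_vars=*/2, sizeof(RedList),
                                RedList, ReduceFunc, &CritLock);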
@@ -6201,7 +6162,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, // Emit the finalizer body: // <destroy>(<type>* %0) RCG.emitCleanups(CGF, N, PrivateAddr); - CGF.FinishFunction(); + CGF.FinishFunction(Loc); return Fn; } @@ -6235,7 +6196,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( unsigned Size = Data.ReductionVars.size(); llvm::APInt ArraySize(/*numBits=*/64, Size); QualType ArrayRDType = C.getConstantArrayType( - RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, @@ -6253,7 +6214,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); RCG.emitSharedLValue(CGF, Cnt); llvm::Value *CastedShared = - CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); CGF.EmitStoreOfScalar(CastedShared, SharedLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; @@ -6296,7 +6257,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else - CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), + FlagsLVal.getType()); } // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void // *data); @@ -6332,7 +6294,7 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), SharedAddr, /*IsVolatile=*/false); } } @@ -6343,12 +6305,12 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, LValue SharedLVal) { // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); - llvm::Value *Args[] = { - CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, - /*isSigned=*/true), - ReductionsPtr, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), - CGM.VoidPtrTy)}; + llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; return Address( CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), @@ -6471,8 +6433,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, - [](CodeGenFunction &, PrePostActionTy &) {}); + emitIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); @@ -6685,6 +6647,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -6720,12 +6683,17 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case 
OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -6990,6 +6958,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -7025,12 +6994,17 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -7079,12 +7053,24 @@ public: OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, + /// Close is a hint to the runtime to allocate memory close to + /// the target device. + OMP_MAP_CLOSE = 0x400, /// The 16 MSBs of the flags indicate whether the entry is a member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), }; + /// Get the offset of the OMP_MAP_MEMBER_OF field. + static unsigned getFlagMemberOffset() { + unsigned Offset = 0; + for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); + Remain = Remain >> 1) + Offset++; + return Offset; + } + /// Class that associates information with a base pointer to be passed to the /// runtime library. class BasePointerInfo { @@ -7148,8 +7134,11 @@ private: : IE(IE), VD(VD) {} }; - /// Directive from where the map clauses were extracted. - const OMPExecutableDirective &CurDir; + /// The target directive from where the mappable clauses were extracted. It + /// is either an executable directive or a user-defined mapper directive. + llvm::PointerUnion<const OMPExecutableDirective *, + const OMPDeclareMapperDecl *> + CurDir; /// Function the directive is being generated for. CodeGenFunction &CGF; @@ -7181,9 +7170,11 @@ private: OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); - // If there is no length associated with the expression, that means we - // are using the whole length of the base. - if (!OAE->getLength() && OAE->getColonLoc().isValid()) + // If there is no length associated with the expression and lower bound is + // not specified either, that means we are using the whole length of the + // base. + if (!OAE->getLength() && OAE->getColonLoc().isValid() && + !OAE->getLowerBound()) return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; @@ -7197,13 +7188,30 @@ private: // If we don't have a length at this point, that is because we have an // array section with a single element.
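// (Illustration of the two no-length sub-cases handled next: with the colon
// absent the section denotes a single element, while "a[lb:]" (colon
// present, length omitted) runs from lb to the end of the base; e.g. for
// "int a[10]" mapped as "a[2:]" the emitted size is
// max(sizeof(a) - 2 * sizeof(int), 0), i.e. 32 bytes with 4-byte int.)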
- if (!OAE->getLength()) + if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) return ElemSize; - llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); - LengthVal = - CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); - return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + if (const Expr *LenExpr = OAE->getLength()) { + llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); + LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), + CGF.getContext().getSizeType(), + LenExpr->getExprLoc()); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + assert(!OAE->getLength() && OAE->getColonLoc().isValid() && + OAE->getLowerBound() && "expected array_section[lb:]."); + // Size = sizetype - lb * elemtype; + llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); + llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); + LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), + CGF.getContext().getSizeType(), + OAE->getLowerBound()->getExprLoc()); + LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); + llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); + llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); + LengthVal = CGF.Builder.CreateSelect( + Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); + return LengthVal; } return CGF.getTypeSize(ExprTy); } @@ -7247,6 +7255,9 @@ private: if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) != MapModifiers.end()) Bits |= OMP_MAP_ALWAYS; + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) + != MapModifiers.end()) + Bits |= OMP_MAP_CLOSE; return Bits; } @@ -7486,11 +7497,11 @@ private: } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || (OASE && isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = @@ -7584,8 +7595,8 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - Address LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object @@ -7632,7 +7643,7 @@ private: if (MC.getAssociatedDeclaration()) { ComponentLB = CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(); + .getAddress(CGF); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); @@ -7675,10 +7686,10 @@ private: if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, - // then we reset the TO/FROM/ALWAYS/DELETE flags. + // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 
if (IsPointer) Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | - OMP_MAP_DELETE); + OMP_MAP_DELETE | OMP_MAP_CLOSE); if (ShouldBeMemberOf) { // Set placeholder value MEMBER_OF=FFFF to indicate that the flag @@ -7752,9 +7763,9 @@ private: } static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. + // Rotate by getFlagMemberOffset() bits. return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) - << 48); + << getFlagMemberOffset()); } static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, @@ -7834,7 +7845,7 @@ private: public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(Dir), CGF(CGF) { + : CurDir(&Dir), CGF(CGF) { // Extract firstprivate clause information. for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) for (const auto *D : C->varlists()) @@ -7846,6 +7857,10 @@ public: DevPointersMap[L.first].push_back(L.second); } + /// Constructor for the declare mapper directive. + MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) {} + /// Generate code for the combined entry if we have a partially mapped struct /// and take care of the mapping flags of the arguments corresponding to /// individual struct members. @@ -7907,19 +7922,21 @@ public: IsImplicit); }; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (const auto &L : C->component_lists()) { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (const auto &L : C->component_lists()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (const auto &L : C->component_lists()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } @@ -7933,10 +7950,9 @@ public: llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> DeferredInfo; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (const auto *C : - this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { - for (const auto &L : C->component_lists()) { + CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { + for (const auto L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); @@ -7964,7 +7980,6 @@ public: // We didn't find any match in our map information - generate a zero // size array section - if the pointer is a struct member we defer this // action until the whole struct has been processed. 
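// (The "zero size array section" emitted in the else branch below is the
// triple {&ptr, ptr, /*size=*/0} tagged RETURN_PARAM | TARGET_PARAM: no
// bytes are transferred, the runtime only writes back the translated device
// address for the pointer.)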
- // FIXME: MSVC 2013 seems to require this-> to find member CGF. if (isa<MemberExpr>(IE)) { // Insert the pointer into Info to be processed by // generateInfoForComponentList. Because it is a member pointer @@ -7977,11 +7992,11 @@ public: /*ReturnDevicePointer=*/false, C->isImplicit()); DeferredInfo[nullptr].emplace_back(IE, VD); } else { - llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( - this->CGF.EmitLValue(IE), IE->getExprLoc()); + llvm::Value *Ptr = + CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); BasePointers.emplace_back(Ptr, VD); Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); + Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } } @@ -8005,11 +8020,10 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - // FIXME: MSVC 2013 seems to require this-> to find the member method. - this->generateInfoForComponentList( - L.MapType, L.MapModifiers, L.Components, CurBasePointers, - CurPointers, CurSizes, CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8033,7 +8047,7 @@ public: auto CI = DeferredInfo.find(M.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); + llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); CurBasePointers.emplace_back(BasePtr, L.VD); @@ -8061,6 +8075,78 @@ public: } } + /// Generate all the base pointers, section pointers, sizes and map types for + /// the extracted map clauses of user-defined mapper. + void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPDeclareMapperDecl *>() && + "Expect a declare mapper directive"); + const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = [&Info]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, + bool ReturnDevicePointer, bool IsImplicit) { + const ValueDecl *VD = + D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, + IsImplicit); + }; + + for (const auto *C : CurMapperDir->clauselists()) { + const auto *MC = cast<OMPMapClause>(C); + for (const auto L : MC->component_lists()) { + InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), + /*ReturnDevicePointer=*/false, MC->isImplicit()); + } + } + + for (const auto &M : Info) { + // We need to know when we generate information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + + // Temporary versions of arrays + MapBaseValuesArrayTy CurBasePointers; + MapValuesArrayTy CurPointers; + MapValuesArrayTy CurSizes; + MapFlagsArrayTy CurTypes; + StructRangeInfoTy PartialStruct; + + for (const MapInfo &L : M.second) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); + IsFirstComponentList = false; + } + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, + PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + Types.append(CurTypes.begin(), CurTypes.end()); + } + } + /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures( const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, @@ -8083,9 +8169,10 @@ public: LValue ThisLVal = CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); - LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(ThisLVal.getPointer()); - Pointers.push_back(ThisLValVal.getPointer()); + LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(ThisLVal.getPointer(CGF)); + Pointers.push_back(ThisLValVal.getPointer(CGF)); Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); @@ -8103,17 +8190,19 @@ public: LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); if (LC.getCaptureKind() == LCK_ByRef) { LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); - Pointers.push_back(VarLValVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); + Pointers.push_back(VarLValVal.getPointer(CGF)); Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( VD->getType().getCanonicalType().getNonReferenceType()), CGF.Int64Ty, /*isSigned=*/true)); } else { RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); Pointers.push_back(VarRVal.getScalarVal()); Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } @@ -8184,9 +8273,11 @@ public: std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; SmallVector<MapData, 4> DeclComponentLists; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { - for (const auto &L : C->decl_component_lists(VD)) { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + for (const auto L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && @@ -8333,10 +8424,13 @@ public: MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. 
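// (For reference, such a variable comes from source like
//   int G;
//   #pragma omp declare target link(G)
// where, unlike "declare target to", the device copy of G is associated
// lazily, when a target construct actually maps it.)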
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { - for (const auto &L : C->component_lists()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + for (const auto L : C->component_lists()) { if (!L.first) continue; const auto *VD = dyn_cast<VarDecl>(L.first); @@ -8414,7 +8508,7 @@ public: CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); // Copy the value of the original variable to the new global copy. CGF.Builder.CreateMemCpy( - CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), + CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), CurSizes.back(), /*IsVolatile=*/false); // Use new global variable as the base pointers. @@ -8472,9 +8566,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, } llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); - QualType PointerArrayType = - Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType PointerArrayType = Ctx.getConstantArrayType( + Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); @@ -8487,9 +8581,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, QualType Int64Ty = Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = - Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType SizeArrayType = Ctx.getConstantArrayType( + Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { @@ -8562,6 +8656,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, } } } + /// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. static void emitOffloadingArraysArgument( @@ -8642,6 +8737,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -8677,12 +8773,17 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -8692,10 +8793,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { return nullptr; } +/// Emit the user-defined mapper function. The code generation follows the +/// pattern in the example below. +/// \code +/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, +/// void *base, void *begin, +/// int64_t size, int64_t type) { +/// // Allocate space for an array section first. +/// if (size > 1 && !maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// // Map members. 
+/// for (unsigned i = 0; i < size; i++) { +/// // For each component specified by this mapper: +/// for (auto c : all_components) { +/// if (c.hasMapper()) +/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, +/// c.arg_type); +/// else +/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, +/// c.arg_begin, c.arg_size, c.arg_type); +/// } +/// } +/// // Delete the array section. +/// if (size > 1 && maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// } +/// \endcode +void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (UDMMap.count(D) > 0) + return; + ASTContext &C = CGM.getContext(); + QualType Ty = D->getType(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + auto *MapperVarDecl = + cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); + SourceLocation Loc = D->getLocation(); + CharUnits ElementSize = C.getTypeSizeInChars(Ty); + + // Prepare mapper function arguments and attributes. + ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&HandleArg); + Args.push_back(&BaseArg); + Args.push_back(&BeginArg); + Args.push_back(&SizeArg); + Args.push_back(&TypeArg); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + SmallString<64> TyStr; + llvm::raw_svector_ostream Out(TyStr); + CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); + std::string Name = getName({"omp_mapper", TyStr, D->getName()}); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); + // Start the mapper function code generation. + CodeGenFunction MapperCGF(CGM); + MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); + // Compute the starting and end addresses of array elements. + llvm::Value *Size = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( + MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), + CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); + llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); + llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + // Prepare common arguments for array initialization and deletion.
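// (Handle, BaseIn and BeginIn below are the rt_mapper_handle, base and
// begin parameters from the \code sketch above; they are loaded once since
// both the allocation stub and the deletion stub forward them unchanged to
// __tgt_push_mapper_component.)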
+ llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&HandleArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BaseArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BeginArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + + // Emit array initialization if this is an array section and \p MapType indicates + // that memory allocation is required. + llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, HeadBB, /*IsInit=*/true); + + // Emit a for loop to iterate through SizeArg elements and map all of them. + + // Emit the loop header block. + MapperCGF.EmitBlock(HeadBB); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); + llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); + // Evaluate whether the initial condition is satisfied. + llvm::Value *IsEmpty = + MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); + MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); + + // Emit the loop body block. + MapperCGF.EmitBlock(BodyBB); + llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( + PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); + PtrPHI->addIncoming(PtrBegin, EntryBB); + Address PtrCurrent = + Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) + .getAlignment() + .alignmentOfArrayElement(ElementSize)); + // Privatize the declared variable of the mapper to be the current array element. + CodeGenFunction::OMPPrivateScope Scope(MapperCGF); + Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { + return MapperCGF + .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) + .getAddress(MapperCGF); + }); + (void)Scope.Privatize(); + + // Get map clause information. Fill up the arrays with all mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler MEHandler(*D, MapperCGF); + MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); + + // Call the runtime API __tgt_mapper_num_components to get the number of + // pre-existing components. + llvm::Value *OffloadingArgs[] = {Handle}; + llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); + llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( + PreviousSize, + MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); + + // Fill up the runtime mapper handle for all components. + for (unsigned I = 0; I < BasePointers.size(); ++I) { + llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( + *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( + Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurSizeArg = Sizes[I]; + + // Extract the MEMBER_OF field from the map type.
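// (Worked example: OMP_MAP_MEMBER_OF is 0xffff000000000000, so
// getFlagMemberOffset() returns 48 and member I of a mapped struct carries
// (I + 1) << 48 in these bits; a zero field, which the IsNull test below
// detects, means the component is not a member of any struct.)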
+ llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); + MapperCGF.EmitBlock(MemberBB); + llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); + llvm::Value *Member = MapperCGF.Builder.CreateAnd( + OriMapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); + llvm::BasicBlock *MemberCombineBB = + MapperCGF.createBasicBlock("omp.member.combine"); + llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); + llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); + MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); + // Add the number of pre-existing components to the MEMBER_OF field if it + // is valid. + MapperCGF.EmitBlock(MemberCombineBB); + llvm::Value *CombinedMember = + MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); + // Do nothing if it is not a member of previous components. + MapperCGF.EmitBlock(TypeBB); + llvm::PHINode *MemberMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); + MemberMapType->addIncoming(OriMapType, MemberBB); + MemberMapType->addIncoming(CombinedMember, MemberCombineBB); + + // Combine the map type inherited from user-defined mapper with that + // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM + // bits of the \a MapType, which is the input argument of the mapper + // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM + // bits of MemberMapType. + // [OpenMP 5.0], 1.2.6. map-type decay. + // | alloc | to | from | tofrom | release | delete + // ---------------------------------------------------------- + // alloc | alloc | alloc | alloc | alloc | release | delete + // to | alloc | to | alloc | to | release | delete + // from | alloc | alloc | from | from | release | delete + // tofrom | alloc | to | from | tofrom | release | delete + llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM)); + llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); + llvm::BasicBlock *AllocElseBB = + MapperCGF.createBasicBlock("omp.type.alloc.else"); + llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); + llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); + llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); + llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); + llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); + MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); + // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. + MapperCGF.EmitBlock(AllocBB); + llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(AllocElseBB); + llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); + // In case of to, clear OMP_MAP_FROM. 
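// (Worked example of the table: a member declared map(tofrom) in the
// mapper, invoked through map(to), takes this branch and decays to plain
// 'to', i.e. row "tofrom", column "to" above.)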
+ MapperCGF.EmitBlock(ToBB); + llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(ToElseBB); + llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); + // In case of from, clear OMP_MAP_TO. + MapperCGF.EmitBlock(FromBB); + llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); + // In case of tofrom, do nothing. + MapperCGF.EmitBlock(EndBB); + llvm::PHINode *CurMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); + CurMapType->addIncoming(AllocMapType, AllocBB); + CurMapType->addIncoming(ToMapType, ToBB); + CurMapType->addIncoming(FromMapType, FromBB); + CurMapType->addIncoming(MemberMapType, ToElseBB); + + // TODO: call the corresponding mapper function if a user-defined mapper is + // associated with this map clause. + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, + CurSizeArg, CurMapType}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), + OffloadingArgs); + } + + // Update the pointer to point to the next element that needs to be mapped, + // and check whether we have mapped all elements. + llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( + PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); + PtrPHI->addIncoming(PtrNext, BodyBB); + llvm::Value *IsDone = + MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); + llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); + MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); + + MapperCGF.EmitBlock(ExitBB); + // Emit array deletion if this is an array section and \p MapType indicates + // that deletion is required. + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, DoneBB, /*IsInit=*/false); + + // Emit the function exit block. + MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); + MapperCGF.FinishFunction(); + UDMMap.try_emplace(D, Fn); + if (CGF) { + auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +/// Emit the array initialization or deletion portion for user-defined mapper +/// code generation. First, it evaluates whether an array section is mapped and +/// whether the \a MapType instructs to delete this section. If \a IsInit is +/// true, and \a MapType indicates to not delete this array, array +/// initialization code is generated. If \a IsInit is false, and \a MapType +/// indicates to delete this array, array deletion code is generated. +void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( + CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, + llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, + CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { + StringRef Prefix = IsInit ? ".init" : ".del"; + + // Evaluate if this is an array section.
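// (Each stub boils down to: if the section has at least one element and the
// delete bit matches the IsInit mode, push one component spanning the whole
// section, with the TO/FROM bits masked out so the runtime only allocates
// or only frees storage.)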
+ llvm::BasicBlock *IsDeleteBB = + MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); + llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( + Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); + MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); + + // Evaluate if we are going to delete this section. + MapperCGF.EmitBlock(IsDeleteBB); + llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); + llvm::Value *DeleteCond; + if (IsInit) { + DeleteCond = MapperCGF.Builder.CreateIsNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } else { + DeleteCond = MapperCGF.Builder.CreateIsNotNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } + MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); + + MapperCGF.EmitBlock(BodyBB); + // Get the array size by multiplying element size and element number (i.e., \p + // Size). + llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( + Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); + // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves + // memory allocation/deletion purpose only. + llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); +} + void CGOpenMPRuntime::emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. @@ -8704,30 +9138,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!TD) return; const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, + auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *NumIterations = SizeEmitter(CGF, *LD); - - // Emit device ID if any. 
- llvm::Value *DeviceID; - if (Device) - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); - - llvm::Value *Args[] = {DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { + llvm::Value *Args[] = {DeviceID, NumIterations}; + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + } }; emitInlinedDirective(CGF, OMPD_unknown, CodeGen); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { +void CGOpenMPRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { if (!CGF.HaveInsertPoint()) return; @@ -8746,8 +9174,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, - &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { + &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -8779,6 +9207,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + // Emit tripcount for the target loop-based directive. + emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). @@ -8985,7 +9416,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // specify target triples. 
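// (With the tripcount hunk above, the host announces the loop trip count
// via __kmpc_push_target_tripcount(DeviceID, NumIterations) ahead of the
// actual __tgt_target_teams() launch, and now does so only when the
// SizeEmitter callback can actually compute the count.)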
if (OutlinedFnID) { if (IfCond) { - emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); + emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); @@ -9068,6 +9499,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -9103,12 +9535,17 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -9137,18 +9574,32 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) + if (!CGM.getLangOpts().OpenMPIsDevice) { + if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(nohost) functions for the host. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + return true; + } return false; + } const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); - StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. - if (const auto *FD = dyn_cast<FunctionDecl>(VD)) + if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { + StringRef Name = CGM.getMangledName(GD); scanForTargetRegionsFunctions(FD->getBody(), Name); + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(host) functions for the device. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) + return true; + } // Do not emit the function if it is not marked as declare target. return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && - AlreadyEmittedTargetFunctions.count(Name) == 0; + AlreadyEmittedTargetDecls.count(VD) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -9221,6 +9672,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return; llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) { @@ -9376,20 +9830,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; - StringRef Name = CGM.getMangledName(GD); const auto *D = cast<FunctionDecl>(GD.getDecl()); // Do not emit the function if it is marked as declare target, as it was already // emitted.
if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { - if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) + if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>( + CGM.GetGlobalValue(CGM.getMangledName(GD)))) return !F->isDeclaration(); return false; } return true; } - return !AlreadyEmittedTargetFunctions.insert(Name).second; + return !AlreadyEmittedTargetDecls.insert(D).second; } llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { @@ -9433,17 +9887,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { return RequiresRegFn; } -llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { - // If we have offloading in the current module, we need to emit the entries - // now and register the offloading descriptor. - createOffloadEntriesAndInfoMetadata(); - - // Create and register the offloading binary descriptors. This is the main - // entity that captures all the information about offloading in the current - // compilation unit. - return createOffloadingBinaryDescriptorRegistration(); -} - void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, @@ -9602,7 +10045,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; if (IfCond) { - emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); + emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); } else { RegionCodeGenTy RCG(BeginThenGen); RCG(CGF); @@ -9616,7 +10059,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } if (IfCond) { - emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); + emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); } else { RegionCodeGenTy RCG(EndThenGen); RCG(CGF); @@ -9679,6 +10122,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -9711,12 +10155,17 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -9768,8 +10217,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, TargetThenGen, - [](CodeGenFunction &CGF, PrePostActionTy &) {}); + emitIfClause(CGF, IfCond, TargetThenGen, + [](CodeGenFunction &CGF, PrePostActionTy &) {}); } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); @@ -10307,8 +10756,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ExprLoc = VLENExpr->getExprLoc(); } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) { + if (CGM.getTriple().isX86()) { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { unsigned VLEN = 
VLENVal.getExtValue(); @@ -10377,7 +10825,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); QualType ArrayTy = - C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); + C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); @@ -10428,7 +10876,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( - Int64Ty, Size, ArrayType::Normal, 0); + Int64Ty, Size, nullptr, ArrayType::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { const Expr *CounterVal = C->getLoopData(I); @@ -10566,6 +11014,595 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address(Addr, Align); } +namespace { +using OMPContextSelectorData = + OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>; +using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>; +} // anonymous namespace + +/// Checks current context and returns true if it matches the context selector. +template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx, + typename... Arguments> +static bool checkContext(const OMPContextSelectorData &Data, + Arguments... Params) { + assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && + "Unknown context selector or context selector set."); + return false; +} + +/// Checks for implementation={vendor(<vendor>)} context selector. +/// \returns true iff <vendor>="llvm", false otherwise. +template <> +bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>( + const OMPContextSelectorData &Data) { + return llvm::all_of(Data.Names, + [](StringRef S) { return !S.compare_lower("llvm"); }); +} + +/// Checks for device={kind(<kind>)} context selector. +/// \returns true if <kind>="host" and compilation is for host. +/// true if <kind>="nohost" and compilation is for device. +/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU. +/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN. +/// false otherwise. 
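These checkContext specializations cover the only two selector sets this revision scores, implementation={vendor(...)} and device={kind(...)}; the device-kind specialization follows below. A hypothetical declaration the matcher would resolve:

  int generic_impl(void);
  int gpu_impl(void);
  #pragma omp declare variant(gpu_impl) \
      match(implementation = {vendor(llvm)}, device = {kind(gpu, nohost)})
  int base(void);  // calls to base() dispatch to gpu_impl when both
                   // selector sets match the compilation context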
+template <> +bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>( + const OMPContextSelectorData &Data, CodeGenModule &CGM) { + for (StringRef Name : Data.Names) { + if (!Name.compare_lower("host")) { + if (CGM.getLangOpts().OpenMPIsDevice) + return false; + continue; + } + if (!Name.compare_lower("nohost")) { + if (!CGM.getLangOpts().OpenMPIsDevice) + return false; + continue; + } + switch (CGM.getTriple().getArch()) { + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::aarch64_32: + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + case llvm::Triple::x86: + case llvm::Triple::x86_64: + if (Name.compare_lower("cpu")) + return false; + break; + case llvm::Triple::amdgcn: + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + if (Name.compare_lower("gpu")) + return false; + break; + case llvm::Triple::UnknownArch: + case llvm::Triple::arc: + case llvm::Triple::avr: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: + case llvm::Triple::hexagon: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::r600: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + case llvm::Triple::sparc: + case llvm::Triple::sparcv9: + case llvm::Triple::sparcel: + case llvm::Triple::systemz: + case llvm::Triple::tce: + case llvm::Triple::tcele: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + case llvm::Triple::xcore: + case llvm::Triple::le32: + case llvm::Triple::le64: + case llvm::Triple::amdil: + case llvm::Triple::amdil64: + case llvm::Triple::hsail: + case llvm::Triple::hsail64: + case llvm::Triple::spir: + case llvm::Triple::spir64: + case llvm::Triple::kalimba: + case llvm::Triple::shave: + case llvm::Triple::lanai: + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + case llvm::Triple::renderscript32: + case llvm::Triple::renderscript64: + case llvm::Triple::ve: + return false; + } + } + return true; +} + +static bool matchesContext(CodeGenModule &CGM, + const CompleteOMPContextSelectorData &ContextData) { + for (const OMPContextSelectorData &Data : ContextData) { + switch (Data.Ctx) { + case OMP_CTX_vendor: + assert(Data.CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data)) + return false; + break; + case OMP_CTX_kind: + assert(Data.CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data, + CGM)) + return false; + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return true; +} + +static CompleteOMPContextSelectorData +translateAttrToContextSelectorData(ASTContext &C, + const OMPDeclareVariantAttr *A) { + CompleteOMPContextSelectorData Data; + for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { + Data.emplace_back(); + auto CtxSet = static_cast<OpenMPContextSelectorSetKind>( + *std::next(A->ctxSelectorSets_begin(), I)); + auto Ctx = static_cast<OpenMPContextSelectorKind>( + *std::next(A->ctxSelectors_begin(), I)); + Data.back().CtxSet = CtxSet; + Data.back().Ctx = Ctx; + const Expr *Score = *std::next(A->scores_begin(), I); + Data.back().Score = Score->EvaluateKnownConstInt(C); + switch (Ctx) { + case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + 
"Expected implementation context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); + break; + case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return Data; +} + +static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData; + for (const OMPContextSelectorData &D : RHS) { + auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); + Pair.getSecond().insert(D.Names.begin(), D.Names.end()); + } + bool AllSetsAreEqual = true; + for (const OMPContextSelectorData &D : LHS) { + auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); + if (It == RHSData.end()) + return false; + if (D.Names.size() > It->getSecond().size()) + return false; + if (llvm::set_union(It->getSecond(), D.Names)) + return false; + AllSetsAreEqual = + AllSetsAreEqual && (D.Names.size() == It->getSecond().size()); + } + + return LHS.size() != RHS.size() || !AllSetsAreEqual; +} + +static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + // Score is calculated as sum of all scores + 1. + llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); + if (RHSIsSubsetOfLHS) { + LHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : LHS) { + if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { + LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { + LHSScore += Data.Score.extend(LHSScore.getBitWidth()); + } else { + LHSScore += Data.Score; + } + } + } + llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { + RHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : RHS) { + if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { + RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { + RHSScore += Data.Score.extend(RHSScore.getBitWidth()); + } else { + RHSScore += Data.Score; + } + } + } + return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0; +} + +/// Finds the variant function that matches current context with its context +/// selector. +static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, + const FunctionDecl *FD) { + if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) + return FD; + // Iterate through all DeclareVariant attributes and check context selectors. + const OMPDeclareVariantAttr *TopMostAttr = nullptr; + CompleteOMPContextSelectorData TopMostData; + for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { + CompleteOMPContextSelectorData Data = + translateAttrToContextSelectorData(CGM.getContext(), A); + if (!matchesContext(CGM, Data)) + continue; + // If the attribute matches the context, find the attribute with the highest + // score. 
+ if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) { + TopMostAttr = A; + TopMostData.swap(Data); + } + } + if (!TopMostAttr) + return FD; + return cast<FunctionDecl>( + cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) + ->getDecl()); +} + +bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { + const auto *D = cast<FunctionDecl>(GD.getDecl()); + // If the original function is defined already, use its definition. + StringRef MangledName = CGM.getMangledName(GD); + llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); + if (Orig && !Orig->isDeclaration()) + return false; + const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); + // Emit original function if it does not have declare variant attribute or the + // context does not match. + if (NewFD == D) + return false; + GlobalDecl NewGD = GD.getWithDecl(NewFD); + if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { + DeferredVariantFunction.erase(D); + return true; + } + DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); + return true; +} + +CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( + CodeGenModule &CGM, const OMPLoopDirective &S) + : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + NontemporalDeclsSet &DS = + CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { + for (const Stmt *Ref : C->private_refs()) { + const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); + const ValueDecl *VD; + if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { + VD = DRE->getDecl(); + } else { + const auto *ME = cast<MemberExpr>(SimpleRefExpr); + assert((ME->isImplicitCXXThis() || + isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && + "Expected member of current class."); + VD = ME->getMemberDecl(); + } + DS.insert(VD); + } + } +} + +CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); +} + +bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + + return llvm::any_of( + CGM.getOpenMPRuntime().NontemporalDeclsStack, + [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) + : CGM(CGF.CGM), + NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == + OMPC_LASTPRIVATE_conditional; + })) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + LastprivateConditionalData &Data = + CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + if (C->getKind() != OMPC_LASTPRIVATE_conditional) + continue; + + for (const Expr *Ref : C->varlists()) { + Data.DeclToUniqeName.try_emplace( + cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), + generateUniqueName(CGM, "pl_cond", Ref)); + } + } + Data.IVLVal = IVLVal; + // In simd only mode or for simd directives no need to generate threadprivate + // references for the loop iteration counter, we can use the original one + // since outlining cannot happen in 
simd regions. + if (CGF.getLangOpts().OpenMPSimd || + isOpenMPSimdDirective(S.getDirectiveKind())) { + Data.UseOriginalIV = true; + return; + } + llvm::SmallString<16> Buffer; + llvm::raw_svector_ostream OS(Buffer); + PresumedLoc PLoc = + CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + CGM.getDiags().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_" + << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv"; + Data.IVName = OS.str(); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); +} + +void CGOpenMPRuntime::initLastprivateConditionalCounter( + CodeGenFunction &CGF, const OMPExecutableDirective &S) { + if (CGM.getLangOpts().OpenMPSimd || + !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == OMPC_LASTPRIVATE_conditional; + })) + return; + const CGOpenMPRuntime::LastprivateConditionalData &Data = + LastprivateConditionalStack.back(); + if (Data.UseOriginalIV) + return; + // Global loop counter. Required to handle inner parallel-for regions. + // global_iv = iv; + Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, Data.IVLVal.getType(), Data.IVName); + LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType()); + llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc()); + CGF.EmitStoreOfScalar(IVVal, GlobIVLVal); +} + +namespace { +/// Checks if the lastprivate conditional variable is referenced in LHS. 
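This machinery, together with the visitor class below, serves the OpenMP 5.0 lastprivate(conditional:) clause, under which the variable's final value comes from the last iteration that actually assigned it, not from the last iteration of the loop. A hypothetical example:

  int last_match = -1;
  #pragma omp parallel for lastprivate(conditional: last_match)
  for (int i = 0; i < n; ++i)
    if (data[i] == key)
      last_match = i;  // after the loop: the value from the highest i
                       // whose body executed this store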
+class LastprivateConditionalRefChecker final + : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { + CodeGenFunction &CGF; + ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; + const Expr *FoundE = nullptr; + const Decl *FoundD = nullptr; + StringRef UniqueDeclName; + LValue IVLVal; + StringRef IVName; + SourceLocation Loc; + bool UseOriginalIV = false; + +public: + bool VisitDeclRefExpr(const DeclRefExpr *E) { + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) { + auto It = D.DeclToUniqeName.find(E->getDecl()); + if (It == D.DeclToUniqeName.end()) + continue; + FoundE = E; + FoundD = E->getDecl()->getCanonicalDecl(); + UniqueDeclName = It->getSecond(); + IVLVal = D.IVLVal; + IVName = D.IVName; + UseOriginalIV = D.UseOriginalIV; + break; + } + return FoundE == E; + } + bool VisitMemberExpr(const MemberExpr *E) { + if (!CGF.IsWrappedCXXThis(E->getBase())) + return false; + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) { + auto It = D.DeclToUniqeName.find(E->getMemberDecl()); + if (It == D.DeclToUniqeName.end()) + continue; + FoundE = E; + FoundD = E->getMemberDecl()->getCanonicalDecl(); + UniqueDeclName = It->getSecond(); + IVLVal = D.IVLVal; + IVName = D.IVName; + UseOriginalIV = D.UseOriginalIV; + break; + } + return FoundE == E; + } + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) { + if (!Child) + continue; + if (const auto *E = dyn_cast<Expr>(Child)) + if (!E->isGLValue()) + continue; + if (Visit(Child)) + return true; + } + return false; + } + explicit LastprivateConditionalRefChecker( + CodeGenFunction &CGF, + ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) + : CGF(CGF), LPM(LPM) {} + std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool> + getFoundData() const { + return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, + UseOriginalIV); + } +}; +} // namespace + +void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS) { + if (CGF.getLangOpts().OpenMP < 50) + return; + LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack); + if (!Checker.Visit(LHS)) + return; + const Expr *FoundE; + const Decl *FoundD; + StringRef UniqueDeclName; + LValue IVLVal; + StringRef IVName; + bool UseOriginalIV; + std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) = + Checker.getFoundData(); + + // Last updated loop counter for the lastprivate conditional var. + // int<xx> last_iv = 0; + llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); + llvm::Constant *LastIV = + getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv"); + cast<llvm::GlobalVariable>(LastIV)->setAlignment( + IVLVal.getAlignment().getAsAlign()); + LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); + + // Private address of the lastprivate conditional in the current context. + // priv_a + LValue LVal = CGF.EmitLValue(FoundE); + // Last value of the lastprivate conditional. + // decltype(priv_a) last_a; + llvm::Constant *Last = getOrCreateInternalVariable( + LVal.getAddress(CGF).getElementType(), UniqueDeclName); + cast<llvm::GlobalVariable>(Last)->setAlignment( + LVal.getAlignment().getAsAlign()); + LValue LastLVal = + CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); + + // Global loop counter. Required to handle inner parallel-for regions. 
+ // global_iv + if (!UseOriginalIV) { + Address IVAddr = + getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName); + IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType()); + } + llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc()); + + // #pragma omp critical(a) + // if (last_iv <= iv) { + // last_iv = iv; + // last_a = priv_a; + // } + auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, + FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *LastIVVal = + CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc()); + // (last_iv <= global_iv) ? Check if the variable is updated and store new + // value in global var. + llvm::Value *CmpRes; + if (IVLVal.getType()->isSignedIntegerType()) { + CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); + } else { + assert(IVLVal.getType()->isUnsignedIntegerType() && + "Loop iteration variable must be integer."); + CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); + } + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); + CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); + // { + CGF.EmitBlock(ThenBB); + + // last_iv = global_iv; + CGF.EmitStoreOfScalar(IVVal, LastIVLVal); + + // last_a = priv_a; + switch (CGF.getEvaluationKind(LVal.getType())) { + case TEK_Scalar: { + llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc()); + CGF.EmitStoreOfScalar(PrivVal, LastLVal); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy PrivVal = + CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc()); + CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); + break; + } + case TEK_Aggregate: + llvm_unreachable( + "Aggregates are not supported in lastprivate conditional."); + } + // } + CGF.EmitBranch(ExitBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + }; + + if (CGM.getLangOpts().OpenMPSimd) { + // Do not emit as a critical region as no parallel region could be emitted. + RegionCodeGenTy ThenRCG(CodeGen); + ThenRCG(CGF); + } else { + emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc()); + } +} + +void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( + CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, + SourceLocation Loc) { + if (CGF.getLangOpts().OpenMP < 50) + return; + auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD); + assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() && + "Unknown lastprivate conditional variable."); + StringRef UniqueName = It->getSecond(); + llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); + // The variable was not updated in the region - exit. 
+  if (!GV)
+    return;
+  LValue LPLVal = CGF.MakeAddrLValue(
+      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
+  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
+  CGF.EmitStoreOfScalar(Res, PrivLVal);
+}
+
 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -10688,7 +11725,7 @@ void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
 }

 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
-                                             OpenMPProcBindClauseKind ProcBind,
+                                             ProcBindKind ProcBind,
                                              SourceLocation Loc) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }
@@ -10786,12 +11823,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
   llvm_unreachable("Not supported in SIMD-only mode");
 }

-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
-                                         const OMPExecutableDirective &D,
-                                         llvm::Function *OutlinedFn,
-                                         llvm::Value *OutlinedFnID,
-                                         const Expr *IfCond,
-                                         const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D,
+    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+    const Expr *Device,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                     const OMPLoopDirective &D)>
+        SizeEmitter) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }

@@ -10807,10 +11845,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
   return false;
 }

-llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
-  return nullptr;
-}
-
 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         SourceLocation Loc,
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h
index 3f842ce96407..8159f5e8b790 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -15,12 +15,15 @@
 #include "CGValue.h"
 #include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/GlobalDecl.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/ValueHandle.h"
@@ -36,7 +39,6 @@ class Value;
 namespace clang {
 class Expr;
-class GlobalDecl;
 class OMPDependClause;
 class OMPExecutableDirective;
 class OMPLoopDirective;
@@ -211,6 +213,43 @@ public:
     ~DisableAutoDeclareTargetRAII();
   };

+  /// Manages list of nontemporal decls for the specified directive.
+  class NontemporalDeclsRAII {
+    CodeGenModule &CGM;
+    const bool NeedToPush;
+
+  public:
+    NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S);
+    ~NontemporalDeclsRAII();
+  };
+
+  /// Maps the expression for the lastprivate variable to the global copy used
+  /// to store the new value, because the original variables are not mapped in
+  /// inner parallel regions. Only private copies are captured, but we also
+  /// need to store the private copy at a shared address.
+  /// Also stores the expression for the private loop counter and its
+  /// threadprivate name.
+  struct LastprivateConditionalData {
+    llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>>
+        DeclToUniqeName;
+    LValue IVLVal;
+    SmallString<16> IVName;
+    /// True if original lvalue for loop counter can be used in codegen (simd
+    /// region or simd only mode) and no need to create threadprivate
+    /// references.
+    bool UseOriginalIV = false;
+  };
+  /// Manages list of lastprivate conditional decls for the specified directive.
+  class LastprivateConditionalRAII {
+    CodeGenModule &CGM;
+    const bool NeedToPush;
+
+  public:
+    LastprivateConditionalRAII(CodeGenFunction &CGF,
+                               const OMPExecutableDirective &S, LValue IVLVal);
+    ~LastprivateConditionalRAII();
+  };
+
 protected:
   CodeGenModule &CGM;
   StringRef FirstSeparator, Separator;
@@ -241,17 +280,6 @@ protected:
                                     bool IsOffloadEntry,
                                     const RegionCodeGenTy &CodeGen);

-  /// Emits code for OpenMP 'if' clause using specified \a CodeGen
-  /// function. Here is the logic:
-  /// if (Cond) {
-  ///   ThenGen();
-  /// } else {
-  ///   ElseGen();
-  /// }
-  void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
-                       const RegionCodeGenTy &ThenGen,
-                       const RegionCodeGenTy &ElseGen);
-
   /// Emits object of ident_t type with info for source location.
   /// \param Flags Flags for OpenMP location.
   ///
@@ -291,6 +319,17 @@ protected:
   /// default location.
   virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }

+  /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+  /// \param OrigAddr LLVM IR value for \p OldGD.
+  /// \param IsForDefinition true if emission for the definition of \p OldGD is
+  /// requested.
+  /// \returns true if it was able to emit a definition function for \p OldGD,
+  /// which points to \p NewGD.
+  virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD,
+                                     const GlobalDecl &OldGD,
+                                     llvm::GlobalValue *OrigAddr,
+                                     bool IsForDefinition);
+
   /// Returns default flags for the barriers depending on the directive, for
   /// which this barrier is going to be emitted.
   static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
@@ -345,6 +384,14 @@ private:
                          SmallVector<const OMPDeclareReductionDecl *, 4>>
       FunctionUDRMapTy;
   FunctionUDRMapTy FunctionUDRMap;
+  /// Map from the user-defined mapper declaration to its corresponding
+  /// functions.
+  llvm::DenseMap<const OMPDeclareMapperDecl *, llvm::Function *> UDMMap;
+  /// Map of functions and their local user-defined mappers.
+  using FunctionUDMMapTy =
+      llvm::DenseMap<llvm::Function *,
+                     SmallVector<const OMPDeclareMapperDecl *, 4>>;
+  FunctionUDMMapTy FunctionUDMMap;
   /// Type kmp_critical_name, originally defined as typedef kmp_int32
   /// kmp_critical_name[8];
   llvm::ArrayType *KmpCriticalNameTy;
@@ -392,29 +439,10 @@ private:
   ///               // (function or global)
   ///   char *name; // Name of the function or global.
   ///   size_t size; // Size of the entry info (0 if it is a function).
+  ///   int32_t flags;
+  ///   int32_t reserved;
   /// };
   QualType TgtOffloadEntryQTy;
-  /// struct __tgt_device_image{
-  ///   void *ImageStart; // Pointer to the target code start.
-  ///   void *ImageEnd;   // Pointer to the target code end.
-  ///   // We also add the host entries to the device image, as it may be useful
-  ///   // for the target runtime to have access to that information.
-  ///   __tgt_offload_entry *EntriesBegin; // Begin of the table with all
-  ///                                      // the entries.
-  ///   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
-  ///                                      // entries (non inclusive).
-  /// };
-  QualType TgtDeviceImageQTy;
-  /// struct __tgt_bin_desc{
-  ///   int32_t NumDevices; // Number of devices supported.
-  ///   __tgt_device_image *DeviceImages; // Arrays of device images
-  ///                                     // (one per device).
-  ///   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
-  ///                                      // entries.
-  ///   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
-  ///                                      // entries (non inclusive).
-  /// };
-  QualType TgtBinaryDescriptorQTy;
   /// Entity that registers the offloading constants that were emitted so
   /// far.
   class OffloadEntriesInfoManagerTy {
@@ -626,8 +654,8 @@ private:
   OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
   bool ShouldMarkAsGlobal = true;
-  /// List of the emitted functions.
-  llvm::StringSet<> AlreadyEmittedTargetFunctions;
+  /// List of the emitted declarations.
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> AlreadyEmittedTargetDecls;
   /// List of the global variables with their addresses that should not be
   /// emitted for the target.
   llvm::StringMap<llvm::WeakTrackingVH> EmittedNonTargetVariables;
@@ -636,6 +664,22 @@ private:
   /// must be emitted.
   llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables;

+  /// Mapping of the original functions to their variants and original global
+  /// decl.
+  llvm::MapVector<CanonicalDeclPtr<const FunctionDecl>,
+                  std::pair<GlobalDecl, GlobalDecl>>
+      DeferredVariantFunction;
+
+  using NontemporalDeclsSet = llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>>;
+  /// Stack for list of declarations in current context marked as nontemporal.
+  /// The set is the union of all current stack elements.
+  llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
+
+  /// Stack for list of addresses of declarations in current context marked as
+  /// lastprivate conditional. The set is the union of all current stack
+  /// elements.
+  llvm::SmallVector<LastprivateConditionalData, 4> LastprivateConditionalStack;
+
   /// Flag for keeping track of whether a requires unified_shared_memory
   /// directive is present.
   bool HasRequiresUnifiedSharedMemory = false;
@@ -647,14 +691,6 @@ private:
   /// Device routines are specific to the
   bool HasEmittedDeclareTargetRegion = false;

-  /// Creates and registers offloading binary descriptor for the current
-  /// compilation unit. The function that does the registration is returned.
-  llvm::Function *createOffloadingBinaryDescriptorRegistration();
-
-  /// Creates all the offload entries in the current compilation unit
-  /// along with the associated metadata.
-  void createOffloadEntriesAndInfoMetadata();
-
   /// Loads all the offload entries information from the host IR
   /// metadata.
   void loadOffloadInfoMetadata();
@@ -662,12 +698,6 @@
   /// Returns __tgt_offload_entry type.
   QualType getTgtOffloadEntryQTy();

-  /// Returns __tgt_device_image type.
-  QualType getTgtDeviceImageQTy();
-
-  /// Returns __tgt_bin_desc type.
-  QualType getTgtBinaryDescriptorQTy();
-
   /// Start scanning from statement \a S and emit all target regions
   /// found along the way.
   /// \param S Starting statement.
@@ -738,6 +768,14 @@ private:
                                 llvm::Value *Ctor, llvm::Value *CopyCtor,
                                 llvm::Value *Dtor, SourceLocation Loc);

+  /// Emit the array initialization or deletion portion for user-defined mapper
+  /// code generation.
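The hunk above wires user-defined mapper support (UDMMap, FunctionUDMMap), and the helper declared just below emits the per-array init/delete portion. A hypothetical mapper this machinery would lower:

  struct vec { int len; double *data; };
  #pragma omp declare mapper(deep : struct vec v) map(v, v.data[0:v.len])

  void bump(struct vec w) {
  #pragma omp target map(mapper(deep), tofrom: w)  // maps w and its payload
    w.data[0] += 1.0;
  }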
+  void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF,
+                                  llvm::Value *Handle, llvm::Value *BasePtr,
+                                  llvm::Value *Ptr, llvm::Value *Size,
+                                  llvm::Value *MapType, CharUnits ElementSize,
+                                  llvm::BasicBlock *ExitBB, bool IsInit);
+
   struct TaskResultTy {
     llvm::Value *NewTask = nullptr;
     llvm::Function *TaskEntry = nullptr;
@@ -777,12 +815,34 @@ private:
   /// default.
   virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; }

+  /// Emit code that pushes the trip count of loops associated with constructs
+  /// 'target teams distribute' and 'teams distribute parallel for'.
+  /// \param SizeEmitter Emits the int64 value for the number of iterations of
+  /// the associated loop.
+  void emitTargetNumIterationsCall(
+      CodeGenFunction &CGF, const OMPExecutableDirective &D,
+      llvm::Value *DeviceID,
+      llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                       const OMPLoopDirective &D)>
+          SizeEmitter);
+
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM)
       : CGOpenMPRuntime(CGM, ".", ".") {}
   virtual ~CGOpenMPRuntime() {}
   virtual void clear();

+  /// Emits code for OpenMP 'if' clause using specified \a CodeGen
+  /// function. Here is the logic:
+  /// if (Cond) {
+  ///   ThenGen();
+  /// } else {
+  ///   ElseGen();
+  /// }
+  void emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
+                    const RegionCodeGenTy &ThenGen,
+                    const RegionCodeGenTy &ElseGen);
+
   /// Checks if the \p Body is the \a CompoundStmt and returns its child
   /// statement iff there is only one that is not evaluatable at the compile
   /// time.
@@ -798,6 +858,10 @@ public:
   virtual std::pair<llvm::Function *, llvm::Function *>
   getUserDefinedReduction(const OMPDeclareReductionDecl *D);

+  /// Emit the function for the user defined mapper construct.
+  void emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
+                             CodeGenFunction *CGF = nullptr);
+
   /// Emits outlined function for the specified OpenMP parallel directive
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
   /// kmp_int32 BoundID, struct context_vars*).
@@ -1106,7 +1170,7 @@ public:
   /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
   /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
   virtual void emitProcBindClause(CodeGenFunction &CGF,
-                                  OpenMPProcBindClauseKind ProcBind,
+                                  llvm::omp::ProcBindKind ProcBind,
                                   SourceLocation Loc);

   /// Returns address of the threadprivate variable for the current
   /// thread.
@@ -1394,15 +1458,6 @@ public:
                                           bool IsOffloadEntry,
                                           const RegionCodeGenTy &CodeGen);

-  /// Emit code that pushes the trip count of loops associated with constructs
-  /// 'target teams distribute' and 'teams distribute parallel for'.
-  /// \param SizeEmitter Emits the int64 value for the number of iterations of
-  /// the associated loop.
-  virtual void emitTargetNumIterationsCall(
-      CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
-      const llvm::function_ref<llvm::Value *(
-          CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
-
   /// Emit the target offloading code associated with \a D. The emitted
   /// code attempts offloading the execution to the device and, in the event
   /// of a failure, it executes the host version outlined in \a OutlinedFn.
   /// \param IfCond Expression evaluated in if clause associated with the target
   /// directive, or null if no if clause is used.
   /// \param Device Expression evaluated in device clause associated with the
   /// target directive, or null if no device clause is used.
- virtual void emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device); + /// \param SizeEmitter Callback to emit number of iterations for loop-based + /// directives. + virtual void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter); /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if @@ -1449,10 +1508,9 @@ public: /// requires directives was used in the current module. llvm::Function *emitRequiresDirectiveRegFun(); - /// Creates the offloading descriptor in the event any target region - /// was emitted in the current module and return the function that registers - /// it. - virtual llvm::Function *emitRegistrationFunction(); + /// Creates all the offload entries in the current compilation unit + /// along with the associated metadata. + void createOffloadEntriesAndInfoMetadata(); /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -1626,6 +1684,43 @@ public: /// Return whether the unified_shared_memory has been specified. bool hasRequiresUnifiedSharedMemory() const; + + /// Emits the definition of the declare variant function. + virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition); + + /// Checks if the \p VD variable is marked as nontemporal declaration in + /// current context. + bool isNontemporalDecl(const ValueDecl *VD) const; + + /// Initializes global counter for lastprivate conditional. + virtual void + initLastprivateConditionalCounter(CodeGenFunction &CGF, + const OMPExecutableDirective &S); + + /// Checks if the provided \p LVal is lastprivate conditional and emits the + /// code to update the value of the original variable. + /// \code + /// lastprivate(conditional: a) + /// ... + /// <type> a; + /// lp_a = ...; + /// #pragma omp critical(a) + /// if (last_iv_a <= iv) { + /// last_iv_a = iv; + /// global_a = lp_a; + /// } + /// \endcode + virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS); + + /// Gets the address of the global copy used for lastprivate conditional + /// update, if any. + /// \param PrivLVal LValue for the private copy. + /// \param VD Original lastprivate declaration. + virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, + LValue PrivLVal, + const VarDecl *VD, + SourceLocation Loc); }; /// Class supports emissionof SIMD-only code. @@ -1854,7 +1949,7 @@ public: /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. void emitProcBindClause(CodeGenFunction &CGF, - OpenMPProcBindClauseKind ProcBind, + llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override; /// Returns address of the threadprivate variable for the current @@ -2097,9 +2192,13 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. 
- void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) override; + void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) override; /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if @@ -2117,11 +2216,6 @@ public: /// \param GD Global to scan. bool emitTargetGlobal(GlobalDecl GD) override; - /// Creates the offloading descriptor in the event any target region - /// was emitted in the current module and return the function that registers - /// it. - llvm::Function *emitRegistrationFunction() override; - /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a /// CapturedStruct. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 48dcbbf3cabd..d00d84b79cfe 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -13,14 +13,17 @@ #include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" +#include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/IntrinsicsNVPTX.h" using namespace clang; using namespace CodeGen; +using namespace llvm::omp; namespace { enum OpenMPRTLFunctionNVPTX { @@ -107,6 +110,10 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 /// global_tid); OMPRTL__kmpc_barrier_simple_spmd, + /// Call to int32_t __kmpc_warp_active_thread_mask(void); + OMPRTL_NVPTX__kmpc_warp_active_thread_mask, + /// Call to void __kmpc_syncwarp(int32_t Mask); + OMPRTL_NVPTX__kmpc_syncwarp, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. 
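Two device runtime entry points are new in the enum above; going by the comments, their C signatures are:

  int32_t __kmpc_warp_active_thread_mask(void);  // mask of currently active lanes
  void __kmpc_syncwarp(int32_t Mask);            // reconverge the masked lanes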
@@ -276,7 +283,8 @@ static RecordDecl *buildRecordForGlobalizedVars( } } else { llvm::APInt ArraySize(32, BufSize); - Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); + Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal, + 0); Field = FieldDecl::Create( C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, C.getTrivialTypeSourceInfo(Type, SourceLocation()), @@ -287,10 +295,11 @@ static RecordDecl *buildRecordForGlobalizedVars( static_cast<CharUnits::QuantityType>( GlobalMemoryAlignment))); Field->addAttr(AlignedAttr::CreateImplicit( - C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true, + C, /*IsAlignmentExpr=*/true, IntegerLiteral::Create(C, Align, C.getIntTypeForBitwidth(32, /*Signed=*/0), - SourceLocation()))); + SourceLocation()), + {}, AttributeCommonInfo::AS_GNU, AlignedAttr::GNU_aligned)); } GlobalizedRD->addDecl(Field); MappedDeclsFields.try_emplace(VD, Field); @@ -755,6 +764,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -790,12 +800,17 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -825,6 +840,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -860,12 +876,17 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -988,6 +1009,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -1023,12 +1045,17 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -1064,6 +1091,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case 
OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -1099,12 +1127,17 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -1777,9 +1810,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - cast<llvm::Function>(RTLFn.getCallee()) - ->addFnAttr(llvm::Attribute::Convergent); + RTLFn = + CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; } case OMPRTL__kmpc_barrier_simple_spmd: { @@ -1788,10 +1820,22 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - cast<llvm::Function>(RTLFn.getCallee()) - ->addFnAttr(llvm::Attribute::Convergent); + RTLFn = CGM.CreateConvergentRuntimeFunction( + FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); + break; + } + case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { + // Build int32_t __kmpc_warp_active_thread_mask(void); + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); + RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); + break; + } + case OMPRTL_NVPTX__kmpc_syncwarp: { + // Build void __kmpc_syncwarp(kmp_int32 Mask); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); + RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); break; } } @@ -1871,6 +1915,19 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { llvm_unreachable("Unknown flags are requested."); } +bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit the function in OldGD with the body from NewGD, if NewGD is defined. + auto *NewFD = cast<FunctionDecl>(NewGD.getDecl()); + if (NewFD->isDefined()) { + CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr); + return true; + } + return false; +} + CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) @@ -1878,7 +1935,7 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) } void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, - OpenMPProcBindClauseKind ProcBind, + ProcBindKind ProcBind, SourceLocation Loc) { // Do nothing in case of SPMD mode and L0 parallel. 
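CreateConvergentRuntimeFunction folds in the hand-written pattern the removed lines above show, which was equivalent to:

  llvm::FunctionCallee FC = CGM.CreateRuntimeFunction(FnTy, "__kmpc_barrier");
  cast<llvm::Function>(FC.getCallee())->addFnAttr(llvm::Attribute::Convergent);

Marking these barriers convergent keeps the optimizer from sinking or duplicating them across divergent control flow, which would miscompile on a GPU target.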
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) @@ -2030,7 +2087,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().GlobalRecord = GlobalizedRD; I->getSecond().MappedParams = - llvm::make_unique<CodeGenFunction::OMPMapVars>(); + std::make_unique<CodeGenFunction::OMPMapVars>(); DeclToAddrMapTy &Data = I->getSecond().LocalVarData; for (const auto &Pair : MappedDeclsFields) { assert(Pair.getFirst()->isCanonicalDecl() && @@ -2268,7 +2325,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, VarTy = Rec.second.FD->getType(); } else { llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( - VarAddr.getAddress().getPointer(), + VarAddr.getAddress(CGF).getPointer(), {Bld.getInt32(0), getNVPTXLaneID(CGF)}); VarTy = Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); @@ -2276,7 +2333,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, AlignmentSource::Decl); } - Rec.second.PrivateAddr = VarAddr.getAddress(); + Rec.second.PrivateAddr = VarAddr.getAddress(CGF); if (!IsInTTDRegion && (WithSPMDCheck || getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { @@ -2287,10 +2344,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, "Secondary glob data must be one per team."); LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD); VarAddr.setAddress( - Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(), - VarAddr.getPointer()), + Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(CGF), + VarAddr.getPointer(CGF)), VarAddr.getAlignment())); - Rec.second.PrivateAddr = VarAddr.getAddress(); + Rec.second.PrivateAddr = VarAddr.getAddress(CGF); } Address GlobalPtr = Rec.second.PrivateAddr; Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName()); @@ -2302,7 +2359,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, if (EscapedParam) { const auto *VD = cast<VarDecl>(Rec.first); CGF.EmitStoreOfScalar(ParValue, VarAddr); - I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); + I->getSecond().MappedParams->setVarAddr(CGF, VD, + VarAddr.getAddress(CGF)); } if (IsTTD) ++SecIt; @@ -2336,7 +2394,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, CGM.getContext().getDeclAlign(VD), AlignmentSource::Decl); I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD), - Base.getAddress()); + Base.getAddress(CGF)); I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue); } I->getSecond().MappedParams->apply(CGF); @@ -2414,9 +2472,8 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - Address ZeroAddr = CGF.CreateMemTemp( - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); @@ -2445,16 +2502,19 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( // Force inline this outlined function at its call site. 
Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( - /*DestWidth=*/32, /*Signed=*/1), - ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // ThreadId for serialized parallels is 0. Address ThreadIDAddr = ZeroAddr; - auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr]( + auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr]( CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -2595,7 +2655,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, LNParallelGen, SeqGen); + emitIfClause(CGF, IfCond, LNParallelGen, SeqGen); } else { CodeGenFunction::RunCleanupsScope Scope(CGF); RegionCodeGenTy ThenRCG(LNParallelGen); @@ -2611,17 +2671,19 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( // llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( - /*DestWidth=*/32, /*Signed=*/1), - ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // ThreadId for serialized parallels is 0. Address ThreadIDAddr = ZeroAddr; - auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr, - &ThreadIDAddr](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr]( + CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -2669,8 +2731,9 @@ void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { llvm::ConstantPointerNull::get( cast<llvm::PointerType>(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; - CGF.EmitRuntimeCall( + llvm::CallInst *Call = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); + Call->setConvergent(); } void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, @@ -2684,7 +2747,9 @@ void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); + llvm::CallInst *Call = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); + Call->setConvergent(); } void CGOpenMPRuntimeNVPTX::emitCriticalRegion( @@ -2697,6 +2762,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); + // Get the mask of active threads in the warp. 
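The code that follows switches the NVPTX critical-region loop from a full team barrier to warp-level reconvergence. Illustrative pseudo-C for the generated control flow (names are descriptive, not the real IR labels):

  uint32_t mask = __kmpc_warp_active_thread_mask();
  for (uint32_t counter = 0; counter < num_threads; ++counter) {
    if (thread_id == counter) {
      /* critical body */
    }
    __kmpc_syncwarp(mask);  // reconverge only the lanes that entered
  }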
+ llvm::Value *Mask = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = getNVPTXThreadID(CGF); @@ -2737,8 +2805,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( // Block waits for all threads in current team to finish then increments the // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); - emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Reconverge active threads in the warp. + (void)CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -3629,7 +3698,8 @@ static llvm::Value *emitListToGlobalCopyFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { @@ -3726,7 +3796,8 @@ static llvm::Value *emitListToGlobalReduceFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { @@ -3830,7 +3901,8 @@ static llvm::Value *emitGlobalToListCopyFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { @@ -3926,7 +3998,8 @@ static llvm::Value *emitGlobalToListReduceFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { @@ -4239,7 +4312,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction( } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); @@ -4249,7 +4322,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction( Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - 
CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. @@ -4515,9 +4588,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); - Address ZeroAddr = CGF.CreateMemTemp( - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // Get the array of arguments. SmallVector<llvm::Value *, 8> Args; @@ -4634,7 +4706,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, return; auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().MappedParams = - llvm::make_unique<CodeGenFunction::OMPMapVars>(); + std::make_unique<CodeGenFunction::OMPMapVars>(); I->getSecond().GlobalRecord = GlobalizedVarsRecord; I->getSecond().EscapedParameters.insert( VarChecker.getEscapedParameters().begin(), @@ -4700,7 +4772,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); CharUnits Align = CGM.getContext().getDeclAlign(VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return Address(GV, Align); } case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { @@ -4712,7 +4784,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); CharUnits Align = CGM.getContext().getDeclAlign(VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return Address(GV, Align); } case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: @@ -4723,7 +4795,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, llvm::GlobalValue::InternalLinkage, llvm::Constant::getNullValue(VarTy), VD->getName()); CharUnits Align = CGM.getContext().getDeclAlign(VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return Address(GV, Align); } } @@ -4832,7 +4904,7 @@ void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( if (VD->getType().getCanonicalType()->isReferenceType()) VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType().getCanonicalType()) - .getAddress(); + .getAddress(CGF); CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal); } } @@ -4894,7 +4966,8 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { - switch (getCudaArch(CGM)) { + CudaArch Arch = getCudaArch(CGM); + switch (Arch) { case CudaArch::SM_20: case CudaArch::SM_21: case CudaArch::SM_30: @@ -4906,10 +4979,14 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( case CudaArch::SM_53: case CudaArch::SM_60: case CudaArch::SM_61: - case CudaArch::SM_62: - CGM.Error(Clause->getBeginLoc(), - "Target architecture does not support unified addressing"); + case CudaArch::SM_62: { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << "Target architecture " << CudaArchToString(Arch) + << " does not support unified addressing"; + 
CGM.Error(Clause->getBeginLoc(), Out.str()); return; + } case CudaArch::SM_70: case CudaArch::SM_72: case CudaArch::SM_75: @@ -5026,7 +5103,7 @@ void CGOpenMPRuntimeNVPTX::clear() { Size = llvm::alignTo(Size, RecAlignment); llvm::APInt ArySize(/*numBits=*/64, Size); QualType SubTy = C.getConstantArrayType( - C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); const bool UseSharedMemory = Size <= SharedMemorySize; auto *Field = FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD, @@ -5053,7 +5130,7 @@ void CGOpenMPRuntimeNVPTX::clear() { if (!SharedStaticRD->field_empty()) { llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize); QualType SubTy = C.getConstantArrayType( - C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); auto *Field = FieldDecl::Create( C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy, C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), @@ -5086,11 +5163,12 @@ void CGOpenMPRuntimeNVPTX::clear() { std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); llvm::APInt Size1(32, SMsBlockPerSM.second); QualType Arr1Ty = - C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, + C.getConstantArrayType(StaticTy, Size1, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); llvm::APInt Size2(32, SMsBlockPerSM.first); - QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType Arr2Ty = + C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); // FIXME: nvlink does not handle weak linkage correctly (objects with // different sizes are reported as erroneous). diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index e7fd458e7271..4159af0a622f 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -193,6 +193,18 @@ protected: /// Full/Lightweight runtime mode. Used for better optimization. unsigned getDefaultLocationReserved2Flags() const override; + /// Tries to emit the declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true if emission was requested for the definition + /// of \p OldGD. + /// \returns true if it was able to emit a definition function for \p OldGD, + /// which points to \p NewGD. + /// The NVPTX backend does not support global aliases, so just use the + /// function emitted for \p NewGD instead of \p OldGD. + bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) override; + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); void clear() override; @@ -200,7 +212,7 @@ public: /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
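The improved unified-addressing diagnostic above names the offending architecture by formatting into a stack buffer before reporting. The same SmallString plus raw_svector_ostream idiom, isolated (the reporter callback stands in for CGM.Error and is my own scaffolding):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    // Format a diagnostic into a 256-byte stack buffer; nothing touches the
    // heap unless the message outgrows the buffer.
    static void reportUnsupportedArch(
        llvm::StringRef ArchName,
        llvm::function_ref<void(llvm::StringRef)> ReportError) {
      llvm::SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "Target architecture " << ArchName
          << " does not support unified addressing";
      ReportError(Out.str());
    }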
virtual void emitProcBindClause(CodeGenFunction &CGF, - OpenMPProcBindClauseKind ProcBind, + llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override; /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp index 40ab79509f98..138459c68dbf 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp @@ -10,10 +10,11 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGDebugInfo.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" +#include "clang/AST/Attr.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/PrettyStackTrace.h" @@ -221,6 +222,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPParallelForSimdDirectiveClass: EmitOMPParallelForSimdDirective(cast<OMPParallelForSimdDirective>(*S)); break; + case Stmt::OMPParallelMasterDirectiveClass: + EmitOMPParallelMasterDirective(cast<OMPParallelMasterDirective>(*S)); + break; case Stmt::OMPParallelSectionsDirectiveClass: EmitOMPParallelSectionsDirective(cast<OMPParallelSectionsDirective>(*S)); break; @@ -281,6 +285,21 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPTaskLoopSimdDirectiveClass: EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S)); break; + case Stmt::OMPMasterTaskLoopDirectiveClass: + EmitOMPMasterTaskLoopDirective(cast<OMPMasterTaskLoopDirective>(*S)); + break; + case Stmt::OMPMasterTaskLoopSimdDirectiveClass: + EmitOMPMasterTaskLoopSimdDirective( + cast<OMPMasterTaskLoopSimdDirective>(*S)); + break; + case Stmt::OMPParallelMasterTaskLoopDirectiveClass: + EmitOMPParallelMasterTaskLoopDirective( + cast<OMPParallelMasterTaskLoopDirective>(*S)); + break; + case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass: + EmitOMPParallelMasterTaskLoopSimdDirective( + cast<OMPParallelMasterTaskLoopSimdDirective>(*S)); + break; case Stmt::OMPDistributeDirectiveClass: EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S)); break; @@ -543,8 +562,7 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) { // Emit debug info for labels. if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().hasReducedDebugInfo()) { DI->setLocation(D->getLocation()); DI->EmitLabel(D, Builder); } @@ -1819,15 +1837,15 @@ CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, Ty = llvm::IntegerType::get(getLLVMContext(), Size); Ty = llvm::PointerType::getUnqual(Ty); - Arg = Builder.CreateLoad(Builder.CreateBitCast(InputValue.getAddress(), - Ty)); + Arg = Builder.CreateLoad( + Builder.CreateBitCast(InputValue.getAddress(*this), Ty)); } else { - Arg = InputValue.getPointer(); + Arg = InputValue.getPointer(*this); ConstraintStr += '*'; } } } else { - Arg = InputValue.getPointer(); + Arg = InputValue.getPointer(*this); ConstraintStr += '*'; } @@ -1984,6 +2002,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Type *> ResultTruncRegTypes; std::vector<llvm::Type *> ArgTypes; std::vector<llvm::Value*> Args; + llvm::BitVector ResultTypeRequiresCast; // Keep track of inout constraints. 
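The new EmitStmt cases route the OpenMP 5.0 combined master/taskloop directives to their emitters. At the source level these are constructs like the following (illustrative only; requires -fopenmp and an OpenMP-5.0-capable clang of this era):

    // Each of the four new directive classes corresponds to a spelling like:
    void saxpy(int n, float a, const float *x, float *y) {
      #pragma omp parallel master taskloop simd grainsize(64)
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];
    }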
std::string InOutConstraints; @@ -2022,13 +2041,23 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // If this is a register output, then make the inline asm return it // by-value. If this is a memory result, return the value by-reference. - if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) { + bool isScalarizableAggregate = + hasAggregateEvaluationKind(OutExpr->getType()); + if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) || + isScalarizableAggregate)) { Constraints += "=" + OutputConstraint; ResultRegQualTys.push_back(OutExpr->getType()); ResultRegDests.push_back(Dest); - ResultRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); - ResultTruncRegTypes.push_back(ResultRegTypes.back()); - + ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); + if (Info.allowsRegister() && isScalarizableAggregate) { + ResultTypeRequiresCast.push_back(true); + unsigned Size = getContext().getTypeSize(OutExpr->getType()); + llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size); + ResultRegTypes.push_back(ConvTy); + } else { + ResultTypeRequiresCast.push_back(false); + ResultRegTypes.push_back(ResultTruncRegTypes.back()); + } // If this output is tied to an input, and if the input is larger, then // we need to set the actual result type of the inline asm node to be the // same as the input type. @@ -2062,11 +2091,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); } else { - ArgTypes.push_back(Dest.getAddress().getType()); - Args.push_back(Dest.getPointer()); + ArgTypes.push_back(Dest.getAddress(*this).getType()); + Args.push_back(Dest.getPointer(*this)); Constraints += "=*"; Constraints += OutputConstraint; ReadOnly = ReadNone = false; @@ -2087,8 +2116,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); if (Info.allowsRegister()) InOutConstraints += llvm::utostr(i); else @@ -2174,8 +2203,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); ArgTypes.push_back(Arg->getType()); Args.push_back(Arg); @@ -2271,6 +2300,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { assert(RegResults.size() == ResultRegTypes.size()); assert(RegResults.size() == ResultTruncRegTypes.size()); assert(RegResults.size() == ResultRegDests.size()); + // ResultRegDests can be also populated by addReturnRegisterOutputs() above, + // in which case its size may grow. 
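The ResultTypeRequiresCast machinery above lets a register-constrained asm output be a small aggregate: the asm returns an integer of the aggregate's bit width, and the store later goes through a bitcast of the destination address. A hypothetical x86 example of what this accepts (AT&T syntax; the struct and constant are mine):

    struct Pair { short lo, hi; };      // 32 bits: fits one general register

    Pair makePair() {
      Pair P;
      // An aggregate tied to "=r": the value arrives as an i32 and is stored
      // through a bitcast of P's address.
      asm("movl $0x00020001, %0" : "=r"(P));
      return P;
    }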
+ assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; @@ -2300,7 +2332,24 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { } } - EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]); + LValue Dest = ResultRegDests[i]; + // ResultTypeRequiresCast elements correspond to the first + // ResultTypeRequiresCast.size() elements of RegResults. + if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { + unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]); + Address A = Builder.CreateBitCast(Dest.getAddress(*this), + ResultRegTypes[i]->getPointerTo()); + QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false); + if (Ty.isNull()) { + const Expr *OutExpr = S.getOutputExpr(i); + CGM.Error( + OutExpr->getExprLoc(), + "impossible constraint in asm: can't store value into a register"); + return; + } + Dest = MakeAddrLValue(A, Ty); + } + EmitStoreThroughLValue(RValue::get(Tmp), Dest); } } @@ -2341,14 +2390,14 @@ CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) { delete CGF.CapturedStmtInfo; // Emit call to the helper function. - EmitCallOrInvoke(F, CapStruct.getPointer()); + EmitCallOrInvoke(F, CapStruct.getPointer(*this)); return F; } Address CodeGenFunction::GenerateCapturedStmtArgument(const CapturedStmt &S) { LValue CapStruct = InitCapturedStruct(S); - return CapStruct.getAddress(); + return CapStruct.getAddress(*this); } /// Creates the outlined function for a CapturedStmt. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp index e8fbca5108ad..dc3899f0e4ea 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -15,11 +15,16 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" -#include "clang/AST/DeclOpenMP.h" +#include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" using namespace clang; using namespace CodeGen; +using namespace llvm::omp; namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen @@ -76,7 +81,7 @@ public: InlinedShareds.isGlobalVarCaptured(VD)), VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); + return CGF.EmitLValue(&DRE).getAddress(CGF); }); } } @@ -120,12 +125,47 @@ public: class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { CodeGenFunction::OMPMapVars PreCondVars; + llvm::DenseSet<const VarDecl *> EmittedAsPrivate; for (const auto *E : S.counters()) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + EmittedAsPrivate.insert(VD->getCanonicalDecl()); (void)PreCondVars.setVarAddr( CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); } + // Mark private vars as undefs. 
+ for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { + for (const Expr *IRef : C->varlists()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + (void)PreCondVars.setVarAddr( + CGF, OrigVD, + Address(llvm::UndefValue::get( + CGF.ConvertTypeForMem(CGF.getContext().getPointerType( + OrigVD->getType().getNonReferenceType()))), + CGF.getContext().getDeclAlign(OrigVD))); + } + } + } (void)PreCondVars.apply(CGF); + // Emit init, __range and __end variables for C++ range loops. + const Stmt *Body = + S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { + Body = OMPLoopDirective::tryToFindNextInnerLoop( + Body, /*TryImperfectlyNestedLoops=*/true); + if (auto *For = dyn_cast<ForStmt>(Body)) { + Body = For->getBody(); + } else { + assert(isa<CXXForRangeStmt>(Body) && + "Expected canonical for loop or range-based for loop."); + auto *CXXFor = cast<CXXForRangeStmt>(Body); + if (const Stmt *Init = CXXFor->getInit()) + CGF.EmitStmt(Init); + CGF.EmitStmt(CXXFor->getRangeStmt()); + CGF.EmitStmt(CXXFor->getEndStmt()); + Body = CXXFor->getBody(); + } + } if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { for (const auto *I : PreInits->decls()) CGF.EmitVarDecl(cast<VarDecl>(*I)); @@ -196,7 +236,7 @@ public: VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); + return CGF.EmitLValue(&DRE).getAddress(CGF); }); } } @@ -289,7 +329,7 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( CapturedVars.push_back(CV); } else { assert(CurCap->capturesVariable() && "Expected capture by reference."); - CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); + CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer()); } } } @@ -300,11 +340,11 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, ASTContext &Ctx = CGF.getContext(); llvm::Value *CastedPtr = CGF.EmitScalarConversion( - AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), + AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), Ctx.getPointerType(DstType), Loc); Address TmpAddr = CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) - .getAddress(); + .getAddress(CGF); return TmpAddr; } @@ -483,7 +523,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( } else if (I->capturesVariable()) { const VarDecl *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - Address ArgAddr = ArgLVal.getAddress(); + Address ArgAddr = ArgLVal.getAddress(CGF); if (ArgLVal.getType()->isLValueReferenceType()) { ArgAddr = CGF.EmitLoadOfReference(ArgLVal); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { @@ -505,12 +545,12 @@ static llvm::Function *emitOutlinedFunctionPrologue( ? castValueFromUintptr( CGF, I->getLocation(), FD->getType(), Args[Cnt]->getName(), ArgLVal) - : ArgLVal.getAddress()}}); + : ArgLVal.getAddress(CGF)}}); } else { // If 'this' is captured, load it into CXXThisValue. 
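The pre-init walk added above emits the hidden init, __range, and __end variables of C++ range-based for statements before the loop precondition is evaluated, so OpenMP 5.0 range loops reach this code path. Source shape (illustrative):

    #include <vector>

    void scale(std::vector<double> &v, double f) {
      // A range-based for is an OpenMP 5.0 canonical loop; its __range/__end
      // variables are emitted by the pre-init walk in the hunk above.
      #pragma omp parallel for
      for (double &x : v)
        x *= f;
    }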
assert(I->capturesThis()); CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); - LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); + LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}}); } ++Cnt; ++I; @@ -527,8 +567,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { const CapturedDecl *CD = S.getCapturedDecl(); // Build the argument list. bool NeedWrapperFunction = - getDebugInfo() && - CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; + getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo(); FunctionArgList Args; llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; @@ -794,8 +833,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, EmitAggregateAssign(Dest, OriginalLVal, Type); } else { EmitOMPAggregateAssign( - Emission.getAllocatedAddress(), OriginalLVal.getAddress(), - Type, + Emission.getAllocatedAddress(), + OriginalLVal.getAddress(*this), Type, [this, VDInit, Init](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the @@ -813,7 +852,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, return Emission.getAllocatedAddress(); }); } else { - Address OriginalAddr = OriginalLVal.getAddress(); + Address OriginalAddr = OriginalLVal.getAddress(*this); IsRegistered = PrivateScope.addPrivate( OrigVD, [this, VDInit, OriginalAddr, VD]() { // Emit private VarDecl with copy init. @@ -890,7 +929,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { "Copyin threadprivates should have been captured!"); DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - MasterAddr = EmitLValue(&DRE).getAddress(); + MasterAddr = EmitLValue(&DRE).getAddress(*this); LocalDeclMap.erase(VD); } else { MasterAddr = @@ -899,7 +938,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { getContext().getDeclAlign(VD)); } // Get the address of the threadprivate variable. - Address PrivateAddr = EmitLValue(*IRef).getAddress(); + Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); if (CopiedVars.size() == 1) { // At first check if current thread is a master thread. If it is, no // need to copy data. @@ -967,7 +1006,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( /*RefersToEnclosingVariableOrCapture=*/ CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - return EmitLValue(&DRE).getAddress(); + return EmitLValue(&DRE).getAddress(*this); }); // Check if the variable is also a firstprivate: in this case IInit is // not generated. Initialization of this variable will happen in codegen @@ -1005,6 +1044,18 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( llvm::BasicBlock *ThenBB = nullptr; llvm::BasicBlock *DoneBB = nullptr; if (IsLastIterCond) { + // Emit implicit barrier if at least one lastprivate conditional is found + // and this is not a simd mode. 
+ if (!getLangOpts().OpenMPSimd && + llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == OMPC_LASTPRIVATE_conditional; + })) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), + OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } ThenBB = createBasicBlock(".omp.lastprivate.then"); DoneBB = createBasicBlock(".omp.lastprivate.done"); Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); @@ -1043,14 +1094,19 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); const auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); - // Get the address of the original variable. - Address OriginalAddr = GetAddrOfLocalVar(DestVD); // Get the address of the private variable. Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), getNaturalTypeAlignment(RefTy->getPointeeType())); + // Store the last value to the private copy in the last iteration. + if (C->getKind() == OMPC_LASTPRIVATE_conditional) + CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( + *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD, + (*IRef)->getExprLoc()); + // Get the address of the original variable. + Address OriginalAddr = GetAddrOfLocalVar(DestVD); EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); } ++IRef; @@ -1124,8 +1180,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { - return RedCG.getSharedLValue(Count).getAddress(); + PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { + return RedCG.getSharedLValue(Count).getAddress(*this); }); PrivateScope.addPrivate( RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); @@ -1133,8 +1189,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( isa<ArraySubscriptExpr>(IRef)) { // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { - return RedCG.getSharedLValue(Count).getAddress(); + PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { + return RedCG.getSharedLValue(Count).getAddress(*this); }); PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), @@ -1144,7 +1200,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( } else { QualType Type = PrivateVD->getType(); bool IsArray = getContext().getAsArrayType(Type) != nullptr; - Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); // Store the address of the original variable associated with the LHS // implicit variable. if (IsArray) { @@ -1279,6 +1335,87 @@ static void emitEmptyBoundParameters(CodeGenFunction &, llvm::SmallVectorImpl<llvm::Value *> &) {} void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { + + if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + // Check if we have any if clause associated with the directive. 
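EmitOMPLastprivateClauseFinal now emits an implicit barrier when a conditional lastprivate is present (outside simd mode), so every thread's tracked update is visible before the final copy-out. The user-level shape of the OpenMP 5.0 feature:

    void last_positive(int n, const int *a, int *out) {
      int last = -1;
      // 'conditional:' means 'last' receives the value from the sequentially
      // last iteration that actually assigned it, not simply the value left
      // over from the last iteration executed.
      #pragma omp parallel for lastprivate(conditional: last)
      for (int i = 0; i < n; ++i)
        if (a[i] > 0)
          last = a[i];
      *out = last;
    }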
+ llvm::Value *IfCond = nullptr; + if (const auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = EmitScalarExpr(C->getCondition(), + /*IgnoreResultAssign=*/true); + + llvm::Value *NumThreads = nullptr; + if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) + NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign=*/true); + + ProcBindKind ProcBind = OMP_PROC_BIND_default; + if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) + ProcBind = ProcBindClause->getProcBindKind(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // The cleanup callback that finalizes all variables at the given location, + // thus calls destructors etc. + auto FiniCB = [this](InsertPointTy IP) { + CGBuilderTy::InsertPointGuard IPG(Builder); + assert(IP.getBlock()->end() != IP.getPoint() && + "OpenMP IR Builder should cause terminated block!"); + llvm::BasicBlock *IPBB = IP.getBlock(); + llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint()); + IPBB->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(IPBB); + CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB); + EmitBranchThroughCleanup(Dest); + }; + + // Privatization callback that performs appropriate action for + // shared/private/firstprivate/lastprivate/copyin/... variables. + // + // TODO: This defaults to shared right now. + auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::Value &Val, llvm::Value *&ReplVal) { + // The next line is appropriate only for variables (Val) with the + // data-sharing attribute "shared". + ReplVal = &Val; + + return CodeGenIP; + }; + + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); + + auto BodyGenCB = [ParallelRegionBodyStmt, + this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::BasicBlock &ContinuationBB) { + auto OldAllocaIP = AllocaInsertPt; + AllocaInsertPt = &*AllocaIP.getPoint(); + + auto OldReturnBlock = ReturnBlock; + ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); + + llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); + CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); + llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); + CodeGenIPBBTI->removeFromParent(); + + Builder.SetInsertPoint(CodeGenIPBB); + + EmitStmt(ParallelRegionBodyStmt); + + Builder.Insert(CodeGenIPBBTI); + + AllocaInsertPt = OldAllocaIP; + ReturnBlock = OldReturnBlock; + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, + FiniCB, IfCond, NumThreads, + ProcBind, S.hasCancel())); + return; + } + // Emit parallel region as a standalone region.
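This is the first directive lowered through the shared llvm::OpenMPIRBuilder instead of clang's own runtime glue; the driver boils down to three callbacks handed to CreateParallel. A compressed skeleton of that protocol against the LLVM-10-era API (later releases renamed and reshaped these hooks, so treat this strictly as a sketch):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

    static void emitParallelViaBuilder(llvm::OpenMPIRBuilder &OMPBuilder,
                                       llvm::IRBuilder<> &Builder) {
      using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

      // Body hook: fill in the parallel region at CodeGenIP.
      auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                           llvm::BasicBlock &ContinuationBB) {
        Builder.restoreIP(CodeGenIP);
        // ... emit the region body here ...
      };

      // Privatization hook: this minimal version keeps every captured value
      // shared, exactly like the TODO'd default in the hunk above.
      auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       llvm::Value &V, llvm::Value *&ReplVal) {
        ReplVal = &V;
        return CodeGenIP;
      };

      // Finalization hook: run destructors/cleanups at region exit.
      auto FiniCB = [](InsertPointTy IP) {};

      Builder.restoreIP(OMPBuilder.CreateParallel(
          Builder, BodyGenCB, PrivCB, FiniCB,
          /*IfCondition=*/nullptr, /*NumThreads=*/nullptr,
          llvm::omp::OMP_PROC_BIND_default, /*IsCancellable=*/false));
    }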
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); @@ -1305,6 +1442,41 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { [](CodeGenFunction &) { return nullptr; }); } +static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, + int MaxLevel, int Level = 0) { + assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); + const Stmt *SimplifiedS = S->IgnoreContainers(); + if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { + PrettyStackTraceLoc CrashInfo( + CGF.getContext().getSourceManager(), CS->getLBracLoc(), + "LLVM IR generation of compound statement ('{}')"); + + // Keep track of the current cleanup stack depth, including debug scopes. + CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); + for (const Stmt *CurStmt : CS->body()) + emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); + return; + } + if (SimplifiedS == NextLoop) { + if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { + S = For->getBody(); + } else { + assert(isa<CXXForRangeStmt>(SimplifiedS) && + "Expected canonical for loop or range-based for loop."); + const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); + CGF.EmitStmt(CXXFor->getLoopVarStmt()); + S = CXXFor->getBody(); + } + if (Level + 1 < MaxLevel) { + NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( + S, /*TryImperfectlyNestedLoops=*/true); + emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); + return; + } + } + CGF.EmitStmt(S); +} + void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit) { RunCleanupsScope BodyScope(*this); @@ -1324,8 +1496,25 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, // On a continue in the body, jump to the end. JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + for (const Expr *E : D.finals_conditions()) { + if (!E) + continue; + // Check that loop counter in non-rectangular nest fits into the iteration + // space. + llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); + EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), + getProfileCount(D.getBody())); + EmitBlock(NextBB); + } + // Emit loop variables for C++ range loops. + const Stmt *Body = + D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); // Emit loop body. - EmitStmt(D.getBody()); + emitBody(*this, Body, + OMPLoopDirective::tryToFindNextInnerLoop( + Body, /*TryImperfectlyNestedLoops=*/true), + D.getCollapsedNumber()); + // The end (updates/cleanups). 
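The new finals_conditions() checks guard loop counters in non-rectangular nests, where an inner bound depends on an outer counter and a collapsed counter value can land outside the combined iteration space. An example of such a nest (OpenMP 5.0; assumes n <= 64 for the array bound):

    void lower_triangle(int n, float (*a)[64]) {
      // The lower bound of j depends on i, so the collapsed i*j space is
      // triangular and the per-counter 'finals' conditions apply on exit.
      #pragma omp parallel for collapse(2)
      for (int i = 0; i < n; ++i)
        for (int j = i; j < n; ++j)
          a[i][j] = 0.0f;
    }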
EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); @@ -1441,7 +1630,7 @@ void CodeGenFunction::EmitOMPLinearClauseFinal( DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - Address OrigAddr = EmitLValue(&DRE).getAddress(); + Address OrigAddr = EmitLValue(&DRE).getAddress(*this); CodeGenFunction::OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); (void)VarScope.Privatize(); @@ -1460,14 +1649,14 @@ static void emitAlignedClause(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { - unsigned ClauseAlignment = 0; + llvm::APInt ClauseAlignment(64, 0); if (const Expr *AlignmentExpr = Clause->getAlignment()) { auto *AlignmentCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); - ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); + ClauseAlignment = AlignmentCI->getValue(); } for (const Expr *E : Clause->varlists()) { - unsigned Alignment = ClauseAlignment; + llvm::APInt Alignment(ClauseAlignment); if (Alignment == 0) { // OpenMP [2.8.1, Description] // If no optional parameter is specified, implementation-defined default @@ -1478,12 +1667,13 @@ static void emitAlignedClause(CodeGenFunction &CGF, E->getType()->getPointeeType())) .getQuantity(); } - assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && + assert((Alignment == 0 || Alignment.isPowerOf2()) && "alignment is not power of 2"); if (Alignment != 0) { llvm::Value *PtrValue = CGF.EmitScalarExpr(E); CGF.EmitAlignmentAssumption( - PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment); + PtrValue, E, /*No second loc needed*/ SourceLocation(), + llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); } } } @@ -1510,7 +1700,7 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), E->getType(), VK_LValue, E->getExprLoc()); - return EmitLValue(&DRE).getAddress(); + return EmitLValue(&DRE).getAddress(*this); }); } else { (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { @@ -1553,8 +1743,28 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitIgnoredExpr(I); } } + // Create temp loop control variables with their init values to support + // non-rectangular loops. + CodeGenFunction::OMPMapVars PreCondVars; + for (const Expr * E: S.dependent_counters()) { + if (!E) + continue; + assert(!E->getType().getNonReferenceType()->isRecordType() && + "dependent counter must not be an iterator."); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address CounterAddr = + CGF.CreateMemTemp(VD->getType().getNonReferenceType()); + (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); + } + (void)PreCondVars.apply(CGF); + for (const Expr *E : S.dependent_inits()) { + if (!E) + continue; + CGF.EmitIgnoredExpr(E); + } // Check that loop is executed at least one time. 
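emitAlignedClause now carries the alignment as a 64-bit APInt instead of unsigned and feeds EmitAlignmentAssumption a ConstantInt, so alignments beyond the old unsigned range are representable. The clause it implements, at the source level:

    void kernel(float *x, int n) {
      // Promise that x is 64-byte aligned; the codegen above turns this
      // into an alignment assumption on the pointer.
      #pragma omp simd aligned(x : 64)
      for (int i = 0; i < n; ++i)
        x[i] += 1.0f;
    }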
CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); + PreCondVars.restore(CGF); } void CodeGenFunction::EmitOMPLinearClause( @@ -1653,12 +1863,13 @@ void CodeGenFunction::EmitOMPSimdFinal( } Address OrigAddr = Address::invalid(); if (CED) { - OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); + OrigAddr = + EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); } else { DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), /*RefersToEnclosingVariableOrCapture=*/false, (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); - OrigAddr = EmitLValue(&DRE).getAddress(); + OrigAddr = EmitLValue(&DRE).getAddress(*this); } OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); @@ -1675,6 +1886,7 @@ void CodeGenFunction::EmitOMPSimdFinal( static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit) { + CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); } @@ -1687,6 +1899,40 @@ static LValue EmitOMPHelperVar(CodeGenFunction &CGF, return CGF.EmitLValue(Helper); } +static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, + const RegionCodeGenTy &SimdInitGen, + const RegionCodeGenTy &BodyCodeGen) { + auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, + PrePostActionTy &) { + CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + SimdInitGen(CGF); + + BodyCodeGen(CGF); + }; + auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); + + BodyCodeGen(CGF); + }; + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (CGF.getLangOpts().OpenMP >= 50 && + (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_simd)) { + IfCond = C->getCondition(); + break; + } + } + if (IfCond) { + CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); + } else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); + } +} + static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, PrePostActionTy &Action) { Action.Enter(CGF); @@ -1737,8 +1983,6 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitIgnoredExpr(S.getCalcLastIteration()); } - CGF.EmitOMPSimdInit(S); - emitAlignedClause(CGF, S); (void)CGF.EmitOMPLinearClauseInit(S); { @@ -1747,17 +1991,29 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPLinearClause(S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + CGF, S, CGF.EmitLValue(S.getIterationVariable())); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); + + emitCommonSimdLoop( + CGF, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPSimdInit(S); + }, + [&S, 
&LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter( + CGF, S); + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); // Emit final copy of the lastprivate variables at the end of loops. if (HasLastprivateClause) @@ -1842,27 +2098,32 @@ void CodeGenFunction::EmitOMPOuterLoop( JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); - // Generate !llvm.loop.parallel metadata for loads and stores for loops - // with dynamic/guided scheduling and without ordered clause. - if (!isOpenMPSimdDirective(S.getDirectiveKind())) - LoopStack.setParallel(!IsMonotonic); - else - EmitOMPSimdInit(S, IsMonotonic); - - SourceLocation Loc = S.getBeginLoc(); - - // when 'distribute' is not combined with a 'for': - // while (idx <= UB) { BODY; ++idx; } - // when 'distribute' is combined with a 'for' - // (e.g. 'distribute parallel for') - // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } - EmitOMPInnerLoop( - S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, - [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { - CodeGenLoop(CGF, S, LoopExit); + emitCommonSimdLoop( + *this, S, + [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { + // Generate !llvm.loop.parallel metadata for loads and stores for loops + // with dynamic/guided scheduling and without ordered clause. + if (!isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.LoopStack.setParallel(!IsMonotonic); + else + CGF.EmitOMPSimdInit(S, IsMonotonic); }, - [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { - CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, + &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + SourceLocation Loc = S.getBeginLoc(); + // when 'distribute' is not combined with a 'for': + // while (idx <= UB) { BODY; ++idx; } + // when 'distribute' is combined with a 'for' + // (e.g. 
'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { + CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + }); }); EmitBlock(Continue.getBlock()); @@ -2124,14 +2385,16 @@ static void emitDistributeParallelForDistributeInnerBoundParams( const auto &Dir = cast<OMPLoopDirective>(S); LValue LB = CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); - llvm::Value *LBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + llvm::Value *LBCast = + CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), + CGF.SizeTy, /*isSigned=*/false); CapturedVars.push_back(LBCast); LValue UB = CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); - llvm::Value *UBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + llvm::Value *UBCast = + CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), + CGF.SizeTy, /*isSigned=*/false); CapturedVars.push_back(UBCast); } @@ -2304,6 +2567,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + *this, S, EmitLValue(S.getIterationVariable())); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); @@ -2351,47 +2616,60 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( /* Chunked */ Chunk != nullptr) || StaticChunkedOne) && !Ordered) { - if (isOpenMPSimdDirective(S.getDirectiveKind())) - EmitOMPSimdInit(S, /*IsMonotonic=*/true); - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When no chunk_size is specified, the iteration space is divided into - // chunks that are approximately equal in size, and at most one chunk is - // distributed to each thread. Note that the size of the chunks is - // unspecified in this case. - CGOpenMPRuntime::StaticRTInput StaticInit( - IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress(), - StaticChunkedOne ? Chunk : nullptr); - RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), - ScheduleKind, StaticInit); JumpDest LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); - // UB = min(UB, GlobalUB); - if (!StaticChunkedOne) - EmitIgnoredExpr(S.getEnsureUpperBound()); - // IV = LB; - EmitIgnoredExpr(S.getInit()); - // For unchunked static schedule generate: - // - // while (idx <= UB) { - // BODY; - // ++idx; - // } - // - // For static schedule with chunk one: - // - // while (IV <= PrevUB) { - // BODY; - // IV += ST; - // } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(), - StaticChunkedOne ? 
S.getDistInc() : S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + emitCommonSimdLoop( + *this, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); }, - [](CodeGenFunction &) {}); + [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, + &S, ScheduleKind, LoopExit, + &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When no chunk_size is specified, the iteration space is divided + // into chunks that are approximately equal in size, and at most + // one chunk is distributed to each thread. Note that the size of + // the chunks is unspecified in this case. + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, IL.getAddress(CGF), + LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), + StaticChunkedOne ? Chunk : nullptr); + CGF.CGM.getOpenMPRuntime().emitForStaticInit( + CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, + StaticInit); + // UB = min(UB, GlobalUB); + if (!StaticChunkedOne) + CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB; + CGF.EmitIgnoredExpr(S.getInit()); + // For unchunked static schedule generate: + // + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // For static schedule with chunk one: + // + // while (IV <= PrevUB) { + // BODY; + // IV += ST; + // } + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), + StaticChunkedOne ? S.getCombinedParForInDistCond() + : S.getCond(), + StaticChunkedOne ? S.getDistInc() : S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime() + .initLastprivateConditionalCounter(CGF, S); + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { @@ -2407,9 +2685,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. 
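The restructured worksharing path keeps the two inner-loop shapes documented in the comments above; they correspond to the unchunked and chunk-size-one static schedules:

    void fill(int n, double *a) {
      // Unchunked static: each thread gets one contiguous slice, and the
      // generated loop is 'while (idx <= UB) { body; ++idx; }'.
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < n; ++i) a[i] = 0.0;

      // Static with chunk 1: iterations are dealt round-robin, and the
      // loop becomes 'while (IV <= PrevUB) { body; IV += ST; }'.
      #pragma omp parallel for schedule(static, 1)
      for (int i = 0; i < n; ++i) a[i] = 1.0;
    }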
- const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), - ST.getAddress(), IL.getAddress(), - Chunk, EUB); + const OMPLoopArguments LoopArguments( + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + IL.getAddress(*this), Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LoopArguments, CGDispatchBounds); } @@ -2569,6 +2847,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // break; // } // .omp.sections.exit: + CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); llvm::SwitchInst *SwitchStmt = CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), @@ -2603,6 +2882,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); @@ -2613,8 +2893,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { OpenMPScheduleTy ScheduleKind; ScheduleKind.Schedule = OMPC_SCHEDULE_static; CGOpenMPRuntime::StaticRTInput StaticInit( - /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), - LB.getAddress(), UB.getAddress(), ST.getAddress()); + /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), + LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); CGF.CGM.getOpenMPRuntime().emitForStaticInit( CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); @@ -2729,13 +3009,17 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { } } -void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { +static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; + CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); + emitMaster(*this, S); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { @@ -2779,6 +3063,35 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( emitEmptyBoundParameters); } +void CodeGenFunction::EmitOMPParallelMasterDirective( + const OMPParallelMasterDirective &S) { + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'master' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + bool Copyins = CGF.EmitOMPCopyinClause(S); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (Copyins) { + // Emit an implicit barrier to synchronize threads and avoid data races + // when propagating the master thread's values of threadprivate variables + // to the local instances of those variables in all other implicit threads.
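EmitOMPParallelMasterDirective composes the new OpenMP 5.0 combined construct from the existing parallel and master emitters; the barrier emitted just below exists because copyin must complete on every thread before the master body runs. Source shape (illustrative):

    int counter;
    #pragma omp threadprivate(counter)

    void rebroadcast() {
      counter = 42; // value in the master thread's copy
      // copyin replicates the master's 'counter' into every thread's
      // threadprivate copy; only the master then executes the region body.
      #pragma omp parallel master copyin(counter)
      {
        // runs once, on the master thread, inside the parallel region
      }
    }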
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + emitMaster(CGF, S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit @@ -2948,7 +3261,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); Scope.addPrivate(Pair.first, [&CGF, &DRE]() { - return CGF.EmitLValue(&DRE).getAddress(); + return CGF.EmitLValue(&DRE).getAddress(CGF); }); } for (const auto &Pair : PrivatePtrs) { @@ -3044,7 +3357,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); - OMPLexicalScope Scope(*this, S); + OMPLexicalScope Scope(*this, S, llvm::None, + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPSimdDirective(S.getDirectiveKind())); TaskGen(*this, OutlinedFn, Data); } @@ -3112,7 +3427,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); QualType BaseAndPointersType = getContext().getConstantArrayType( - getContext().VoidPtrTy, ArrSize, ArrayType::Normal, + getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); BPVD = createImplicitFirstprivateForType( getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); @@ -3120,7 +3435,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); QualType SizesType = getContext().getConstantArrayType( getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), - ArrSize, ArrayType::Normal, + ArrSize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, S.getBeginLoc()); @@ -3402,11 +3717,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) || StaticChunked) { - if (isOpenMPSimdDirective(S.getDirectiveKind())) - EmitOMPSimdInit(S, /*IsMonotonic=*/true); CGOpenMPRuntime::StaticRTInput StaticInit( - IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), - LB.getAddress(), UB.getAddress(), ST.getAddress(), + IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), StaticChunked ? 
Chunk : nullptr); RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); @@ -3453,18 +3766,28 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // IV = LB; // } // - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, - [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { - CodeGenLoop(CGF, S, LoopExit); - }, - [&S, StaticChunked](CodeGenFunction &CGF) { - if (StaticChunked) { - CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); - CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); - CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); - CGF.EmitIgnoredExpr(S.getCombinedInit()); - } - }); + emitCommonSimdLoop( + *this, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + }, + [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, + StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), Cond, IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [&S, StaticChunked](CodeGenFunction &CGF) { + if (StaticChunked) { + CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); + CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedInit()); + } + }); + }); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); @@ -3472,8 +3795,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. const OMPLoopArguments LoopArguments = { - LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), - Chunk}; + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + IL.getAddress(*this), Chunk}; EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, CodeGenLoop); } @@ -3673,11 +3996,11 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, // expression is simple and atomic is allowed for the given type for the // target platform. 
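emitOMPAtomicRMW implements the fast path the comment above begins to describe: a simple integer update on a simple lvalue becomes a single atomicrmw instruction, while anything more complex falls back to a compare-exchange loop. What qualifies:

    void bump(int *counter, int v) {
      // Simple integer 'x binop= expr' maps directly to 'atomicrmw add'.
      #pragma omp atomic update
      *counter += v;
    }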
if (BO == BO_Comma || !Update.isScalar() || - !Update.getScalarVal()->getType()->isIntegerTy() || - !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && - (Update.getScalarVal()->getType() != - X.getAddress().getElementType())) || - !X.getAddress().getElementType()->isIntegerTy() || + !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || + (!isa<llvm::ConstantInt>(Update.getScalarVal()) && + (Update.getScalarVal()->getType() != + X.getAddress(CGF).getElementType())) || + !X.getAddress(CGF).getElementType()->isIntegerTy() || !Context.getTargetInfo().hasBuiltinAtomic( Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) return std::make_pair(false, RValue::get(nullptr)); @@ -3749,11 +4072,11 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, llvm::Value *UpdateVal = Update.getScalarVal(); if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { UpdateVal = CGF.Builder.CreateIntCast( - IC, X.getAddress().getElementType(), + IC, X.getAddress(CGF).getElementType(), X.getType()->hasSignedIntegerRepresentation()); } llvm::Value *Res = - CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); + CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO); return std::make_pair(true, RValue::get(Res)); } @@ -3991,6 +4314,9 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_reverse_offload: case OMPC_dynamic_allocators: case OMPC_atomic_default_mem_order: + case OMPC_device_type: + case OMPC_match: + case OMPC_nontemporal: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -4090,18 +4416,21 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); OMPLexicalScope Scope(CGF, S, OMPD_task); - auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) { - OMPLoopScope(CGF, D); - // Emit calculation of the iterations count. - llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); - NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, - /*isSigned=*/false); - return NumIterations; + auto &&SizeEmitter = + [IsOffloadEntry](CodeGenFunction &CGF, + const OMPLoopDirective &D) -> llvm::Value * { + if (IsOffloadEntry) { + OMPLoopScope(CGF, D); + // Emit calculation of the iterations count. 
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); + NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, + /*isSigned=*/false); + return NumIterations; + } + return nullptr; }; - if (IsOffloadEntry) - CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, - SizeEmitter); - CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, + SizeEmitter); } static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, @@ -4403,7 +4732,8 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; - emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, + CodeGen); emitPostUpdateForReductionClause(*this, S, [](CodeGenFunction &) { return nullptr; }); } @@ -4526,6 +4856,19 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { break; } } + if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + // TODO: This check is necessary as we only generate `omp parallel` through + // the OpenMPIRBuilder for now. + if (S.getCancelRegion() == OMPD_parallel) { + llvm::Value *IfCondition = nullptr; + if (IfCond) + IfCondition = EmitScalarExpr(IfCond, + /*IgnoreResultAssign=*/true); + return Builder.restoreIP( + OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); + } + } + CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, S.getCancelRegion()); } @@ -4931,8 +5274,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.incrementProfileCounter(&S); } - if (isOpenMPSimdDirective(S.getDirectiveKind())) - CGF.EmitOMPSimdInit(S); + (void)CGF.EmitOMPLinearClauseInit(S); OMPPrivateScope LoopScope(CGF); // Emit helper vars inits. @@ -4950,6 +5292,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, LoopScope); CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); // Emit the loop iteration variable. @@ -4967,13 +5310,24 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitIgnoredExpr(S.getCalcLastIteration()); } - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, JumpDest()); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); + { + OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); + emitCommonSimdLoop( + CGF, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S); + }, + [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); + } // Emit: if (PreCond) - end. 
if (ContBlock) { CGF.EmitBranch(ContBlock); @@ -4987,6 +5341,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, (*LIP)->getType(), S.getBeginLoc()))); } + CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, + (*LIP)->getType(), S.getBeginLoc())); + }); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, @@ -5022,9 +5381,62 @@ void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { + OMPLexicalScope Scope(*this, S); EmitOMPTaskLoopBasedDirective(S); } +void CodeGenFunction::EmitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( + const OMPMasterTaskLoopSimdDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( + const OMPParallelMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, + S.getBeginLoc()); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( + const OMPParallelMasterTaskLoopSimdDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, + S.getBeginLoc()); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, + emitEmptyBoundParameters); +} + // Generate the instructions for '#pragma omp target update' directive. 
void CodeGenFunction::EmitOMPTargetUpdateDirective( const OMPTargetUpdateDirective &S) { @@ -5058,11 +5470,11 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( OMPPrivateScope LoopGlobals(CGF); if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { for (const Expr *E : LD->counters()) { - const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { LValue GlobLVal = CGF.EmitLValue(E); LoopGlobals.addPrivate( - VD, [&GlobLVal]() { return GlobLVal.getAddress(); }); + VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); } if (isa<OMPCapturedExprDecl>(VD)) { // Emit only those that were not explicitly referenced in clauses. diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp index 3cb3d3544838..59631e802373 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGVTables.cpp @@ -13,6 +13,7 @@ #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/CodeGenOptions.h" @@ -157,7 +158,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, GlobalDecl GD, const ThunkInfo &Thunk) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); + const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); QualType ResultType = FPT->getReturnType(); // Get the original function @@ -166,6 +167,15 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true); llvm::Function *BaseFn = cast<llvm::Function>(Callee); + // Cloning can't work if we don't have a definition. The Microsoft ABI may + // require thunks when a definition is not available. Emit an error in these + // cases. + if (!MD->isDefined()) { + CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments"); + return Fn; + } + assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method"); + // Clone to thunk. llvm::ValueToValueMapTy VMap; @@ -201,6 +211,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, Builder.SetInsertPoint(&*ThisStore); llvm::Value *AdjustedThisPtr = CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This); + AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, + ThisStore->getOperand(0)->getType()); ThisStore->setOperand(0, AdjustedThisPtr); if (!Thunk.Return.isEmpty()) { @@ -231,7 +243,6 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, // Build FunctionArgs. const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); QualType ThisType = MD->getThisType(); - const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); QualType ResultType; if (IsUnprototyped) ResultType = CGM.getContext().VoidTy; @@ -240,7 +251,7 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, else if (CGM.getCXXABI().hasMostDerivedReturn(GD)) ResultType = CGM.getContext().VoidPtrTy; else - ResultType = FPT->getReturnType(); + ResultType = MD->getType()->castAs<FunctionProtoType>()->getReturnType(); FunctionArgList FunctionArgs; // Create the implicit 'this' parameter declaration. 
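A note on the hunks above: several places in this file swap getAs<FunctionProtoType>() for castAs<FunctionProtoType>(). The difference matters on the thunk paths: getAs<> returns null on a type mismatch, while castAs<> asserts that the cast succeeds. A minimal sketch of the idiom, assuming only that MD is a prototyped C++ method as the thunk code guarantees (illustrative, not part of the patch):

#include "clang/AST/DeclCXX.h"
#include "clang/AST/Type.h"

// Sketch: castAs<> encodes the "this method is prototyped" invariant as an
// assertion rather than a nullable pointer the caller might forget to check.
clang::QualType thunkReturnType(const clang::CXXMethodDecl *MD) {
  const auto *FPT = MD->getType()->castAs<clang::FunctionProtoType>();
  return FPT->getReturnType(); // safe: castAs<> never returns null
}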
@@ -291,14 +302,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
                       *this, LoadCXXThisAddress(), Thunk->This)
                 : LoadCXXThis();
 
-  if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
-    // We don't handle return adjusting thunks, because they require us to call
-    // the copy constructor. For now, fall through and pretend the return
-    // adjustment was empty so we don't crash.
+  // If perfect forwarding is required for a variadic method, a method using
+  // inalloca, or an unprototyped thunk, use musttail. Emit an error if this
+  // thunk requires a return adjustment, since that is impossible with musttail.
+  if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) {
     if (Thunk && !Thunk->Return.isEmpty()) {
       if (IsUnprototyped)
         CGM.ErrorUnsupported(
             MD, "return-adjusting thunk with incomplete parameter type");
+      else if (CurFnInfo->isVariadic())
+        llvm_unreachable("shouldn't try to emit musttail return-adjusting "
+                         "thunks for variadic functions");
       else
         CGM.ErrorUnsupported(
             MD, "non-trivial argument copy for return-adjusting thunk");
@@ -322,7 +336,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
   for (const ParmVarDecl *PD : MD->parameters())
     EmitDelegateCallArg(CallArgs, PD, SourceLocation());
 
-  const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
+  const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
 
 #ifndef NDEBUG
   const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall(
@@ -549,16 +563,32 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,
 
   CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
 
+  // Thunks for variadic methods are special because in general variadic
+  // arguments cannot be perfectly forwarded. In the general case, clang
+  // implements such thunks by cloning the original function body. However, for
+  // thunks with no return adjustment on targets that support musttail, we can
+  // use musttail to perfectly forward the variadic arguments.
+  bool ShouldCloneVarArgs = false;
   if (!IsUnprototyped && ThunkFn->isVarArg()) {
-    // Varargs thunks are special; we can't just generate a call because
-    // we can't copy the varargs. Our implementation is rather
-    // expensive/sucky at the moment, so don't generate the thunk unless
-    // we have to.
-    // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
+    ShouldCloneVarArgs = true;
+    if (TI.Return.isEmpty()) {
+      switch (CGM.getTriple().getArch()) {
+      case llvm::Triple::x86_64:
+      case llvm::Triple::x86:
+      case llvm::Triple::aarch64:
+        ShouldCloneVarArgs = false;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  if (ShouldCloneVarArgs) {
     if (UseAvailableExternallyLinkage)
       return ThunkFn;
-    ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
-                                                        TI);
+    ThunkFn =
+        CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI);
   } else {
     // Normal thunk body generation.
     CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);
@@ -646,7 +676,12 @@ void CodeGenVTables::addVTableComponent(
     // Method is acceptable, continue processing as usual.
   }
 
-  auto getSpecialVirtualFn = [&](StringRef name) {
+  auto getSpecialVirtualFn = [&](StringRef name) -> llvm::Constant * {
+    // For NVPTX devices in OpenMP emit special functions as null pointers,
+    // otherwise linking ends up with unresolved references.
+ if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsDevice && + CGM.getTriple().isNVPTX()) + return llvm::ConstantPointerNull::get(CGM.Int8PtrTy); llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); llvm::Constant *fn = cast<llvm::Constant>( @@ -779,7 +814,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration"); CGM.setGVProperties(VTable, RD); - CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); + CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get()); return VTable; } @@ -1010,7 +1045,32 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return true; } -void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, +llvm::GlobalObject::VCallVisibility +CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + llvm::GlobalObject::VCallVisibility TypeVis; + if (!isExternallyVisible(LV.getLinkage())) + TypeVis = llvm::GlobalObject::VCallVisibilityTranslationUnit; + else if (HasHiddenLTOVisibility(RD)) + TypeVis = llvm::GlobalObject::VCallVisibilityLinkageUnit; + else + TypeVis = llvm::GlobalObject::VCallVisibilityPublic; + + for (auto B : RD->bases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + for (auto B : RD->vbases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + return TypeVis; +} + +void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { if (!getCodeGenOpts().LTOUnit) return; @@ -1070,4 +1130,10 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD); } } + + if (getCodeGenOpts().VirtualFunctionElimination) { + llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD); + if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) + VTable->addVCallVisibilityMetadata(TypeVis); + } } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGValue.h b/contrib/llvm-project/clang/lib/CodeGen/CGValue.h index 71f95abe488a..9fd07bdb187d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGValue.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CGValue.h @@ -29,6 +29,7 @@ namespace llvm { namespace clang { namespace CodeGen { class AggValueSlot; + class CodeGenFunction; struct CGBitFieldInfo; /// RValue - This trivial value class is used to represent the result of an @@ -319,11 +320,13 @@ public: void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; } // simple lvalue - llvm::Value *getPointer() const { + llvm::Value *getPointer(CodeGenFunction &CGF) const { assert(isSimple()); return V; } - Address getAddress() const { return Address(getPointer(), getAlignment()); } + Address getAddress(CodeGenFunction &CGF) const { + return Address(getPointer(CGF), getAlignment()); + } void setAddress(Address address) { assert(isSimple()); V = address.getPointer(); @@ -427,8 +430,8 @@ public: return R; } - RValue asAggregateRValue() const { - return RValue::getAggregate(getAddress(), isVolatileQualified()); + RValue asAggregateRValue(CodeGenFunction &CGF) const { + return RValue::getAggregate(getAddress(CGF), isVolatileQualified()); } }; @@ -536,14 +539,12 @@ public: return AV; } - static 
AggValueSlot forLValue(const LValue &LV, - IsDestructed_t isDestructed, - NeedsGCBarriers_t needsGC, - IsAliased_t isAliased, - Overlap_t mayOverlap, - IsZeroed_t isZeroed = IsNotZeroed, - IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) { - return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC, + static AggValueSlot + forLValue(const LValue &LV, CodeGenFunction &CGF, IsDestructed_t isDestructed, + NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed, + IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) { + return forAddr(LV.getAddress(CGF), LV.getQuals(), isDestructed, needsGC, isAliased, mayOverlap, isZeroed, isChecked); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp index 0ae9ea427d65..7065e78f19a2 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp @@ -14,7 +14,9 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" +#include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/LangStandard.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" @@ -37,6 +39,7 @@ #include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" @@ -79,6 +82,24 @@ namespace clang { BackendConsumer *BackendCon; }; + static void reportOptRecordError(Error E, DiagnosticsEngine &Diags, + const CodeGenOptions CodeGenOpts) { + handleAllErrors( + std::move(E), + [&](const RemarkSetupFileError &E) { + Diags.Report(diag::err_cannot_open_file) + << CodeGenOpts.OptRecordFile << E.message(); + }, + [&](const RemarkSetupPatternError &E) { + Diags.Report(diag::err_drv_optimization_remark_pattern) + << E.message() << CodeGenOpts.OptRecordPasses; + }, + [&](const RemarkSetupFormatError &E) { + Diags.Report(diag::err_drv_optimization_remark_format) + << CodeGenOpts.OptRecordFormat; + }); + } + class BackendConsumer : public ASTConsumer { using LinkModule = CodeGenAction::LinkModule; @@ -130,6 +151,29 @@ namespace clang { FrontendTimesIsEnabled = TimePasses; llvm::TimePassesIsEnabled = TimePasses; } + + // This constructor is used in installing an empty BackendConsumer + // to use the clang diagnostic handler for IR input files. It avoids + // initializing the OS field. 
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, bool TimePasses, + SmallVector<LinkModule, 4> LinkModules, LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr) + : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), + CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), + Context(nullptr), + LLVMIRGeneration("irgen", "LLVM IR Generation Time"), + LLVMIRGenerationRefCount(0), + Gen(CreateLLVMCodeGen(Diags, "", HeaderSearchOpts, PPOpts, + CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)) { + FrontendTimesIsEnabled = TimePasses; + llvm::TimePassesIsEnabled = TimePasses; + } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr<llvm::Module> takeModule() { return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); @@ -228,6 +272,7 @@ namespace clang { void HandleTranslationUnit(ASTContext &C) override { { + llvm::TimeTraceScope TimeScope("Frontend"); PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount += 1; @@ -260,33 +305,20 @@ namespace clang { std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler = Ctx.getDiagnosticHandler(); - Ctx.setDiagnosticHandler(llvm::make_unique<ClangDiagnosticHandler>( + Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>( CodeGenOpts, this)); Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = - setupOptimizationRemarks(Ctx, CodeGenOpts.OptRecordFile, - CodeGenOpts.OptRecordPasses, - CodeGenOpts.OptRecordFormat, - CodeGenOpts.DiagnosticsWithHotness, - CodeGenOpts.DiagnosticsHotnessThreshold); + setupOptimizationRemarks( + Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses, + CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness, + CodeGenOpts.DiagnosticsHotnessThreshold); if (Error E = OptRecordFileOrErr.takeError()) { - handleAllErrors( - std::move(E), - [&](const RemarkSetupFileError &E) { - Diags.Report(diag::err_cannot_open_file) - << CodeGenOpts.OptRecordFile << E.message(); - }, - [&](const RemarkSetupPatternError &E) { - Diags.Report(diag::err_drv_optimization_remark_pattern) - << E.message() << CodeGenOpts.OptRecordPasses; - }, - [&](const RemarkSetupFormatError &E) { - Diags.Report(diag::err_drv_optimization_remark_format) - << CodeGenOpts.OptRecordFormat; - }); + reportOptRecordError(std::move(E), Diags, CodeGenOpts); return; } + std::unique_ptr<llvm::ToolOutputFile> OptRecordFile = std::move(*OptRecordFileOrErr); @@ -327,6 +359,10 @@ namespace clang { Gen->CompleteTentativeDefinition(D); } + void CompleteExternalDeclaration(VarDecl *D) override { + Gen->CompleteExternalDeclaration(D); + } + void AssignInheritanceModel(CXXRecordDecl *RD) override { Gen->AssignInheritanceModel(RD); } @@ -362,6 +398,9 @@ namespace clang { bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); /// Specialized handler for unsupported backend feature diagnostic. void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); + /// Specialized handler for misexpect warnings. + /// Note that misexpect remarks are emitted through ORE + void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D); /// Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. 
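The reportOptRecordError helper introduced above centralizes the llvm::handleAllErrors dispatch that HandleTranslationUnit previously open-coded. The pattern it relies on: each lambda matches one concrete ErrorInfo subclass, and every possible error must be consumed or the Error asserts at destruction. A self-contained sketch of that mechanism, with a hypothetical MyFileError class standing in for the remark-setup error types (illustrative only, not part of the patch):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical typed error; RemarkSetupFileError and friends follow the same
// ErrorInfo<> recipe: a static ID plus log()/convertToErrorCode() overrides.
class MyFileError : public llvm::ErrorInfo<MyFileError> {
public:
  static char ID;
  void log(llvm::raw_ostream &OS) const override { OS << "file error"; }
  std::error_code convertToErrorCode() const override {
    return llvm::inconvertibleErrorCode();
  }
};
char MyFileError::ID;

void report(llvm::Error E) {
  llvm::handleAllErrors(
      std::move(E),
      // Matched by dynamic type, most specific handler first.
      [](const MyFileError &FE) { llvm::errs() << "open failed\n"; },
      // Catch-all so no error class escapes unhandled.
      [](const llvm::ErrorInfoBase &EIB) {
        llvm::errs() << EIB.message() << "\n";
      });
}

Factoring this into one helper lets both the AST path and the new IR-input path in ExecuteAction report remark-setup failures identically.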
@@ -561,13 +600,13 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( if (D.isLocationAvailable()) { D.getLocation(Filename, Line, Column); if (Line > 0) { - const FileEntry *FE = FileMgr.getFile(Filename); + auto FE = FileMgr.getFile(Filename); if (!FE) FE = FileMgr.getFile(D.getAbsolutePath()); if (FE) { // If -gcolumn-info was not used, Column will be 0. This upsets the // source manager, so pass 1 if Column is not set. - DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + DILoc = SourceMgr.translateFileLineCol(*FE, Line, Column ? Column : 1); } } BadDebugInfo = DILoc.isInvalid(); @@ -600,10 +639,50 @@ void BackendConsumer::UnsupportedDiagHandler( StringRef Filename; unsigned Line, Column; bool BadDebugInfo = false; - FullSourceLoc Loc = - getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); + FullSourceLoc Loc; + std::string Msg; + raw_string_ostream MsgStream(Msg); + + // Context will be nullptr for IR input files, we will construct the diag + // message from llvm::DiagnosticInfoUnsupported. + if (Context != nullptr) { + Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); + MsgStream << D.getMessage(); + } else { + DiagnosticPrinterRawOStream DP(MsgStream); + D.print(DP); + } + Diags.Report(Loc, diag::err_fe_backend_unsupported) << MsgStream.str(); + + if (BadDebugInfo) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; +} + +void BackendConsumer::MisExpectDiagHandler( + const llvm::DiagnosticInfoMisExpect &D) { + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo = false; + FullSourceLoc Loc; + std::string Msg; + raw_string_ostream MsgStream(Msg); + DiagnosticPrinterRawOStream DP(MsgStream); - Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str(); + // Context will be nullptr for IR input files, we will construct the diag + // message from llvm::DiagnosticInfoMisExpect. + if (Context != nullptr) { + Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); + MsgStream << D.getMsg(); + } else { + DiagnosticPrinterRawOStream DP(MsgStream); + D.print(DP); + } + Diags.Report(Loc, diag::warn_profile_data_misexpect) << MsgStream.str(); if (BadDebugInfo) // If we were not able to translate the file:line:col information @@ -623,12 +702,19 @@ void BackendConsumer::EmitOptimizationMessage( StringRef Filename; unsigned Line, Column; bool BadDebugInfo = false; - FullSourceLoc Loc = - getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); - + FullSourceLoc Loc; std::string Msg; raw_string_ostream MsgStream(Msg); - MsgStream << D.getMsg(); + + // Context will be nullptr for IR input files, we will construct the remark + // message from llvm::DiagnosticInfoOptimizationBase. 
+ if (Context != nullptr) { + Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); + MsgStream << D.getMsg(); + } else { + DiagnosticPrinterRawOStream DP(MsgStream); + D.print(DP); + } if (D.getHotness()) MsgStream << " (hotness: " << *D.getHotness() << ")"; @@ -784,6 +870,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { case llvm::DK_Unsupported: UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI)); return; + case llvm::DK_MisExpect: + MisExpectDiagHandler(cast<DiagnosticInfoMisExpect>(DI)); + return; default: // Plugin IDs are not bound to any value as they are set dynamically. ComputeDiagRemarkID(Severity, backend_plugin, DiagID); @@ -914,7 +1003,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && CI.getCodeGenOpts().MacroDebugInfo) { std::unique_ptr<PPCallbacks> Callbacks = - llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(), + std::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(), CI.getPreprocessor()); CI.getPreprocessor().addPPCallbacks(std::move(Callbacks)); } @@ -975,7 +1064,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { // the file was already processed by indexing and will be passed to the // linker using merged object file. if (!Bm) { - auto M = llvm::make_unique<llvm::Module>("empty", *VMContext); + auto M = std::make_unique<llvm::Module>("empty", *VMContext); M->setTargetTriple(CI.getTargetOpts().Triple); return M; } @@ -1014,9 +1103,11 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { void CodeGenAction::ExecuteAction() { // If this is an IR file, we have to treat it specially. - if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) { + if (getCurrentFileKind().getLanguage() == Language::LLVM_IR) { BackendAction BA = static_cast<BackendAction>(Act); CompilerInstance &CI = getCompilerInstance(); + auto &CodeGenOpts = CI.getCodeGenOpts(); + auto &Diagnostics = CI.getDiagnostics(); std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, getCurrentFile(), BA); if (BA != Backend_EmitNothing && !OS) @@ -1035,23 +1126,49 @@ void CodeGenAction::ExecuteAction() { const TargetOptions &TargetOpts = CI.getTargetOpts(); if (TheModule->getTargetTriple() != TargetOpts.Triple) { - CI.getDiagnostics().Report(SourceLocation(), - diag::warn_fe_override_module) + Diagnostics.Report(SourceLocation(), + diag::warn_fe_override_module) << TargetOpts.Triple; TheModule->setTargetTriple(TargetOpts.Triple); } - EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(), + EmbedBitcode(TheModule.get(), CodeGenOpts, MainFile->getMemBufferRef()); LLVMContext &Ctx = TheModule->getContext(); Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler, - &CI.getDiagnostics()); + &Diagnostics); + + // Set clang diagnostic handler. To do this we need to create a fake + // BackendConsumer. 
+ BackendConsumer Result(BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), + CI.getPreprocessorOpts(), CI.getCodeGenOpts(), + CI.getTargetOpts(), CI.getLangOpts(), + CI.getFrontendOpts().ShowTimers, + std::move(LinkModules), *VMContext, nullptr); + Ctx.setDiagnosticHandler( + std::make_unique<ClangDiagnosticHandler>(CodeGenOpts, &Result)); + + Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = + setupOptimizationRemarks( + Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses, + CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness, + CodeGenOpts.DiagnosticsHotnessThreshold); + + if (Error E = OptRecordFileOrErr.takeError()) { + reportOptRecordError(std::move(E), Diagnostics, CodeGenOpts); + return; + } + std::unique_ptr<llvm::ToolOutputFile> OptRecordFile = + std::move(*OptRecordFileOrErr); - EmitBackendOutput(CI.getDiagnostics(), CI.getHeaderSearchOpts(), - CI.getCodeGenOpts(), TargetOpts, CI.getLangOpts(), + EmitBackendOutput(Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, + TargetOpts, CI.getLangOpts(), CI.getTarget().getDataLayout(), TheModule.get(), BA, std::move(OS)); + + if (OptRecordFile) + OptRecordFile->keep(); return; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp index eafe26674434..648e6d9c214a 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.cpp @@ -12,9 +12,9 @@ #include "CodeGenFunction.h" #include "CGBlocks.h" -#include "CGCleanup.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenMPRuntime.h" #include "CodeGenModule.h" @@ -22,6 +22,7 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" @@ -33,6 +34,8 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/FPEnv.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" @@ -47,13 +50,10 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, if (CGOpts.DisableLifetimeMarkers) return false; - // Disable lifetime markers in msan builds. - // FIXME: Remove this when msan works with lifetime markers. - if (LangOpts.Sanitize.has(SanitizerKind::Memory)) - return false; - - // Asan uses markers for use-after-scope checks. - if (CGOpts.SanitizeAddressUseAfterScope) + // Sanitizers may use markers. + if (CGOpts.SanitizeAddressUseAfterScope || + LangOpts.Sanitize.has(SanitizerKind::HWAddress) || + LangOpts.Sanitize.has(SanitizerKind::Memory)) return true; // For now, only in optimized builds. @@ -90,6 +90,7 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) FMF.setAllowReassoc(); } Builder.setFastMathFlags(FMF); + SetFPModel(); } CodeGenFunction::~CodeGenFunction() { @@ -105,6 +106,51 @@ CodeGenFunction::~CodeGenFunction() { CGM.getOpenMPRuntime().functionFinished(*this); } +// Map the LangOption for rounding mode into +// the corresponding enum in the IR. 
+static llvm::fp::RoundingMode ToConstrainedRoundingMD( + LangOptions::FPRoundingModeKind Kind) { + + switch (Kind) { + case LangOptions::FPR_ToNearest: return llvm::fp::rmToNearest; + case LangOptions::FPR_Downward: return llvm::fp::rmDownward; + case LangOptions::FPR_Upward: return llvm::fp::rmUpward; + case LangOptions::FPR_TowardZero: return llvm::fp::rmTowardZero; + case LangOptions::FPR_Dynamic: return llvm::fp::rmDynamic; + } + llvm_unreachable("Unsupported FP RoundingMode"); +} + +// Map the LangOption for exception behavior into +// the corresponding enum in the IR. +static llvm::fp::ExceptionBehavior ToConstrainedExceptMD( + LangOptions::FPExceptionModeKind Kind) { + + switch (Kind) { + case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore; + case LangOptions::FPE_MayTrap: return llvm::fp::ebMayTrap; + case LangOptions::FPE_Strict: return llvm::fp::ebStrict; + } + llvm_unreachable("Unsupported FP Exception Behavior"); +} + +void CodeGenFunction::SetFPModel() { + auto fpRoundingMode = ToConstrainedRoundingMD( + getLangOpts().getFPRoundingMode()); + auto fpExceptionBehavior = ToConstrainedExceptMD( + getLangOpts().getFPExceptionMode()); + + if (fpExceptionBehavior == llvm::fp::ebIgnore && + fpRoundingMode == llvm::fp::rmToNearest) + // Constrained intrinsics are not used. + ; + else { + Builder.setIsFPConstrained(true); + Builder.setDefaultConstrainedRounding(fpRoundingMode); + Builder.setDefaultConstrainedExcept(fpExceptionBehavior); + } +} + CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { @@ -197,7 +243,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { #define NON_CANONICAL_TYPE(name, parent) case Type::name: #define DEPENDENT_TYPE(name, parent) case Type::name: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(name, parent) case Type::name: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("non-canonical or dependent type in IR-generation"); case Type::Auto: @@ -332,9 +378,15 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { if (HasCleanups) { // Make sure the line table doesn't jump back into the body for // the ret after it's been at EndLoc. - if (CGDebugInfo *DI = getDebugInfo()) + Optional<ApplyDebugLocation> AL; + if (CGDebugInfo *DI = getDebugInfo()) { if (OnlySimpleReturnStmts) DI->EmitLocation(Builder, EndLoc); + else + // We may not have a valid end location. Try to apply it anyway, and + // fall back to an artificial location if needed. + AL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc); + } PopCleanupBlocks(PrologueCleanupDepth); } @@ -434,13 +486,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Scan function arguments for vector width. for (llvm::Argument &A : CurFn->args()) if (auto *VT = dyn_cast<llvm::VectorType>(A.getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Update vector width based on return type. if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Add the required-vector-width attribute. This contains the max width from: // 1. min-vector-width attribute used in the source program. 
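SetFPModel above leaves the builder alone for the default model (exceptions ignored, round-to-nearest) and otherwise flips it into constrained mode. The practical effect is downstream: once IsFPConstrained is set, ordinary builder calls such as CreateFAdd are emitted as llvm.experimental.constrained.* intrinsics carrying the default rounding and exception metadata. A hedged sketch of that behavior, assuming the IRBuilder API of this LLVM vintage (illustrative, not part of the patch):

#include "llvm/IR/FPEnv.h"
#include "llvm/IR/IRBuilder.h"

// Sketch: with the builder in constrained mode, a plain CreateFAdd is
// lowered to llvm.experimental.constrained.fadd with the configured
// default rounding/exception operands, so callers need no special cases.
llvm::Value *constrainedAdd(llvm::IRBuilder<> &B, llvm::Value *L,
                            llvm::Value *R) {
  B.setIsFPConstrained(true);
  B.setDefaultConstrainedRounding(llvm::fp::rmDynamic);
  B.setDefaultConstrainedExcept(llvm::fp::ebStrict);
  return B.CreateFAdd(L, R); // emitted as the constrained intrinsic here
}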
@@ -609,6 +661,13 @@ void CodeGenFunction::markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) { } } +/// Check if the return value of this function requires sanitization. +bool CodeGenFunction::requiresReturnValueCheck() const { + return requiresReturnValueNullabilityCheck() || + (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && CurCodeDecl && + CurCodeDecl->getAttr<ReturnsNonNullAttr>()); +} + static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) { auto *MD = dyn_cast_or_null<CXXMethodDecl>(D); if (!MD || !MD->getDeclName().getAsIdentifierInfo() || @@ -638,8 +697,7 @@ static llvm::Constant *getPrologueSignature(CodeGenModule &CGM, return CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM); } -void CodeGenFunction::StartFunction(GlobalDecl GD, - QualType RetTy, +void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, @@ -732,8 +790,17 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, SanOpts.Mask &= ~SanitizerKind::CFIUnrelatedCast; } - // Apply xray attributes to the function (as a string, for now) + // Ignore null checks in coroutine functions since the coroutines passes + // are not aware of how to move the extra UBSan instructions across the split + // coroutine boundaries. + if (D && SanOpts.has(SanitizerKind::Null)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) + if (FD->getBody() && + FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass) + SanOpts.Mask &= ~SanitizerKind::Null; + if (D) { + // Apply xray attributes to the function (as a string, for now) if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has( XRayInstrKind::Function)) { @@ -752,16 +819,37 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, "xray-instruction-threshold", llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); } + + unsigned Count, Offset; + if (const auto *Attr = D->getAttr<PatchableFunctionEntryAttr>()) { + Count = Attr->getCount(); + Offset = Attr->getOffset(); + } else { + Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount; + Offset = CGM.getCodeGenOpts().PatchableFunctionEntryOffset; + } + if (Count && Offset <= Count) { + Fn->addFnAttr("patchable-function-entry", std::to_string(Count - Offset)); + if (Offset) + Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset)); + } } // Add no-jump-tables value. Fn->addFnAttr("no-jump-tables", llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables)); + // Add no-inline-line-tables value. + if (CGM.getCodeGenOpts().NoInlineLineTables) + Fn->addFnAttr("no-inline-line-tables"); + // Add profile-sample-accurate value. if (CGM.getCodeGenOpts().ProfileSampleAccurate) Fn->addFnAttr("profile-sample-accurate"); + if (D && D->hasAttr<CFICanonicalJumpTableAttr>()) + Fn->addFnAttr("cfi-canonical-jump-table"); + if (getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -811,6 +899,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (FD->isMain()) Fn->addFnAttr(llvm::Attribute::NoRecurse); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (FD->usesFPIntrin()) + Fn->addFnAttr(llvm::Attribute::StrictFP); + // If a custom alignment is used, force realigning to this alignment on // any main function which certainly will need it. 
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -880,9 +972,30 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr("instrument-function-entry-inlined", getTarget().getMCountName()); } + if (CGM.getCodeGenOpts().MNopMCount) { + if (!CGM.getCodeGenOpts().CallFEntry) + CGM.getDiags().Report(diag::err_opt_not_valid_without_opt) + << "-mnop-mcount" << "-mfentry"; + Fn->addFnAttr("mnop-mcount"); + } + + if (CGM.getCodeGenOpts().RecordMCount) { + if (!CGM.getCodeGenOpts().CallFEntry) + CGM.getDiags().Report(diag::err_opt_not_valid_without_opt) + << "-mrecord-mcount" << "-mfentry"; + Fn->addFnAttr("mrecord-mcount"); + } } } + if (CGM.getCodeGenOpts().PackedStack) { + if (getContext().getTargetInfo().getTriple().getArch() != + llvm::Triple::systemz) + CGM.getDiags().Report(diag::err_opt_not_valid_on_target) + << "-mpacked-stack"; + Fn->addFnAttr("packed-stack"); + } + if (RetTy->isVoidType()) { // Void type; nothing to return. ReturnValue = Address::invalid(); @@ -954,7 +1067,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField); if (!LambdaThisCaptureField->getType()->isPointerType()) { // If the enclosing object was captured by value, just use its address. - CXXThisValue = ThisFieldLValue.getAddress().getPointer(); + CXXThisValue = ThisFieldLValue.getAddress(*this).getPointer(); } else { // Load the lvalue pointed to by the field, since '*this' was captured // by reference. @@ -1662,7 +1775,7 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { llvm::GlobalVariable::PrivateLinkage, NullConstant, Twine()); CharUnits NullAlign = DestPtr.getAlignment(); - NullVariable->setAlignment(NullAlign.getQuantity()); + NullVariable->setAlignment(NullAlign.getAsAlign()); Address SrcPtr(Builder.CreateBitCast(NullVariable, Builder.getInt8PtrTy()), NullAlign); @@ -1862,7 +1975,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("unexpected dependent type!"); // These types are never variably-modified. 
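The patchable-function-entry hunk in StartFunction above splits the requested NOP count around the entry point: Offset NOPs become a prefix before the function symbol, and the remaining Count - Offset land after it. A small sketch of just that arithmetic, with applyPatchableEntry as a hypothetical helper name (illustrative only):

#include "llvm/IR/Function.h"
#include <string>

// Sketch of the attribute split: the guard mirrors the patch's
// "Count && Offset <= Count" test before any attribute is attached.
void applyPatchableEntry(llvm::Function *Fn, unsigned Count, unsigned Offset) {
  if (!Count || Offset > Count)
    return;
  // NOPs after the entry label, where the patching runtime expects them.
  Fn->addFnAttr("patchable-function-entry", std::to_string(Count - Offset));
  // NOPs before the symbol, only emitted when a prefix was requested.
  if (Offset)
    Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset));
}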
@@ -1991,18 +2104,18 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { Address CodeGenFunction::EmitVAListRef(const Expr* E) { if (getContext().getBuiltinVaListType()->isArrayType()) return EmitPointerWithAlignment(E); - return EmitLValue(E).getAddress(); + return EmitLValue(E).getAddress(*this); } Address CodeGenFunction::EmitMSVAListRef(const Expr *E) { - return EmitLValue(E).getAddress(); + return EmitLValue(E).getAddress(*this); } void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, const APValue &Init) { assert(Init.hasValue() && "Invalid DeclRefExpr initializer!"); if (CGDebugInfo *Dbg = getDebugInfo()) - if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().hasReducedDebugInfo()) Dbg->EmitGlobalVariable(E->getDecl(), Init); } @@ -2048,24 +2161,9 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, } void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, - QualType Ty, SourceLocation Loc, - SourceLocation AssumptionLoc, - unsigned Alignment, - llvm::Value *OffsetValue) { - llvm::Value *TheCheck; - llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( - CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck); - if (SanOpts.has(SanitizerKind::Alignment)) { - llvm::Value *AlignmentVal = llvm::ConstantInt::get(IntPtrTy, Alignment); - EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, AlignmentVal, - OffsetValue, TheCheck, Assumption); - } -} - -void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E, SourceLocation AssumptionLoc, - unsigned Alignment, + llvm::Value *Alignment, llvm::Value *OffsetValue) { if (auto *CE = dyn_cast<CastExpr>(E)) E = CE->getSubExprAsWritten(); @@ -2159,7 +2257,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, // Now build up the set of caller features and verify that all the required // features are there. llvm::StringMap<bool> CallerFeatureMap; - CGM.getFunctionFeatureMap(CallerFeatureMap, GlobalDecl().getWithDecl(FD)); + CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD); // If we have at least one of the features in the feature list return // true, otherwise return false. @@ -2194,7 +2292,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, // Get the current enclosing function if it exists. If it doesn't // we can't check the target features anyhow. - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl); + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl); if (!FD) return; @@ -2216,16 +2314,18 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, << TargetDecl->getDeclName() << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); - } else if (TargetDecl->hasAttr<TargetAttr>() || - TargetDecl->hasAttr<CPUSpecificAttr>()) { + } else if (!TargetDecl->isMultiVersion() && + TargetDecl->hasAttr<TargetAttr>()) { // Get the required features for the callee. 
const TargetAttr *TD = TargetDecl->getAttr<TargetAttr>(); - TargetAttr::ParsedTargetAttr ParsedAttr = CGM.filterFunctionTargetAttrs(TD); + ParsedTargetAttr ParsedAttr = + CGM.getContext().filterFunctionTargetAttrs(TD); SmallVector<StringRef, 1> ReqFeatures; llvm::StringMap<bool> CalleeFeatureMap; - CGM.getFunctionFeatureMap(CalleeFeatureMap, TargetDecl); + CGM.getContext().getFunctionFeatureMap(CalleeFeatureMap, + GlobalDecl(TargetDecl)); for (const auto &F : ParsedAttr.Features) { if (F[0] == '+' && CalleeFeatureMap.lookup(F.substr(1))) @@ -2292,10 +2392,7 @@ static void CreateMultiVersionResolverReturn(CodeGenModule &CGM, void CodeGenFunction::EmitMultiVersionResolver( llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { - assert((getContext().getTargetInfo().getTriple().getArch() == - llvm::Triple::x86 || - getContext().getTargetInfo().getTriple().getArch() == - llvm::Triple::x86_64) && + assert(getContext().getTargetInfo().getTriple().isX86() && "Only implemented for x86 targets"); bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h index c3060d1fb351..3d8bc93eb965 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h @@ -75,6 +75,7 @@ class ObjCAtTryStmt; class ObjCAtThrowStmt; class ObjCAtSynchronizedStmt; class ObjCAutoreleasePoolStmt; +class ReturnsNonNullAttr; namespace analyze_os_log { class OSLogBufferLayout; @@ -947,6 +948,19 @@ public: } }; + /// Save/restore original map of previously emitted local vars in case when we + /// need to duplicate emission of the same code several times in the same + /// function for OpenMP code. + class OMPLocalDeclMapRAII { + CodeGenFunction &CGF; + DeclMapTy SavedMap; + + public: + OMPLocalDeclMapRAII(CodeGenFunction &CGF) + : CGF(CGF), SavedMap(CGF.LocalDeclMap) {} + ~OMPLocalDeclMapRAII() { SavedMap.swap(CGF.LocalDeclMap); } + }; + /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. void @@ -1034,7 +1048,7 @@ public: assert(isInConditionalBranch()); llvm::BasicBlock *block = OutermostConditional->getStartingBlock(); auto store = new llvm::StoreInst(value, addr.getPointer(), &block->back()); - store->setAlignment(addr.getAlignment().getQuantity()); + store->setAlignment(addr.getAlignment().getAsAlign()); } /// An RAII object to record that we're evaluating a statement @@ -1262,7 +1276,7 @@ private: CancelExit(OpenMPDirectiveKind Kind, JumpDest ExitBlock, JumpDest ContBlock) : Kind(Kind), ExitBlock(ExitBlock), ContBlock(ContBlock) {} - OpenMPDirectiveKind Kind = OMPD_unknown; + OpenMPDirectiveKind Kind = llvm::omp::OMPD_unknown; /// true if the exit block has been emitted already by the special /// emitExit() call, false if the default codegen is used. bool HasBeenEmitted = false; @@ -1584,11 +1598,7 @@ private: Address ReturnLocation = Address::invalid(); /// Check if the return value of this function requires sanitization. 
- bool requiresReturnValueCheck() const { - return requiresReturnValueNullabilityCheck() || - (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && - CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()); - } + bool requiresReturnValueCheck() const; llvm::BasicBlock *TerminateLandingPad = nullptr; llvm::BasicBlock *TerminateHandler = nullptr; @@ -2829,13 +2839,8 @@ public: llvm::Value *Alignment, llvm::Value *OffsetValue = nullptr); - void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, - SourceLocation Loc, SourceLocation AssumptionLoc, - unsigned Alignment, - llvm::Value *OffsetValue = nullptr); - void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E, - SourceLocation AssumptionLoc, unsigned Alignment, + SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue = nullptr); //===--------------------------------------------------------------------===// @@ -3137,6 +3142,7 @@ public: void EmitOMPParallelForDirective(const OMPParallelForDirective &S); void EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective &S); void EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective &S); + void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S); void EmitOMPTaskDirective(const OMPTaskDirective &S); void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S); void EmitOMPBarrierDirective(const OMPBarrierDirective &S); @@ -3160,6 +3166,13 @@ public: void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); + void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S); + void + EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S); + void EmitOMPParallelMasterTaskLoopDirective( + const OMPParallelMasterTaskLoopDirective &S); + void EmitOMPParallelMasterTaskLoopSimdDirective( + const OMPParallelMasterTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); @@ -3718,6 +3731,11 @@ public: /// Emit IR for __builtin_os_log_format. RValue emitBuiltinOSLogFormat(const CallExpr &E); + /// Emit IR for __builtin_is_aligned. + RValue EmitBuiltinIsAligned(const CallExpr *E); + /// Emit IR for __builtin_align_up/__builtin_align_down. + RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp); + llvm::Function *generateBuiltinOSLogHelperFunction( const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment); @@ -3726,14 +3744,19 @@ public: /// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call /// is unhandled by the current target. 
- llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue); llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name = ""); llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch); + llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch); llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, @@ -3760,6 +3783,7 @@ public: llvm::Value *vectorWrapScalar16(llvm::Value *Op); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch); + llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); @@ -4148,6 +4172,9 @@ public: /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); + /// SetFPModel - Control floating point behavior via fp-model settings. + void SetFPModel(); + private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); void EmitReturnOfRValue(RValue RV, QualType Ty); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp index 1fd4e4cf8b8f..f8866ac4f7f6 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp @@ -47,6 +47,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -55,6 +56,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" @@ -72,6 +74,7 @@ static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getTarget().getCXXABI().getKind()) { + case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: @@ -216,6 +219,14 @@ void CodeGenModule::createOpenMPRuntime() { OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } + + // The OpenMP-IR-Builder should eventually replace the above runtime codegens + // but we are not there yet so they both reside in CGModule for now and the + // OpenMP-IR-Builder is opt-in only. + if (LangOpts.OpenMPIRBuilder) { + OMPBuilder.reset(new llvm::OpenMPIRBuilder(TheModule)); + OMPBuilder->initialize(); + } } void CodeGenModule::createCUDARuntime() { @@ -414,12 +425,7 @@ void CodeGenModule::Release() { OpenMPRuntime->emitRequiresDirectiveRegFun()) { AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0); } - if (llvm::Function *OpenMPRegistrationFunction = - OpenMPRuntime->emitRegistrationFunction()) { - auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? 
- OpenMPRegistrationFunction : nullptr; - AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); - } + OpenMPRuntime->createOffloadEntriesAndInfoMetadata(); OpenMPRuntime->clear(); } if (PGOReader) { @@ -474,9 +480,7 @@ void CodeGenModule::Release() { CodeGenOpts.NumRegisterParameters); if (CodeGenOpts.DwarfVersion) { - // We actually want the latest version when there are conflicts. - // We can change from Warning to Latest if such mode is supported. - getModule().addModuleFlag(llvm::Module::Warning, "Dwarf Version", + getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version", CodeGenOpts.DwarfVersion); } if (CodeGenOpts.EmitCodeView) { @@ -487,8 +491,11 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1); } if (CodeGenOpts.ControlFlowGuard) { - // We want function ID tables for Control Flow Guard. - getModule().addModuleFlag(llvm::Module::Warning, "cfguardtable", 1); + // Function ID tables and checks for Control Flow Guard (cfguard=2). + getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 2); + } else if (CodeGenOpts.ControlFlowGuardNoChecks) { + // Function ID tables for Control Flow Guard (cfguard=1). + getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1); } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers @@ -530,11 +537,24 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); } + if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) { + StringRef ABIStr = Target.getABI(); + llvm::LLVMContext &Ctx = TheModule.getContext(); + getModule().addModuleFlag(llvm::Module::Error, "target-abi", + llvm::MDString::get(Ctx, ABIStr)); + } + if (CodeGenOpts.SanitizeCfiCrossDso) { // Indicate that we want cross-DSO control flow integrity checks. getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } + if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) { + getModule().addModuleFlag(llvm::Module::Override, + "CFI Canonical Jump Tables", + CodeGenOpts.SanitizeCfiCanonicalJumpTables); + } + if (CodeGenOpts.CFProtectionReturn && Target.checkCFProtectionReturnSupported(getDiags())) { // Indicate that we want to instrument return control flow protection. @@ -812,7 +832,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, const auto &CGOpts = CGM.getCodeGenOpts(); llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); - if (RM != llvm::Reloc::Static && !LOpts.PIE && !LOpts.OpenMPIsDevice) + if (RM != llvm::Reloc::Static && !LOpts.PIE) return false; // A definition cannot be preempted from an executable. @@ -953,7 +973,7 @@ static void AppendTargetMangling(const CodeGenModule &CGM, Out << '.'; const TargetInfo &Target = CGM.getTarget(); - TargetAttr::ParsedTargetAttr Info = + ParsedTargetAttr Info = Attr->parse([&Target](StringRef LHS, StringRef RHS) { // Multiversioning doesn't allow "no-${feature}", so we can // only have "+" prefixes here. @@ -1176,7 +1196,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. Take it off as a workaround. 
- list->setAlignment(0); + list->setAlignment(llvm::None); Fns.clear(); } @@ -1510,16 +1530,15 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; // We can't add optnone in the following cases, it won't pass the verifier. ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>(); - ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>(); - if (ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) { + // Add optnone, but do so only if the function isn't always_inline. + if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) && + !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. B.addAttribute(llvm::Attribute::NoInline); - assert(!F->hasFnAttribute(llvm::Attribute::AlwaysInline) && - "OptimizeNone and AlwaysInline on same function!"); // We still need to handle naked functions even though optnone subsumes // much of their semantics. @@ -1535,7 +1554,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr<NoDuplicateAttr>()) { B.addAttribute(llvm::Attribute::NoDuplicate); - } else if (D->hasAttr<NoInlineAttr>()) { + } else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { + // Add noinline if the function isn't always_inline. B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr<AlwaysInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::NoInline)) { @@ -1590,11 +1610,11 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) - F->setAlignment(alignment); + F->setAlignment(llvm::Align(alignment)); if (!D->hasAttr<AlignedAttr>()) if (LangOpts.FunctionAlignment) - F->setAlignment(1 << LangOpts.FunctionAlignment); + F->setAlignment(llvm::Align(1ull << LangOpts.FunctionAlignment)); // Some C++ ABIs require 2-byte alignment for member functions, in order to // reserve a bit for differentiating between virtual and non-virtual member @@ -1602,13 +1622,20 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // member function, set its alignment accordingly. if (getTarget().getCXXABI().areMemberFunctionsAligned()) { if (F->getAlignment() < 2 && isa<CXXMethodDecl>(D)) - F->setAlignment(2); + F->setAlignment(llvm::Align(2)); } - // In the cross-dso CFI mode, we want !type attributes on definitions only. - if (CodeGenOpts.SanitizeCfiCrossDso) - if (auto *FD = dyn_cast<FunctionDecl>(D)) - CreateFunctionTypeMetadataForIcall(FD, F); + // In the cross-dso CFI mode with canonical jump tables, we want !type + // attributes on definitions only. + if (CodeGenOpts.SanitizeCfiCrossDso && + CodeGenOpts.SanitizeCfiCanonicalJumpTables) { + if (auto *FD = dyn_cast<FunctionDecl>(D)) { + // Skip available_externally functions. They won't be codegen'ed in the + // current module anyway. + if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) + CreateFunctionTypeMetadataForIcall(FD, F); + } + } // Emit type metadata on member functions for member function pointer checks. 
// These are only ever necessary on definitions; we're guaranteed that the @@ -1656,7 +1683,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, bool AddedAttr = false; if (TD || SD) { llvm::StringMap<bool> FeatureMap; - getFunctionFeatureMap(FeatureMap, GD); + getContext().getFunctionFeatureMap(FeatureMap, GD); // Produce the canonical string for this set of features. for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap) @@ -1667,7 +1694,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // get and parse the target attribute so we can get the cpu for // the function. if (TD) { - TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); + ParsedTargetAttr ParsedAttr = TD->parse(); if (ParsedAttr.Architecture != "" && getTarget().isValidCPUName(ParsedAttr.Architecture)) TargetCPU = ParsedAttr.Architecture; @@ -1704,6 +1731,8 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, GV->addAttribute("data-section", SA->getName()); if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>()) GV->addAttribute("rodata-section", SA->getName()); + if (auto *SA = D->getAttr<PragmaClangRelroSectionAttr>()) + GV->addAttribute("relro-section", SA->getName()); } if (auto *F = dyn_cast<llvm::Function>(GO)) { @@ -1765,14 +1794,6 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic()) return; - // Additionally, if building with cross-DSO support... - if (CodeGenOpts.SanitizeCfiCrossDso) { - // Skip available_externally functions. They won't be codegen'ed in the - // current module anyway. - if (getContext().GetGVALinkageForFunction(FD) == GVA_AvailableExternally) - return; - } - llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); F->addTypeMetadata(0, MD); F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); @@ -1826,6 +1847,11 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, else if (const auto *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); + if (FD->isInlineBuiltinDeclaration()) { + F->addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoBuiltin); + } + if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. @@ -1849,8 +1875,11 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't emit entries for function declarations in the cross-DSO mode. This - // is handled with better precision by the receiving DSO. - if (!CodeGenOpts.SanitizeCfiCrossDso) + // is handled with better precision by the receiving DSO. But if jump tables + // are non-canonical then we need type metadata in order to produce the local + // jump table. 
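// The two options tested below correspond to -fsanitize-cfi-cross-dso and
// -f[no-]sanitize-cfi-canonical-jump-tables (assumed driver spellings for
// this import; sample user code, not part of the patch). An illustrative
// build in which declarations still need !type metadata, because each DSO
// must synthesize its own local jump table:
//
//   clang -flto -fvisibility=hidden -fsanitize=cfi-icall \
//         -fsanitize-cfi-cross-dso -fno-sanitize-cfi-canonical-jump-tables t.c

// t.c: any indirect call site is then checked against the jump table.
void (*handler)(void);
void dispatch(void) { handler(); }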
+  if (!CodeGenOpts.SanitizeCfiCrossDso ||
+      !CodeGenOpts.SanitizeCfiCanonicalJumpTables)
     CreateFunctionTypeMetadataForIcall(FD, F);
 
   if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
@@ -1925,6 +1954,8 @@ void CodeGenModule::AppendLinkerOptions(StringRef Opts) {
 void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) {
   llvm::SmallString<32> Opt;
   getTargetCodeGenInfo().getDetectMismatchOption(Name, Value, Opt);
+  if (Opt.empty())
+    return;
   auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt);
   LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
 }
@@ -2114,6 +2145,10 @@ void CodeGenModule::EmitDeferred() {
     if (!GV->isDeclaration())
       continue;
 
+    // If this is OpenMP, check if it is legal to emit this global normally.
+    if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D))
+      continue;
+
     // Otherwise, emit the definition and move on to the next one.
     EmitGlobalDefinition(D, GV);
 
@@ -2198,9 +2233,15 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
       *UnitGV = EmitAnnotationUnit(L),
       *LineNoCst = EmitAnnotationLineNo(L);
 
+  llvm::Constant *ASZeroGV = GV;
+  if (GV->getAddressSpace() != 0) {
+    ASZeroGV = llvm::ConstantExpr::getAddrSpaceCast(
+        GV, GV->getValueType()->getPointerTo(0));
+  }
+
   // Create the ConstantStruct for the global annotation.
   llvm::Constant *Fields[4] = {
-      llvm::ConstantExpr::getBitCast(GV, Int8PtrTy),
+      llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy),
       llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy),
       llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy),
       LineNoCst
@@ -2310,11 +2351,20 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
 }
 
 bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
-  if (const auto *FD = dyn_cast<FunctionDecl>(Global))
+  if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
     if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
       // Implicit template instantiations may change linkage if they are later
       // explicitly instantiated, so they should not be emitted eagerly.
      return false;
+    // In OpenMP 5.0, a function may be marked as device_type(nohost), and we
+    // should not emit it eagerly unless we are sure that the function must be
+    // emitted on the host.
+    if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd &&
+        !LangOpts.OpenMPIsDevice &&
+        !OMPDeclareTargetDeclAttr::getDeviceType(FD) &&
+        !FD->isUsed(/*CheckUsedAttr=*/false) && !FD->isReferenced())
+      return false;
+  }
   if (const auto *VD = dyn_cast<VarDecl>(Global))
    if (Context.getInlineVariableDefinitionKind(VD) ==
        ASTContext::InlineVariableDefinitionKind::WeakUnknown)
@@ -2437,8 +2487,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
   }
 
   if (LangOpts.OpenMP) {
-    // If this is OpenMP device, check if it is legal to emit this global
-    // normally.
+    // If this is OpenMP, check if it is legal to emit this global normally.
     if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
       return;
     if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Global)) {
@@ -2512,6 +2561,11 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
     return;
   }
 
+  // Check if this must be emitted as declare variant.
+  if (LangOpts.OpenMP && isa<FunctionDecl>(Global) && OpenMPRuntime &&
+      OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/false))
+    return;
+
   // If we're deferring emission of a C++ variable with an
   // initializer, remember the order in which it appeared in the file.
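// An illustration of what the MayBeEmittedEagerly change above defers
// (OpenMP 5.0 syntax; sample user code, not part of the patch). Under
// -fopenmp, host emission of maybe_host() waits until the function is used
// or referenced, since its device_type is still unknown; device_only() is
// never needed by the host:

int maybe_host(void) { return 2; }

#pragma omp declare target device_type(nohost)
int device_only(void) { return 1; }
#pragma omp end declare target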
if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) && @@ -2717,6 +2771,50 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, EmitGlobalFunctionDefinition(GD, GV); } +void CodeGenModule::emitOpenMPDeviceFunctionRedefinition( + GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) { + assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && + OpenMPRuntime && "Expected OpenMP device mode."); + const auto *D = cast<FunctionDecl>(OldGD.getDecl()); + + // Compute the function info and LLVM type. + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + + // Get or create the prototype for the function. + if (!GV || (GV->getType()->getElementType() != Ty)) { + GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction( + getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false, + /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), + ForDefinition)); + SetFunctionAttributes(OldGD, cast<llvm::Function>(GV), + /*IsIncompleteFunction=*/false, + /*IsThunk=*/false); + } + // We need to set linkage and visibility on the function before + // generating code for it because various parts of IR generation + // want to propagate this information down (e.g. to local static + // declarations). + auto *Fn = cast<llvm::Function>(GV); + setFunctionLinkage(OldGD, Fn); + + // FIXME: this is redundant with part of + // setFunctionDefinitionAttributes + setGVProperties(Fn, OldGD); + + MaybeHandleStaticInExternC(D, Fn); + + maybeSetTrivialComdat(*D, *Fn); + + CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI); + + setNonAliasAttributes(OldGD, Fn); + SetLLVMFunctionAttributesForDefinition(D, Fn); + + if (D->hasAttr<AnnotateAttr>()) + AddGlobalAnnotations(D, Fn); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast<ValueDecl>(GD.getDecl()); @@ -2816,11 +2914,13 @@ void CodeGenModule::emitMultiVersionFunctions() { llvm::Function *ResolverFunc; const TargetInfo &TI = getTarget(); - if (TI.supportsIFunc() || FD->isTargetMultiVersion()) + if (TI.supportsIFunc() || FD->isTargetMultiVersion()) { ResolverFunc = cast<llvm::Function>( GetGlobalValue((getMangledName(GD) + ".resolver").str())); - else + ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); + } else { ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD))); + } if (supportsCOMDAT()) ResolverFunc->setComdat( @@ -2864,6 +2964,10 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction( ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); + ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); + if (supportsCOMDAT()) + ResolverFunc->setComdat( + getModule().getOrInsertComdat(ResolverFunc->getName())); SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; const TargetInfo &Target = getTarget(); @@ -2928,6 +3032,21 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); + + if (getTarget().supportsIFunc()) { + std::string AliasName = getMangledNameImpl( + *this, GD, FD, /*OmitMultiVersionMangling=*/true); + llvm::Constant *AliasFunc = GetGlobalValue(AliasName); + if (!AliasFunc) { + auto *IFunc = cast<llvm::GlobalIFunc>(GetOrCreateLLVMFunction( + AliasName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true, + /*IsThunk=*/false, llvm::AttributeList(), NotForDefinition)); + auto *GA = 
llvm::GlobalAlias::create(
+          DeclTy, 0, getFunctionLinkage(GD), AliasName, IFunc, &getModule());
+      GA->setLinkage(llvm::Function::WeakODRLinkage);
+      SetCommonAttributes(GD, GA);
+    }
+  }
 }
 
 /// If a dispatcher for the specified mangled name is not in the module, create
@@ -2964,7 +3083,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
         MangledName + ".resolver", ResolverType, GlobalDecl{},
         /*ForVTable=*/false);
     llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
-        DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+        DeclTy, 0, llvm::Function::WeakODRLinkage, "", Resolver, &getModule());
     GIF->setName(ResolverName);
     SetCommonAttributes(FD, GIF);
 
@@ -3010,6 +3129,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
         EmitGlobal(GDDef);
       }
     }
+    // Check if this must be emitted as declare variant and emit reference to
+    // the declare variant function.
+    if (LangOpts.OpenMP && OpenMPRuntime)
+      (void)OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true);
 
     if (FD->isMultiVersion()) {
       const auto *TA = FD->getAttr<TargetAttr>();
@@ -3241,8 +3364,14 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) {
 /// type and name.
 llvm::FunctionCallee
 CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
-                                     llvm::AttributeList ExtraAttrs,
-                                     bool Local) {
+                                     llvm::AttributeList ExtraAttrs, bool Local,
+                                     bool AssumeConvergent) {
+  if (AssumeConvergent) {
+    ExtraAttrs =
+        ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex,
+                                llvm::Attribute::Convergent);
+  }
+
   llvm::Constant *C =
       GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
                               /*DontDefer=*/false, /*IsThunk=*/false,
@@ -3398,7 +3527,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
     // handling.
     GV->setConstant(isTypeConstant(D->getType(), false));
 
-    GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
+    GV->setAlignment(getContext().getDeclAlign(D).getAsAlign());
 
     setLinkageForGV(GV, D);
 
@@ -3455,7 +3584,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
       // Make a new global with the correct type, this is now guaranteed
       // to work.
       auto *NewGV = cast<llvm::GlobalVariable>(
-          GetAddrOfGlobalVar(D, InitType, IsForDefinition));
+          GetAddrOfGlobalVar(D, InitType, IsForDefinition)
+              ->stripPointerCasts());
 
       // Erase the old global, since it is no longer used.
       GV->eraseFromParent();
@@ -3472,6 +3602,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
     }
   }
 
+  if (GV->isDeclaration())
+    getTargetCodeGenInfo().setTargetAttributes(D, GV, *this);
+
   LangAS ExpectedAS =
       D ? D->getType().getAddressSpace()
         : (LangOpts.OpenCL ?
LangAS::opencl_global : LangAS::Default); @@ -3481,9 +3614,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, ExpectedAS, Ty); - if (GV->isDeclaration()) - getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); - return GV; } @@ -3548,7 +3678,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( !GV->hasAvailableExternallyLinkage()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); - GV->setAlignment(Alignment); + GV->setAlignment(llvm::MaybeAlign(Alignment)); return GV; } @@ -3612,6 +3742,10 @@ void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { EmitGlobalVarDefinition(D); } +void CodeGenModule::EmitExternalDeclaration(const VarDecl *D) { + EmitExternalVarDeclaration(D); +} + CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { return Context.toCharUnitsFromBits( getDataLayout().getTypeStoreSizeInBits(Ty)); @@ -3768,9 +3902,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, return; llvm::Constant *Init = nullptr; - CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); bool NeedsGlobalCtor = false; - bool NeedsGlobalDtor = RD && !RD->hasTrivialDestructor(); + bool NeedsGlobalDtor = + D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); @@ -3837,14 +3971,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)); - // Strip off a bitcast if we got one back. - if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { - assert(CE->getOpcode() == llvm::Instruction::BitCast || - CE->getOpcode() == llvm::Instruction::AddrSpaceCast || - // All zero index gep. - CE->getOpcode() == llvm::Instruction::GetElementPtr); - Entry = CE->getOperand(0); - } + // Strip off pointer casts if we got them. + Entry = Entry->stripPointerCasts(); // Entry is now either a Function or GlobalVariable. auto *GV = dyn_cast<llvm::GlobalVariable>(Entry); @@ -3867,7 +3995,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Make a new global with the correct type, this is now guaranteed to work. GV = cast<llvm::GlobalVariable>( - GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative))); + GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)) + ->stripPointerCasts()); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = @@ -3944,8 +4073,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, GV->setConstant(true); } - GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); - + GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); // On Darwin, if the normal linkage of a C++ thread_local variable is // LinkOnce or Weak, we keep the normal linkage to prevent multiple @@ -3997,10 +4125,23 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Emit global variable debug information. 
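// hasReducedDebugInfo(), used below, is the new name for the old
// "getDebugInfo() >= codegenoptions::LimitedDebugInfo" test. Together with
// the EmitExternalVarDeclaration hook added in the next hunk, it lets -g
// describe variables that are only declared in this TU (sample user code,
// not part of the patch):
//
//   extern int global_counter;                        // defined elsewhere
//   int read_counter(void) { return global_counter; } // the use still gets
//                                                     // a debug-info entry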
if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) + if (getCodeGenOpts().hasReducedDebugInfo()) DI->EmitGlobalVariable(GV, D); } +void CodeGenModule::EmitExternalVarDeclaration(const VarDecl *D) { + if (CGDebugInfo *DI = getModuleDebugInfo()) + if (getCodeGenOpts().hasReducedDebugInfo()) { + QualType ASTTy = D->getType(); + llvm::Type *Ty = getTypes().ConvertTypeForMem(D->getType()); + llvm::PointerType *PTy = + llvm::PointerType::get(Ty, getContext().getTargetAddressSpace(ASTTy)); + llvm::Constant *GV = GetOrCreateLLVMGlobal(D->getName(), PTy, D); + DI->EmitExternalVariable( + cast<llvm::GlobalVariable>(GV->stripPointerCasts()), D); + } +} + static bool isVarDeclStrongDefinition(const ASTContext &Context, CodeGenModule &CGM, const VarDecl *D, bool NoCommon) { @@ -4025,6 +4166,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, // If no specialized section name is applicable, it will resort to default. if (D->hasAttr<PragmaClangBSSSectionAttr>() || D->hasAttr<PragmaClangDataSectionAttr>() || + D->hasAttr<PragmaClangRelroSectionAttr>() || D->hasAttr<PragmaClangRodataSectionAttr>()) return true; @@ -4286,6 +4428,11 @@ void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { + // Check if this must be emitted as declare variant. + if (LangOpts.OpenMP && OpenMPRuntime && + OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true)) + return; + const auto *D = cast<FunctionDecl>(GD.getDecl()); // Compute the function info and LLVM type. @@ -4639,7 +4786,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { // of the string is via this class initializer. CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) : Context.getTypeAlignInChars(Context.CharTy); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during @@ -4674,7 +4821,10 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { } Fields.addInt(LengthTy, StringLength); - CharUnits Alignment = getPointerAlign(); + // Swift ABI requires 8-byte alignment to ensure that the _Atomic(uint64_t) is + // properly aligned on 32-bit platforms. + CharUnits Alignment = + IsSwiftABI ? Context.toCharUnitsFromBits(64) : getPointerAlign(); // The struct. 
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, @@ -4714,7 +4864,7 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { Context.getPointerType(Context.getObjCIdType()), Context.getPointerType(Context.UnsignedLongTy), Context.getConstantArrayType(Context.UnsignedLongTy, - llvm::APInt(32, 5), ArrayType::Normal, 0) + llvm::APInt(32, 5), nullptr, ArrayType::Normal, 0) }; for (size_t i = 0; i < 4; ++i) { @@ -4789,7 +4939,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, auto *GV = new llvm::GlobalVariable( M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); - GV->setAlignment(Alignment.getQuantity()); + GV->setAlignment(Alignment.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); @@ -4813,7 +4963,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) - GV->setAlignment(Alignment.getQuantity()); + GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } @@ -4876,7 +5026,7 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString( Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) - GV->setAlignment(Alignment.getQuantity()); + GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } @@ -4904,7 +5054,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( // If we're not materializing a subobject of the temporary, keep the // cv-qualifiers from the type of the MaterializeTemporaryExpr. QualType MaterializedType = Init->getType(); - if (Init == E->GetTemporaryExpr()) + if (Init == E->getSubExpr()) MaterializedType = E->getType(); CharUnits Align = getContext().getTypeAlignInChars(MaterializedType); @@ -4921,14 +5071,13 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( VD, E->getManglingNumber(), Out); APValue *Value = nullptr; - if (E->getStorageDuration() == SD_Static) { - // We might have a cached constant initializer for this temporary. Note - // that this might have a different value from the value computed by - // evaluating the initializer if the surrounding constant expression - // modifies the temporary. - Value = getContext().getMaterializedTemporaryValue(E, false); - if (Value && Value->isAbsent()) - Value = nullptr; + if (E->getStorageDuration() == SD_Static && VD && VD->evaluateValue()) { + // If the initializer of the extending declaration is a constant + // initializer, we should have a cached constant initializer for this + // temporary. Note that this might have a different value from the value + // computed by evaluating the initializer if the surrounding constant + // expression modifies the temporary. + Value = E->getOrCreateValue(false); } // Try evaluating it now, it might have a constant initializer. 
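// A concrete case for the caching logic above (sample user code): a
// lifetime-extended temporary of static storage duration with a foldable
// initializer,
//
//   const int &ref = 10 + 32;
//
// now has its evaluated value (42) stored on the extending declaration and
// fetched via E->getOrCreateValue(false), instead of being looked up in the
// old ASTContext::getMaterializedTemporaryValue() side table.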
@@ -4979,7 +5128,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); setGVProperties(GV, VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) @@ -5008,11 +5157,12 @@ void CodeGenModule::EmitObjCPropertyImplementations(const // we want, that just indicates if the decl came from a // property. What we want to know is if the method is defined in // this implementation. - if (!D->getInstanceMethod(PD->getGetterName())) + auto *Getter = PID->getGetterMethodDecl(); + if (!Getter || Getter->isSynthesizedAccessorStub()) CodeGenFunction(*this).GenerateObjCGetter( - const_cast<ObjCImplementationDecl *>(D), PID); - if (!PD->isReadOnly() && - !D->getInstanceMethod(PD->getSetterName())) + const_cast<ObjCImplementationDecl *>(D), PID); + auto *Setter = PID->getSetterMethodDecl(); + if (!PD->isReadOnly() && (!Setter || Setter->isSynthesizedAccessorStub())) CodeGenFunction(*this).GenerateObjCSetter( const_cast<ObjCImplementationDecl *>(D), PID); } @@ -5049,12 +5199,13 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { if (needsDestructMethod(D)) { IdentifierInfo *II = &getContext().Idents.get(".cxx_destruct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); - ObjCMethodDecl *DTORMethod = - ObjCMethodDecl::Create(getContext(), D->getLocation(), D->getLocation(), - cxxSelector, getContext().VoidTy, nullptr, D, - /*isInstance=*/true, /*isVariadic=*/false, - /*isPropertyAccessor=*/true, /*isImplicitlyDeclared=*/true, - /*isDefined=*/false, ObjCMethodDecl::Required); + ObjCMethodDecl *DTORMethod = ObjCMethodDecl::Create( + getContext(), D->getLocation(), D->getLocation(), cxxSelector, + getContext().VoidTy, nullptr, D, + /*isInstance=*/true, /*isVariadic=*/false, + /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, + /*isImplicitlyDeclared=*/true, + /*isDefined=*/false, ObjCMethodDecl::Required); D->addInstanceMethod(DTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, DTORMethod, false); D->setHasDestructors(true); @@ -5069,17 +5220,13 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); // The constructor returns 'self'. - ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create(getContext(), - D->getLocation(), - D->getLocation(), - cxxSelector, - getContext().getObjCIdType(), - nullptr, D, /*isInstance=*/true, - /*isVariadic=*/false, - /*isPropertyAccessor=*/true, - /*isImplicitlyDeclared=*/true, - /*isDefined=*/false, - ObjCMethodDecl::Required); + ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create( + getContext(), D->getLocation(), D->getLocation(), cxxSelector, + getContext().getObjCIdType(), nullptr, D, /*isInstance=*/true, + /*isVariadic=*/false, + /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, + /*isImplicitlyDeclared=*/true, + /*isDefined=*/false, ObjCMethodDecl::Required); D->addInstanceMethod(CTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, CTORMethod, true); D->setHasNonZeroConstructors(true); @@ -5231,7 +5378,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. 
if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) + if (getCodeGenOpts().hasReducedDebugInfo()) DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( OMD->getClassInterface()), OMD->getLocation()); break; @@ -5755,61 +5902,9 @@ void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, } } -TargetAttr::ParsedTargetAttr CodeGenModule::filterFunctionTargetAttrs(const TargetAttr *TD) { - assert(TD != nullptr); - TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - - ParsedAttr.Features.erase( - llvm::remove_if(ParsedAttr.Features, - [&](const std::string &Feat) { - return !Target.isValidFeatureName( - StringRef{Feat}.substr(1)); - }), - ParsedAttr.Features.end()); - return ParsedAttr; -} - - -// Fills in the supplied string map with the set of target features for the -// passed in function. -void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, - GlobalDecl GD) { - StringRef TargetCPU = Target.getTargetOpts().CPU; - const FunctionDecl *FD = GD.getDecl()->getAsFunction(); - if (const auto *TD = FD->getAttr<TargetAttr>()) { - TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD); - - // Make a copy of the features as passed on the command line into the - // beginning of the additional features from the function to override. - ParsedAttr.Features.insert(ParsedAttr.Features.begin(), - Target.getTargetOpts().FeaturesAsWritten.begin(), - Target.getTargetOpts().FeaturesAsWritten.end()); - - if (ParsedAttr.Architecture != "" && - Target.isValidCPUName(ParsedAttr.Architecture)) - TargetCPU = ParsedAttr.Architecture; - - // Now populate the feature map, first with the TargetCPU which is either - // the default or a new one from the target attribute string. Then we'll use - // the passed in features (FeaturesAsWritten) along with the new ones from - // the attribute. 
- Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, - ParsedAttr.Features); - } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) { - llvm::SmallVector<StringRef, 32> FeaturesTmp; - Target.getCPUSpecificCPUDispatchFeatures( - SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp); - std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end()); - Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); - } else { - Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, - Target.getTargetOpts().Features); - } -} - llvm::SanitizerStatReport &CodeGenModule::getSanStats() { if (!SanStats) - SanStats = llvm::make_unique<llvm::SanitizerStatReport>(&getModule()); + SanStats = std::make_unique<llvm::SanitizerStatReport>(&getModule()); return *SanStats; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h index 95964afed4ec..115e754bb392 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.h @@ -17,7 +17,6 @@ #include "CodeGenTypeCache.h" #include "CodeGenTypes.h" #include "SanitizerMetadata.h" -#include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" @@ -45,6 +44,7 @@ class GlobalValue; class DataLayout; class FunctionType; class LLVMContext; +class OpenMPIRBuilder; class IndexedInstrProfReader; } @@ -77,6 +77,9 @@ class AnnotateAttr; class CXXDestructorDecl; class Module; class CoverageSourceInfo; +class TargetAttr; +class InitSegAttr; +struct ParsedTargetAttr; namespace CodeGen { @@ -319,6 +322,7 @@ private: std::unique_ptr<CGObjCRuntime> ObjCRuntime; std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime; std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime; + std::unique_ptr<llvm::OpenMPIRBuilder> OMPBuilder; std::unique_ptr<CGCUDARuntime> CUDARuntime; std::unique_ptr<CGDebugInfo> DebugInfo; std::unique_ptr<ObjCEntrypoints> ObjCData; @@ -523,18 +527,18 @@ private: int GlobalUniqueCount; } Block; + GlobalDecl initializedGlobalDecl; + + /// @} + /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>) llvm::Function *LifetimeStartFn = nullptr; /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>) llvm::Function *LifetimeEndFn = nullptr; - GlobalDecl initializedGlobalDecl; - std::unique_ptr<SanitizerMetadata> SanitizerMD; - /// @} - llvm::MapVector<const Decl *, bool> DeferredEmptyCoverageMappingDecls; std::unique_ptr<CoverageMappingModuleGen> CoverageMapping; @@ -585,6 +589,9 @@ public: return *OpenMPRuntime; } + /// Return a pointer to the configured OpenMPIRBuilder, if any. + llvm::OpenMPIRBuilder *getOpenMPIRBuilder() { return OMPBuilder.get(); } + /// Return a reference to the configured CUDA runtime. CGCUDARuntime &getCUDARuntime() { assert(CUDARuntime != nullptr); @@ -1027,11 +1034,22 @@ public: } /// Create or return a runtime function declaration with the specified type - /// and name. + /// and name. If \p AssumeConvergent is true, the call will have the + /// convergent attribute added. llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs = llvm::AttributeList(), - bool Local = false); + bool Local = false, bool AssumeConvergent = false); + + /// Create or return a runtime function declaration with the specified type + /// and name. This will automatically add the convergent attribute to the + /// function declaration. 
+ llvm::FunctionCallee CreateConvergentRuntimeFunction( + llvm::FunctionType *Ty, StringRef Name, + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), + bool Local = false) { + return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true); + } /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, @@ -1139,19 +1157,13 @@ public: /// It's up to you to ensure that this is safe. void AddDefaultFnAttrs(llvm::Function &F); - /// Parses the target attributes passed in, and returns only the ones that are - /// valid feature names. - TargetAttr::ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD); - - // Fills in the supplied string map with the set of target features for the - // passed in function. - void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, GlobalDecl GD); - StringRef getMangledName(GlobalDecl GD); StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD); void EmitTentativeDefinition(const VarDecl *D); + void EmitExternalDeclaration(const VarDecl *D); + void EmitVTable(CXXRecordDecl *Class); void RefreshTypeCacheForClass(const CXXRecordDecl *Class); @@ -1270,13 +1282,26 @@ public: /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); + /// Emits the definition of \p OldGD function with body from \p NewGD. + /// Required for proper handling of declare variant directive on the GPU. + void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD, + llvm::GlobalValue *GV); + /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which + /// a virtual function call could be made which ends up being dispatched to a + /// member function of this class. This scope can be wider than the visibility + /// of the class itself when the class has a more-visible dynamic base class. + llvm::GlobalObject::VCallVisibility + GetVCallVisibilityLevel(const CXXRecordDecl *RD); + /// Emit type metadata for the given vtable using the given layout. - void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + void EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout); /// Generate a cross-DSO type identifier for MD. 
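// A standalone sketch of what AssumeConvergent does (LLVM API of this era;
// illustrative only). The convergent attribute forbids transforms that would
// change the set of threads reaching the call, which GPU runtime entry
// points created through CreateConvergentRuntimeFunction rely on:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"

llvm::AttributeList markConvergent(llvm::LLVMContext &Ctx,
                                   llvm::AttributeList Attrs) {
  // The same call CreateRuntimeFunction now performs before creating the
  // declaration.
  return Attrs.addAttribute(Ctx, llvm::AttributeList::FunctionIndex,
                            llvm::Attribute::Convergent);
}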
@@ -1374,6 +1399,7 @@ private: void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); + void EmitExternalVarDeclaration(const VarDecl *D); void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); void emitCPUDispatchDefinition(GlobalDecl GD); diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp index d10a321dc3d7..bad796bf92dc 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp @@ -17,6 +17,7 @@ #include "clang/AST/StmtVisitor.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" @@ -166,7 +167,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { bool TraverseBlockExpr(BlockExpr *BE) { return true; } bool TraverseLambdaExpr(LambdaExpr *LE) { // Traverse the captures, but not the body. - for (const auto &C : zip(LE->captures(), LE->capture_inits())) + for (auto C : zip(LE->captures(), LE->capture_inits())) TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C)); return true; } @@ -980,7 +981,7 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, return; } ProfRecord = - llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); + std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); RegionCounts = ProfRecord->Counts; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.h index 2e740f789243..a3778b549910 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.h +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.h @@ -41,8 +41,8 @@ private: public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0), - CurrentRegionCount(0) {} + : CGM(CGM), FuncNameVar(nullptr), NumValueSites({{0}}), + NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTBAA.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTBAA.cpp index 09de9591de7e..7d730cb1ed15 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -78,17 +78,18 @@ llvm::MDNode *CodeGenTBAA::getChar() { static bool TypeHasMayAlias(QualType QTy) { // Tagged types have declarations, and therefore may have attributes. - if (const TagType *TTy = dyn_cast<TagType>(QTy)) - return TTy->getDecl()->hasAttr<MayAliasAttr>(); + if (auto *TD = QTy->getAsTagDecl()) + if (TD->hasAttr<MayAliasAttr>()) + return true; - // Typedef types have declarations, and therefore may have attributes. - if (const TypedefType *TTy = dyn_cast<TypedefType>(QTy)) { - if (TTy->getDecl()->hasAttr<MayAliasAttr>()) + // Also look for may_alias as a declaration attribute on a typedef. + // FIXME: We should follow GCC and model may_alias as a type attribute + // rather than as a declaration attribute. + while (auto *TT = QTy->getAs<TypedefType>()) { + if (TT->getDecl()->hasAttr<MayAliasAttr>()) return true; - // Also, their underlying types may have relevant attributes. 
- return TypeHasMayAlias(TTy->desugar()); + QTy = TT->desugar(); } - return false; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp index 79b29b3d916f..a458811d7a30 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp @@ -135,8 +135,8 @@ isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT, // the class. if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { for (const auto &I : CRD->bases()) - if (!isSafeToConvert(I.getType()->getAs<RecordType>()->getDecl(), - CGT, AlreadyChecked)) + if (!isSafeToConvert(I.getType()->castAs<RecordType>()->getDecl(), CGT, + AlreadyChecked)) return false; } @@ -402,7 +402,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical or dependent types aren't possible."); case Type::Builtin: { @@ -512,6 +512,22 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; + // TODO: real CodeGen support for SVE types requires more infrastructure + // to be added first. Report an error until then. +#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" + { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "cannot yet generate code for SVE type '%0'"); + auto *BT = cast<BuiltinType>(Ty); + auto Name = BT->getName(CGM.getContext().getPrintingPolicy()); + CGM.getDiags().Report(DiagID) << Name; + // Return something safe. 
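// (Sample trigger for the diagnostic above when compiling for an SVE-enabled
//  AArch64 target; the command line is an assumption, not from the patch:
//    clang --target=aarch64-linux-gnu -march=armv8-a+sve
//    __SVInt8_t passthrough(__SVInt8_t v) { return v; }
//  error: cannot yet generate code for SVE type '__SVInt8_t')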
+ ResultType = llvm::IntegerType::get(getLLVMContext(), 32); + break; + } + case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ @@ -728,8 +744,7 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { for (const auto &I : CRD->bases()) { if (I.isVirtual()) continue; - - ConvertRecordDeclType(I.getType()->getAs<RecordType>()->getDecl()); + ConvertRecordDeclType(I.getType()->castAs<RecordType>()->getDecl()); } } diff --git a/contrib/llvm-project/clang/lib/CodeGen/ConstantEmitter.h b/contrib/llvm-project/clang/lib/CodeGen/ConstantEmitter.h index 59a19730f4eb..121acbac4fa9 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ConstantEmitter.h +++ b/contrib/llvm-project/clang/lib/CodeGen/ConstantEmitter.h @@ -23,7 +23,7 @@ namespace CodeGen { class ConstantEmitter { public: CodeGenModule &CGM; - CodeGenFunction *CGF; + CodeGenFunction *const CGF; private: bool Abstract = false; diff --git a/contrib/llvm-project/clang/lib/CodeGen/ConstantInitBuilder.cpp b/contrib/llvm-project/clang/lib/CodeGen/ConstantInitBuilder.cpp index 40b1607b5626..2d63d88020be 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ConstantInitBuilder.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ConstantInitBuilder.cpp @@ -79,7 +79,7 @@ ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer, /*insert before*/ nullptr, llvm::GlobalValue::NotThreadLocal, addressSpace); - GV->setAlignment(alignment.getQuantity()); + GV->setAlignment(alignment.getAsAlign()); resolveSelfReferences(GV); return GV; } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp index 6d18027f16a8..bdecff39c88f 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1114,8 +1114,8 @@ struct CounterCoverageMappingBuilder // Make a region for the body of the switch. If the body starts with // a case, that case will reuse this region; otherwise, this covers // the unreachable code at the beginning of the switch body. - size_t Index = - pushRegion(Counter::getZero(), getStart(CS->body_front())); + size_t Index = pushRegion(Counter::getZero(), getStart(CS)); + getRegion().setGap(true); for (const auto *Child : CS->children()) Visit(Child); @@ -1442,7 +1442,7 @@ void CoverageMappingModuleGen::emit() { CovDataVal, llvm::getCoverageMappingVarName()); CovData->setSection(getCoverageSection(CGM)); - CovData->setAlignment(8); + CovData->setAlignment(llvm::Align(8)); // Make sure the data doesn't get deleted. CGM.addUsedGlobal(CovData); diff --git a/contrib/llvm-project/clang/lib/CodeGen/EHScopeStack.h b/contrib/llvm-project/clang/lib/CodeGen/EHScopeStack.h index 3b0db35d982b..0ed67aabcd62 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/EHScopeStack.h +++ b/contrib/llvm-project/clang/lib/CodeGen/EHScopeStack.h @@ -199,14 +199,14 @@ public: SavedTuple Saved; template <std::size_t... Is> - T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) { + T restore(CodeGenFunction &CGF, std::index_sequence<Is...>) { // It's important that the restores are emitted in order. The braced init // list guarantees that. 
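// A self-contained illustration of that ordering guarantee (hypothetical
// names; C++14): braced-list elements are evaluated left to right, unlike
// ordinary function-call arguments.

#include <cstdio>
#include <tuple>
#include <utility>

struct Payload { int a, b; };

template <typename... As> struct SavedTuple {
  std::tuple<As...> Saved;
  template <std::size_t... Is>
  Payload restore(std::index_sequence<Is...>) {
    return Payload{std::get<Is>(Saved)...};   // restored strictly in order
  }
  Payload restoreAll() { return restore(std::index_sequence_for<As...>()); }
};

int main() {
  SavedTuple<int, int> S{std::make_tuple(1, 2)};
  Payload P = S.restoreAll();
  std::printf("%d %d\n", P.a, P.b);   // prints "1 2"
  return 0;
}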
return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...}; } void Emit(CodeGenFunction &CGF, Flags flags) override { - restore(CGF, llvm::index_sequence_for<As...>()).Emit(CGF, flags); + restore(CGF, std::index_sequence_for<As...>()).Emit(CGF, flags); } public: diff --git a/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp index 51a2561a4552..b5b8702c551e 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -24,10 +24,11 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "clang/AST/Attr.h" #include "clang/AST/Mangle.h" -#include "clang/AST/Type.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/Type.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -43,6 +44,10 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { /// VTables - All the vtables which have been defined. llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables; + /// All the thread wrapper functions that have been used. + llvm::SmallVector<std::pair<const VarDecl *, llvm::Function *>, 8> + ThreadWrappers; + protected: bool UseARMMethodPtrABI; bool UseARMGuardVarABI; @@ -322,7 +327,43 @@ public: ArrayRef<llvm::Function *> CXXThreadLocalInits, ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override; - bool usesThreadWrapperFunction() const override { return true; } + /// Determine whether we will definitely emit this variable with a constant + /// initializer, either because the language semantics demand it or because + /// we know that the initializer is a constant. + bool isEmittedWithConstantInitializer(const VarDecl *VD) const { + VD = VD->getMostRecentDecl(); + if (VD->hasAttr<ConstInitAttr>()) + return true; + + // All later checks examine the initializer specified on the variable. If + // the variable is weak, such examination would not be correct. + if (VD->isWeak() || VD->hasAttr<SelectAnyAttr>()) + return false; + + const VarDecl *InitDecl = VD->getInitializingDeclaration(); + if (!InitDecl) + return false; + + // If there's no initializer to run, this is constant initialization. + if (!InitDecl->hasInit()) + return true; + + // If we have the only definition, we don't need a thread wrapper if we + // will emit the value as a constant. + if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD))) + return !VD->needsDestruction(getContext()) && InitDecl->evaluateValue(); + + // Otherwise, we need a thread wrapper unless we know that every + // translation unit will emit the value as a constant. We rely on + // ICE-ness not varying between translation units, which isn't actually + // guaranteed by the standard but is necessary for sanity. 
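// (Illustration of the predicate; sample user code, not part of the patch:
//    thread_local int x = 42;    // constant init: no wrapper needed
//    int f();
//    thread_local int y = f();   // dynamic init: wrapper + guard required
//  usesThreadWrapperFunction() below also keeps the wrapper whenever a
//  destructor has to be registered on first use.)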
+ return InitDecl->isInitKnownICE() && InitDecl->isInitICE(); + } + + bool usesThreadWrapperFunction(const VarDecl *VD) const override { + return !isEmittedWithConstantInitializer(VD) || + VD->needsDestruction(getContext()); + } LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType LValType) override; @@ -415,8 +456,8 @@ public: class ARMCXXABI : public ItaniumCXXABI { public: ARMCXXABI(CodeGen::CodeGenModule &CGM) : - ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, - /* UseARMGuardVarABI = */ true) {} + ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true) {} bool HasThisReturn(GlobalDecl GD) const override { return (isa<CXXConstructorDecl>(GD.getDecl()) || ( @@ -447,6 +488,19 @@ public: bool shouldRTTIBeUnique() const override { return false; } }; +class FuchsiaCXXABI final : public ItaniumCXXABI { +public: + explicit FuchsiaCXXABI(CodeGen::CodeGenModule &CGM) + : ItaniumCXXABI(CGM) {} + +private: + bool HasThisReturn(GlobalDecl GD) const override { + return isa<CXXConstructorDecl>(GD.getDecl()) || + (isa<CXXDestructorDecl>(GD.getDecl()) && + GD.getDtorType() != Dtor_Deleting); + } +}; + class WebAssemblyCXXABI final : public ItaniumCXXABI { public: explicit WebAssemblyCXXABI(CodeGen::CodeGenModule &CGM) @@ -476,15 +530,18 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::iOS64: return new iOS64CXXABI(CGM); + case TargetCXXABI::Fuchsia: + return new FuchsiaCXXABI(CGM); + // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't // include the other 32-bit ARM oddities: constructor/destructor return values // and array cookies. case TargetCXXABI::GenericAArch64: - return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, - /* UseARMGuardVarABI = */ true); + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); case TargetCXXABI::GenericMIPS: - return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true); + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); case TargetCXXABI::WebAssembly: return new WebAssemblyCXXABI(CGM); @@ -495,8 +552,7 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { // For PNaCl, use ARM-style method pointers so that PNaCl code // does not assume anything about the alignment of function // pointers. - return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, - /* UseARMGuardVarABI = */ false); + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); } return new ItaniumCXXABI(CGM); @@ -541,8 +597,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( const FunctionProtoType *FPT = MPT->getPointeeType()->getAs<FunctionProtoType>(); - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); + auto *RD = + cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); @@ -605,8 +661,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty); VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy); } - // Compute the address of the virtual function pointer. - llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); // Check the address of the function pointer if CFI on member function // pointers is enabled. 
@@ -614,44 +668,81 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
     llvm::Constant *CheckTypeDesc;
     bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) &&
                               CGM.HasHiddenLTOVisibility(RD);
-    if (ShouldEmitCFICheck) {
-      CodeGenFunction::SanitizerScope SanScope(&CGF);
+    bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
+                             CGM.HasHiddenLTOVisibility(RD);
+    llvm::Value *VirtualFn = nullptr;
 
-      CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc());
-      CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
-      llvm::Constant *StaticData[] = {
-          llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
-          CheckSourceLocation,
-          CheckTypeDesc,
-      };
-
-      llvm::Metadata *MD =
-          CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
-      llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
-
-      llvm::Value *TypeTest = Builder.CreateCall(
-          CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId});
+    {
+      CodeGenFunction::SanitizerScope SanScope(&CGF);
+      llvm::Value *TypeId = nullptr;
+      llvm::Value *CheckResult = nullptr;
+
+      if (ShouldEmitCFICheck || ShouldEmitVFEInfo) {
+        // If doing CFI or VFE, we will need the metadata node to check against.
+        llvm::Metadata *MD =
+            CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
+        TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+      }
 
-      if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
-        CGF.EmitTrapCheck(TypeTest);
+      llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
+
+      if (ShouldEmitVFEInfo) {
+        // If doing VFE, load from the vtable with a type.checked.load intrinsic
+        // call. Note that we use the GEP to calculate the address to load from
+        // and pass 0 as the offset to the intrinsic. This is because every
+        // vtable slot of the correct type is marked with matching metadata, and
+        // we know that the load must be from one of these slots.
+        llvm::Value *CheckedLoad = Builder.CreateCall(
+            CGM.getIntrinsic(llvm::Intrinsic::type_checked_load),
+            {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId});
+        CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
+        VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0);
+        VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(),
+                                          "memptr.virtualfn");
       } else {
-        llvm::Value *AllVtables = llvm::MetadataAsValue::get(
-            CGM.getLLVMContext(),
-            llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
-        llvm::Value *ValidVtable = Builder.CreateCall(
-            CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
-        CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall),
-                      SanitizerHandler::CFICheckFail, StaticData,
-                      {VTable, ValidVtable});
+        // When not doing VFE, emit a normal load, as it allows more
+        // optimisations than type.checked.load.
+        if (ShouldEmitCFICheck) {
+          CheckResult = Builder.CreateCall(
+              CGM.getIntrinsic(llvm::Intrinsic::type_test),
+              {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
+        }
+        VFPAddr =
+            Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
+        VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(),
+                                              "memptr.virtualfn");
       }
+      assert(VirtualFn && "Virtual function pointer not created!");
+      assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) &&
+             "Check result required but not created!");
+
+      if (ShouldEmitCFICheck) {
+        // If doing CFI, emit the check.
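// (Background for the VFE branch above: under the assumed flag set
//  "-flto=full -fvisibility=hidden -fwhole-program-vtables
//  -fvirtual-function-elimination", every virtual dispatch, including one
//  through a member function pointer such as
//     struct S { virtual int f(); virtual int g(); };
//     int call(S &s, int (S::*mp)()) { return (s.*mp)(); }
//  is routed through llvm.type.checked.load, letting LTO see exactly which
//  vtable slots are loaded and drop the unreferenced ones.)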
+ CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); + CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), + CheckSourceLocation, + CheckTypeDesc, + }; - FnVirtual = Builder.GetInsertBlock(); - } + if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { + CGF.EmitTrapCheck(CheckResult); + } else { + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); + CGF.EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIMFCall), + SanitizerHandler::CFICheckFail, StaticData, + {VTable, ValidVtable}); + } + + FnVirtual = Builder.GetInsertBlock(); + } + } // End of sanitizer scope - // Load the virtual function to call. - VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); - llvm::Value *VirtualFn = Builder.CreateAlignedLoad( - VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn"); CGF.EmitBranch(FnEnd); // In the non-virtual path, the function pointer is actually a @@ -1104,7 +1195,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, // Grab the vtable pointer as an intptr_t*. auto *ClassDecl = - cast<CXXRecordDecl>(ElementType->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(ElementType->castAs<RecordType>()->getDecl()); llvm::Value *VTable = CGF.GetVTablePtr(Ptr, CGF.IntPtrTy->getPointerTo(), ClassDecl); @@ -1307,7 +1398,7 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, Address ThisPtr, llvm::Type *StdTypeInfoPtrTy) { auto *ClassDecl = - cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); llvm::Value *Value = CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl); @@ -1373,7 +1464,7 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, llvm::Type *DestLTy = CGF.ConvertType(DestTy); auto *ClassDecl = - cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); // Get the vtable pointer. llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(), ClassDecl); @@ -1595,7 +1686,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, EmitFundamentalRTTIDescriptors(RD); if (!VTable->isDeclarationForLinker()) - CGM.EmitVTableTypeMetadata(VTable, VTLayout); + CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( @@ -2155,7 +2246,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, guard->setVisibility(var->getVisibility()); // If the variable is thread-local, so is its guard variable. guard->setThreadLocalMode(var->getThreadLocalMode()); - guard->setAlignment(guardAlignment.getQuantity()); + guard->setAlignment(guardAlignment.getAsAlign()); // The ABI says: "It is suggested that it be emitted in the same COMDAT // group as the associated data object." 
In practice, this doesn't work for @@ -2332,7 +2423,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, } void CodeGenModule::registerGlobalDtorsWithAtExit() { - for (const auto I : DtorsUsingAtExit) { + for (const auto &I : DtorsUsingAtExit) { int Priority = I.first; const llvm::TinyPtrVector<llvm::Function *> &Dtors = I.second; @@ -2455,10 +2546,10 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM), WrapperName.str(), &CGM.getModule()); - CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper); + if (CGM.supportsCOMDAT() && Wrapper->isWeakForLinker()) + Wrapper->setComdat(CGM.getModule().getOrInsertComdat(Wrapper->getName())); - if (VD->hasDefinition()) - CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper); // Always resolve references to the wrapper at link time. if (!Wrapper->hasLocalLinkage()) @@ -2472,6 +2563,8 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); Wrapper->addFnAttr(llvm::Attribute::NoUnwind); } + + ThreadWrappers.push_back({VD, Wrapper}); return Wrapper; } @@ -2509,7 +2602,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Guard->setThreadLocal(true); CharUnits GuardAlign = CharUnits::One(); - Guard->setAlignment(GuardAlign.getQuantity()); + Guard->setAlignment(GuardAlign.getAsAlign()); CodeGenFunction(CGM).GenerateCXXGlobalInitFunc( InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign)); @@ -2520,20 +2613,40 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( } } - // Emit thread wrappers. + // Create declarations for thread wrappers for all thread-local variables + // with non-discardable definitions in this translation unit. for (const VarDecl *VD : CXXThreadLocals) { + if (VD->hasDefinition() && + !isDiscardableGVALinkage(getContext().GetGVALinkageForVariable(VD))) { + llvm::GlobalValue *GV = CGM.GetGlobalValue(CGM.getMangledName(VD)); + getOrCreateThreadLocalWrapper(VD, GV); + } + } + + // Emit all referenced thread wrappers. + for (auto VDAndWrapper : ThreadWrappers) { + const VarDecl *VD = VDAndWrapper.first; llvm::GlobalVariable *Var = cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD))); - llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var); + llvm::Function *Wrapper = VDAndWrapper.second; // Some targets require that all access to thread local variables go through // the thread wrapper. This means that we cannot attempt to create a thread // wrapper or a thread helper. - if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) { - Wrapper->setLinkage(llvm::Function::ExternalLinkage); - continue; + if (!VD->hasDefinition()) { + if (isThreadWrapperReplaceable(VD, CGM)) { + Wrapper->setLinkage(llvm::Function::ExternalLinkage); + continue; + } + + // If this isn't a TU in which this variable is defined, the thread + // wrapper is discardable. + if (Wrapper->getLinkage() == llvm::Function::WeakODRLinkage) + Wrapper->setLinkage(llvm::Function::LinkOnceODRLinkage); } + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); + // Mangle the name for the thread_local initialization function. SmallString<256> InitFnName; { @@ -2548,7 +2661,10 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // produce a declaration of the initialization function. 
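As background for the wrapper bookkeeping above (a sketch, not part of the change): under the Itanium ABI, every use of a dynamically initialized thread_local is routed through a thread wrapper function, and the hunks above create those wrappers up front for all non-discardable definitions so that unreferenced ones can later have their linkage weakened and be discarded.

    // Hypothetical TU: 'tls' has a dynamic initializer, so every access goes
    // through a thread wrapper (conventionally mangled _ZTW3tls) that runs
    // the initializer on first use in each thread.
    #include <cstdio>

    static int next_id() { static int n = 0; return ++n; }
    thread_local int tls = next_id(); // dynamic initialization

    int main() {
      std::printf("%d\n", tls); // lowered roughly as *_ZTW3tls()
      return 0;
    }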
llvm::GlobalValue *Init = nullptr; bool InitIsInitFunc = false; - if (VD->hasDefinition()) { + bool HasConstantInitialization = false; + if (!usesThreadWrapperFunction(VD)) { + HasConstantInitialization = true; + } else if (VD->hasDefinition()) { InitIsInitFunc = true; llvm::Function *InitFuncToUse = InitFunc; if (isTemplateInstantiation(VD->getTemplateSpecializationKind())) @@ -2571,13 +2687,17 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( if (Init) { Init->setVisibility(Var->getVisibility()); - Init->setDSOLocal(Var->isDSOLocal()); + // Don't mark an extern_weak function DSO local on windows. + if (!CGM.getTriple().isOSWindows() || !Init->hasExternalWeakLinkage()) + Init->setDSOLocal(Var->isDSOLocal()); } llvm::LLVMContext &Context = CGM.getModule().getContext(); llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); CGBuilderTy Builder(CGM, Entry); - if (InitIsInitFunc) { + if (HasConstantInitialization) { + // No dynamic initialization to invoke. + } else if (InitIsInitFunc) { if (Init) { llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init); if (isThreadWrapperReplaceable(VD, CGM)) { @@ -2861,6 +2981,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: @@ -3034,8 +3157,8 @@ static bool CanUseSingleInheritance(const CXXRecordDecl *RD) { return false; // Check that the class is dynamic iff the base is. - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl()); if (!BaseDecl->isEmpty() && BaseDecl->isDynamicClass() != RD->isDynamicClass()) return false; @@ -3062,7 +3185,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical and dependent types shouldn't get here"); case Type::LValueReference: @@ -3308,7 +3431,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical and dependent types shouldn't get here"); // GCC treats vector types as fundamental types. 
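The new HasConstantInitialization path above is easiest to see with an example (a sketch; constinit assumes C++20): when the variable needs no wrapper-mediated dynamic initialization, for instance because its initializer is a constant expression, the wrapper body has nothing to invoke.

    // Hypothetical examples: both variables below are constant-initialized,
    // so the wrapper takes the "no dynamic initialization to invoke" path.
    thread_local int counter = 0;          // constant initializer
    // constinit (C++20) makes the constant-initialization requirement explicit:
    constinit thread_local int base = 42;

    int bump() { return base + ++counter; }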
@@ -3413,7 +3536,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( CharUnits Align = CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0)); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); // The Itanium ABI specifies that type_info objects must be globally // unique, with one exception: if the type is an incomplete class @@ -3498,8 +3621,8 @@ static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base, unsigned Flags = 0; - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl()); if (Base->isVirtual()) { // Mark the virtual base as seen. @@ -3597,8 +3720,8 @@ void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) { // The __base_type member points to the RTTI for the base type. Fields.push_back(ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(Base.getType())); - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(Base.getType()->castAs<RecordType>()->getDecl()); int64_t OffsetFlags = 0; diff --git a/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp index e02c9ae0b8f2..aff46135705a 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -19,11 +19,13 @@ #include "CodeGenModule.h" #include "CodeGenTypes.h" #include "TargetInfo.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "clang/AST/Attr.h" +#include "clang/AST/CXXInheritance.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/VTableBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/IR/Intrinsics.h" @@ -386,7 +388,9 @@ public: ArrayRef<llvm::Function *> CXXThreadLocalInits, ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override; - bool usesThreadWrapperFunction() const override { return false; } + bool usesThreadWrapperFunction(const VarDecl *VD) const override { + return false; + } LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType LValType) override; @@ -1211,7 +1215,7 @@ static bool hasDefaultCXXMethodCC(ASTContext &Context, CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention( /*IsVariadic=*/false, /*IsCXXMethod=*/true); CallingConv ActualCallingConv = - MD->getType()->getAs<FunctionProtoType>()->getCallConv(); + MD->getType()->castAs<FunctionProtoType>()->getCallConv(); return ExpectedCallingConv == ActualCallingConv; } @@ -1340,6 +1344,13 @@ void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { // The TU defining a dtor is only guaranteed to emit a base destructor. All // other destructor variants are delegating thunks. CGM.EmitGlobal(GlobalDecl(D, Dtor_Base)); + + // If the class is dllexported, emit the complete (vbase) destructor wherever + // the base dtor is emitted. + // FIXME: To match MSVC, this should only be done when the class is exported + // with -fdllexport-inlines enabled. 
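A hedged sketch of the class shape the dllexport destructor hunk above targets (illustrative names; assumes an MSVC-compatible target): with a virtual base, defining the destructor of a dllexported class now also emits the complete, vbase-destroying variant.

    // Hypothetical example for the EmitCXXDestructors change above.
    struct V {
      ~V();
    };
    struct __declspec(dllexport) D : virtual V {
      ~D(); // emitting the base dtor here now also emits Dtor_Complete
    };
    V::~V() {}
    D::~D() {}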
+ if (D->getParent()->getNumVBases() > 0 && D->hasAttr<DLLExportAttr>()) + CGM.EmitGlobal(GlobalDecl(D, Dtor_Complete)); } CharUnits @@ -2356,7 +2367,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) { /*isConstant=*/false, llvm::GlobalVariable::ExternalLinkage, /*Initializer=*/nullptr, VarName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return ConstantAddress(GV, Align); } @@ -2499,7 +2510,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, GV->getLinkage(), Zero, GuardName.str()); GuardVar->setVisibility(GV->getVisibility()); GuardVar->setDLLStorageClass(GV->getDLLStorageClass()); - GuardVar->setAlignment(GuardAlign.getQuantity()); + GuardVar->setAlignment(GuardAlign.getAsAlign()); if (GuardVar->isWeakForLinker()) GuardVar->setComdat( CGM.getModule().getOrInsertComdat(GuardVar->getName())); @@ -2602,27 +2613,27 @@ bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) { // we can't zero initialize. The field offset is sometimes also -1 if 0 is a // valid field offset. const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); - return (!MSInheritanceAttr::hasVBTableOffsetField(Inheritance) && + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + return (!inheritanceModelHasVBTableOffsetField(Inheritance) && RD->nullFieldOffsetIsZero()); } llvm::Type * MicrosoftCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); llvm::SmallVector<llvm::Type *, 4> fields; if (MPT->isMemberFunctionPointer()) fields.push_back(CGM.VoidPtrTy); // FunctionPointerOrVirtualThunk else fields.push_back(CGM.IntTy); // FieldOffset - if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(), - Inheritance)) + if (inheritanceModelHasNVOffsetField(MPT->isMemberFunctionPointer(), + Inheritance)) fields.push_back(CGM.IntTy); - if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) + if (inheritanceModelHasVBPtrOffsetField(Inheritance)) fields.push_back(CGM.IntTy); - if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) + if (inheritanceModelHasVBTableOffsetField(Inheritance)) fields.push_back(CGM.IntTy); // VirtualBaseAdjustmentOffset if (fields.size() == 1) @@ -2635,7 +2646,7 @@ GetNullMemberPointerFields(const MemberPointerType *MPT, llvm::SmallVectorImpl<llvm::Constant *> &fields) { assert(fields.empty()); const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); if (MPT->isMemberFunctionPointer()) { // FunctionPointerOrVirtualThunk fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy)); @@ -2646,12 +2657,12 @@ GetNullMemberPointerFields(const MemberPointerType *MPT, fields.push_back(getAllOnesInt()); // FieldOffset } - if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(), - Inheritance)) + if (inheritanceModelHasNVOffsetField(MPT->isMemberFunctionPointer(), + Inheritance)) fields.push_back(getZeroInt()); - if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) + if (inheritanceModelHasVBPtrOffsetField(Inheritance)) fields.push_back(getZeroInt()); - if 
(MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) + if (inheritanceModelHasVBTableOffsetField(Inheritance)) fields.push_back(getAllOnesInt()); } @@ -2672,21 +2683,21 @@ MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField, const CXXRecordDecl *RD, CharUnits NonVirtualBaseAdjustment, unsigned VBTableIndex) { - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); // Single inheritance class member pointer are represented as scalars instead // of aggregates. - if (MSInheritanceAttr::hasOnlyOneField(IsMemberFunction, Inheritance)) + if (inheritanceModelHasOnlyOneField(IsMemberFunction, Inheritance)) return FirstField; llvm::SmallVector<llvm::Constant *, 4> fields; fields.push_back(FirstField); - if (MSInheritanceAttr::hasNVOffsetField(IsMemberFunction, Inheritance)) + if (inheritanceModelHasNVOffsetField(IsMemberFunction, Inheritance)) fields.push_back(llvm::ConstantInt::get( CGM.IntTy, NonVirtualBaseAdjustment.getQuantity())); - if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) { + if (inheritanceModelHasVBPtrOffsetField(Inheritance)) { CharUnits Offs = CharUnits::Zero(); if (VBTableIndex) Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset(); @@ -2694,7 +2705,7 @@ MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField, } // The rest of the fields are adjusted by conversions to a more derived class. - if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) + if (inheritanceModelHasVBTableOffsetField(Inheritance)) fields.push_back(llvm::ConstantInt::get(CGM.IntTy, VBTableIndex)); return llvm::ConstantStruct::getAnon(fields); @@ -2709,7 +2720,7 @@ MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT, llvm::Constant *MicrosoftCXXABI::EmitMemberDataPointer(const CXXRecordDecl *RD, CharUnits offset) { if (RD->getMSInheritanceModel() == - MSInheritanceAttr::Keyword_virtual_inheritance) + MSInheritanceModel::Virtual) offset -= getContext().getOffsetOfBaseWithVBPtr(RD); llvm::Constant *FirstField = llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity()); @@ -2815,7 +2826,7 @@ MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) { if (VBTableIndex == 0 && RD->getMSInheritanceModel() == - MSInheritanceAttr::Keyword_virtual_inheritance) + MSInheritanceModel::Virtual) NonVirtualBaseAdjustment -= getContext().getOffsetOfBaseWithVBPtr(RD); // The rest of the fields are common with data member pointers. @@ -2851,9 +2862,9 @@ MicrosoftCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF, // If this is a single field member pointer (single inheritance), this is a // single icmp. const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); - if (MSInheritanceAttr::hasOnlyOneField(MPT->isMemberFunctionPointer(), - Inheritance)) + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + if (inheritanceModelHasOnlyOneField(MPT->isMemberFunctionPointer(), + Inheritance)) return Builder.CreateICmp(Eq, L, R); // Compare the first field. @@ -3053,7 +3064,7 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress( CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS); CGBuilderTy &Builder = CGF.Builder; const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); // Extract the fields we need, regardless of model. 
We'll apply them if we // have them. @@ -3064,9 +3075,9 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress( // We need to extract values. unsigned I = 0; FieldOffset = Builder.CreateExtractValue(MemPtr, I++); - if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) + if (inheritanceModelHasVBPtrOffsetField(Inheritance)) VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++); - if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) + if (inheritanceModelHasVBTableOffsetField(Inheritance)) VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++); } @@ -3161,8 +3172,8 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( CGBuilderTy &Builder) { const CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl(); const CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl(); - MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel(); - MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel(); + MSInheritanceModel SrcInheritance = SrcRD->getMSInheritanceModel(); + MSInheritanceModel DstInheritance = DstRD->getMSInheritanceModel(); bool IsFunc = SrcTy->isMemberFunctionPointer(); bool IsConstant = isa<llvm::Constant>(Src); @@ -3171,15 +3182,15 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( llvm::Value *NonVirtualBaseAdjustment = getZeroInt(); llvm::Value *VirtualBaseAdjustmentOffset = getZeroInt(); llvm::Value *VBPtrOffset = getZeroInt(); - if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) { + if (!inheritanceModelHasOnlyOneField(IsFunc, SrcInheritance)) { // We need to extract values. unsigned I = 0; FirstField = Builder.CreateExtractValue(Src, I++); - if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance)) + if (inheritanceModelHasNVOffsetField(IsFunc, SrcInheritance)) NonVirtualBaseAdjustment = Builder.CreateExtractValue(Src, I++); - if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance)) + if (inheritanceModelHasVBPtrOffsetField(SrcInheritance)) VBPtrOffset = Builder.CreateExtractValue(Src, I++); - if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) + if (inheritanceModelHasVBTableOffsetField(SrcInheritance)) VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(Src, I++); } @@ -3198,7 +3209,7 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( // adjustment to normalize the member pointer. llvm::Value *SrcVBIndexEqZero = Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt()); - if (SrcInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) { + if (SrcInheritance == MSInheritanceModel::Virtual) { if (int64_t SrcOffsetToFirstVBase = getContext().getOffsetOfBaseWithVBPtr(SrcRD).getQuantity()) { llvm::Value *UndoSrcAdjustment = Builder.CreateSelect( @@ -3232,8 +3243,8 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( // Update the vbindex to an appropriate value in the destination because // SrcRD's vbtable might not be a strict prefix of the one in DstRD. 
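For context on the inheritanceModelHas*Field predicates used throughout these hunks, a small illustration (a sketch; the ordering holds on both the MSVC and Itanium ABIs): the inheritance model of the pointee class decides how many of the FieldOffset / NonVirtualBaseAdjustment / VBPtrOffset / VBTableOffset fields a member pointer carries.

    // Hypothetical illustration of MS ABI member pointer layouts.
    struct A { int a; };
    struct B { int b; };
    struct Single { int x; };     // single inheritance: FieldOffset only
    struct Multi : A, B {};       // multiple: adds NVOffset (functions only)
    struct Virt : virtual A {};   // virtual: adds VBTableOffset

    // Member pointers grow with the model; exact sizes are target-dependent,
    // so only the ordering is asserted here.
    static_assert(sizeof(int Single::*) <= sizeof(int Virt::*), "");
    static_assert(sizeof(int (Single::*)()) <= sizeof(int (Multi::*)()), "");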
llvm::Value *DstVBIndexEqZero = SrcVBIndexEqZero; - if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance) && - MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) { + if (inheritanceModelHasVBTableOffsetField(DstInheritance) && + inheritanceModelHasVBTableOffsetField(SrcInheritance)) { if (llvm::GlobalVariable *VDispMap = getAddrOfVirtualDisplacementMap(SrcRD, DstRD)) { llvm::Value *VBIndex = Builder.CreateExactUDiv( @@ -3256,7 +3267,7 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( // Set the VBPtrOffset to zero if the vbindex is zero. Otherwise, initialize // it to the offset of the vbptr. - if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) { + if (inheritanceModelHasVBPtrOffsetField(DstInheritance)) { llvm::Value *DstVBPtrOffset = llvm::ConstantInt::get( CGM.IntTy, getContext().getASTRecordLayout(DstRD).getVBPtrOffset().getQuantity()); @@ -3267,7 +3278,7 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( // Likewise, apply a similar adjustment so that dereferencing the member // pointer correctly accounts for the distance between the start of the first // virtual base and the top of the MDC. - if (DstInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) { + if (DstInheritance == MSInheritanceModel::Virtual) { if (int64_t DstOffsetToFirstVBase = getContext().getOffsetOfBaseWithVBPtr(DstRD).getQuantity()) { llvm::Value *DoDstAdjustment = Builder.CreateSelect( @@ -3280,17 +3291,17 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion( // Recompose dst from the null struct and the adjusted fields from src. llvm::Value *Dst; - if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) { + if (inheritanceModelHasOnlyOneField(IsFunc, DstInheritance)) { Dst = FirstField; } else { Dst = llvm::UndefValue::get(ConvertMemberPointerType(DstTy)); unsigned Idx = 0; Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++); - if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance)) + if (inheritanceModelHasNVOffsetField(IsFunc, DstInheritance)) Dst = Builder.CreateInsertValue(Dst, NonVirtualBaseAdjustment, Idx++); - if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) + if (inheritanceModelHasVBPtrOffsetField(DstInheritance)) Dst = Builder.CreateInsertValue(Dst, VBPtrOffset, Idx++); - if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance)) + if (inheritanceModelHasVBTableOffsetField(DstInheritance)) Dst = Builder.CreateInsertValue(Dst, VirtualBaseAdjustmentOffset, Idx++); } return Dst; @@ -3346,7 +3357,7 @@ CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer( CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); CGBuilderTy &Builder = CGF.Builder; - MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); // Extract the fields we need, regardless of model. We'll apply them if we // have them. @@ -3358,11 +3369,11 @@ CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer( // We need to extract values. 
unsigned I = 0; FunctionPointer = Builder.CreateExtractValue(MemPtr, I++); - if (MSInheritanceAttr::hasNVOffsetField(MPT, Inheritance)) + if (inheritanceModelHasNVOffsetField(MPT, Inheritance)) NonVirtualBaseAdjustment = Builder.CreateExtractValue(MemPtr, I++); - if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) + if (inheritanceModelHasVBPtrOffsetField(Inheritance)) VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++); - if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) + if (inheritanceModelHasVBTableOffsetField(Inheritance)) VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp index 3b4e06045a37..01093cf20c18 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ModuleBuilder.cpp @@ -65,6 +65,13 @@ namespace { private: SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs; + static llvm::StringRef ExpandModuleName(llvm::StringRef ModuleName, + const CodeGenOptions &CGO) { + if (ModuleName == "-" && !CGO.MainFileName.empty()) + return CGO.MainFileName; + return ModuleName; + } + public: CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HSO, @@ -73,7 +80,8 @@ namespace { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0), - CoverageInfo(CoverageInfo), M(new llvm::Module(ModuleName, C)) { + CoverageInfo(CoverageInfo), + M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)) { C.setDiscardValueNames(CGO.DiscardValueNames); } @@ -121,7 +129,7 @@ namespace { llvm::Module *StartModule(llvm::StringRef ModuleName, llvm::LLVMContext &C) { assert(!M && "Replacing existing Module?"); - M.reset(new llvm::Module(ModuleName, C)); + M.reset(new llvm::Module(ExpandModuleName(ModuleName, CodeGenOpts), C)); Initialize(*Ctx); return M.get(); } @@ -232,6 +240,9 @@ namespace { if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Member)) { if (Ctx->DeclMustBeEmitted(DRD)) Builder->EmitGlobal(DRD); + } else if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Member)) { + if (Ctx->DeclMustBeEmitted(DMD)) + Builder->EmitGlobal(DMD); } } } @@ -279,6 +290,10 @@ namespace { Builder->EmitTentativeDefinition(D); } + void CompleteExternalDeclaration(VarDecl *D) override { + Builder->EmitExternalDeclaration(D); + } + void HandleVTable(CXXRecordDecl *RD) override { if (Diags.hasErrorOccurred()) return; diff --git a/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 15a2ab99fdac..284e8022a3c4 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -279,7 +279,7 @@ public: *M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data, "__clang_ast"); // The on-disk hashtable needs to be aligned. - ASTSym->setAlignment(8); + ASTSym->setAlignment(llvm::Align(8)); // Mach-O also needs a segment name. 
if (Triple.isOSBinFormatMachO()) @@ -297,7 +297,7 @@ public: Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayout(), M.get(), BackendAction::Backend_EmitLL, - llvm::make_unique<llvm::raw_svector_ostream>(Buffer)); + std::make_unique<llvm::raw_svector_ostream>(Buffer)); llvm::dbgs() << Buffer; }); @@ -321,7 +321,7 @@ ObjectFilePCHContainerWriter::CreatePCHContainerGenerator( const std::string &OutputFileName, std::unique_ptr<llvm::raw_pwrite_stream> OS, std::shared_ptr<PCHBuffer> Buffer) const { - return llvm::make_unique<PCHContainerGenerator>( + return std::make_unique<PCHContainerGenerator>( CI, MainFileName, OutputFileName, std::move(OS), Buffer); } @@ -335,7 +335,11 @@ ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const { // Find the clang AST section in the container. for (auto &Section : OF->sections()) { StringRef Name; - Section.getName(Name); + if (Expected<StringRef> NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if ((!IsCOFF && Name == "__clangast") || (IsCOFF && Name == "clangast")) { if (Expected<StringRef> E = Section.getContents()) return *E; diff --git a/contrib/llvm-project/clang/lib/CodeGen/SanitizerMetadata.cpp b/contrib/llvm-project/clang/lib/CodeGen/SanitizerMetadata.cpp index ebc9cd5529bc..24ae6c6e362f 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "SanitizerMetadata.h" #include "CodeGenModule.h" +#include "clang/AST/Attr.h" #include "clang/AST/Type.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" diff --git a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp index 81f40011f11c..682ef18da73b 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp @@ -17,10 +17,12 @@ #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" +#include "clang/AST/Attr.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -28,7 +30,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> // std::sort +#include <algorithm> // std::sort using namespace clang; using namespace CodeGen; @@ -778,6 +780,12 @@ public: B.addAttribute("wasm-import-name", Attr->getImportName()); Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } + if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-export-name", Attr->getExportName()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } } if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { @@ -833,10 +841,13 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false, + bool IsIndirect = isAggregateTypeForABI(Ty) && + !isEmptyRecord(getContext(), Ty, true) && + 
!isSingleElementStruct(Ty, getContext()); + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), - /*AllowHigherAlign=*/ true); + /*AllowHigherAlign=*/true); } //===----------------------------------------------------------------------===// @@ -986,11 +997,13 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { /// Similar to llvm::CCState, but for Clang. struct CCState { - CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {} + CCState(CGFunctionInfo &FI) + : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {} - unsigned CC; - unsigned FreeRegs; - unsigned FreeSSERegs; + llvm::SmallBitVector IsPreassigned; + unsigned CC = CallingConv::CC_C; + unsigned FreeRegs = 0; + unsigned FreeSSERegs = 0; }; enum { @@ -1061,8 +1074,7 @@ class X86_32ABIInfo : public SwiftABIInfo { void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const; - void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, - bool &UsedInAlloca) const; + void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const; public: @@ -1177,6 +1189,10 @@ static void rewriteInputConstraintReferences(unsigned FirstIn, if (NumDollars % 2 != 0 && Pos < AsmString.size()) { // We have an operand reference. size_t DigitStart = Pos; + if (AsmString[DigitStart] == '{') { + OS << '{'; + ++DigitStart; + } size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart); if (DigitEnd == std::string::npos) DigitEnd = AsmString.size(); @@ -1222,7 +1238,7 @@ void X86_32TargetCodeGenInfo::addReturnRegisterOutputs( ResultTruncRegTypes.push_back(CoerceTy); // Coerce the integer by bitcasting the return slot pointer. - ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(), + ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(CGF), CoerceTy->getPointerTo())); ResultRegDests.push_back(ReturnSlot); @@ -1626,9 +1642,38 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } +void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const { + // Vectorcall x86 works subtly different than in x64, so the format is + // a bit different than the x64 version. First, all vector types (not HVAs) + // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers. + // This differs from the x64 implementation, where the first 6 by INDEX get + // registers. + // In the second pass over the arguments, HVAs are passed in the remaining + // vector registers if possible, or indirectly by address. The address will be + // passed in ECX/EDX if available. Any other arguments are passed according to + // the usual fastcall rules. + MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); + for (int I = 0, E = Args.size(); I < E; ++I) { + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType &Ty = Args[I].type; + if ((Ty->isVectorType() || Ty->isBuiltinType()) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + Args[I].info = ABIArgInfo::getDirect(); + State.IsPreassigned.set(I); + } + } + } +} + ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. 
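A sketch of what runVectorCallFirstPass above implements (illustrative; assumes clang targeting 32-bit x86 with SSE and __vectorcall): plain vector and floating-point arguments are pre-assigned XMM/YMM registers in the first pass and recorded in IsPreassigned; HVAs and everything else are classified in the usual second pass over the remaining arguments.

    // Hypothetical x86 __vectorcall function.
    #include <immintrin.h>

    struct Hva { __m128 a, b; }; // homogeneous vector aggregate (HVA)

    // First pass: v0 and v1 take XMM registers and are marked preassigned.
    // Second pass: 'h' uses remaining XMM registers if enough are free,
    // otherwise it is passed indirectly; 'i' follows the fastcall-like rules.
    int __vectorcall f(__m128 v0, __m128 v1, Hva h, int i) { return i; }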
+ bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall; + bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall; + bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall; Ty = useFirstFieldIfTransparentUnion(Ty); @@ -1648,11 +1693,16 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // to other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_RegCall && + if ((IsRegCall || IsVectorCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; + + // Vectorcall passes HVAs directly and does not flatten them, but regcall + // does. + if (IsVectorCall) + return getDirectX86Hva(); + if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); @@ -1694,10 +1744,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, if (getContext().getTypeSize(Ty) <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( - State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall, - PaddingType); + IsFastCall || IsVectorCall || IsRegCall, PaddingType); return getIndirectResult(Ty, true, State); } @@ -1736,60 +1783,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getDirect(); } -void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, - bool &UsedInAlloca) const { - // Vectorcall x86 works subtly different than in x64, so the format is - // a bit different than the x64 version. First, all vector types (not HVAs) - // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers. - // This differs from the x64 implementation, where the first 6 by INDEX get - // registers. - // After that, integers AND HVAs are assigned Left to Right in the same pass. - // Integers are passed as ECX/EDX if one is available (in order). HVAs will - // first take up the remaining YMM/XMM registers. If insufficient registers - // remain but an integer register (ECX/EDX) is available, it will be passed - // in that, else, on the stack. - for (auto &I : FI.arguments()) { - // First pass do all the vector types. - const Type *Base = nullptr; - uint64_t NumElts = 0; - const QualType& Ty = I.type; - if ((Ty->isVectorType() || Ty->isBuiltinType()) && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - I.info = ABIArgInfo::getDirect(); - } else { - I.info = classifyArgumentType(Ty, State); - } - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - } - } - - for (auto &I : FI.arguments()) { - // Second pass, do the rest! - const Type *Base = nullptr; - uint64_t NumElts = 0; - const QualType& Ty = I.type; - bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts); - - if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) { - // Assign true HVAs (non vector/native FP types). - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - I.info = getDirectX86Hva(); - } else { - I.info = getIndirectResult(Ty, /*ByVal=*/false, State); - } - } else if (!IsHva) { - // Assign all Non-HVAs, so this will exclude Vector/FP args. 
- I.info = classifyArgumentType(Ty, State); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - } - } -} - void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { - CCState State(FI.getCallingConvention()); + CCState State(FI); if (IsMCUABI) State.FreeRegs = 3; else if (State.CC == llvm::CallingConv::X86_FastCall) @@ -1821,15 +1816,20 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { if (FI.isChainCall()) ++State.FreeRegs; + // For vectorcall, do a first pass over the arguments, assigning FP and vector + // arguments to XMM registers as available. + if (State.CC == llvm::CallingConv::X86_VectorCall) + runVectorCallFirstPass(FI, State); + bool UsedInAlloca = false; - if (State.CC == llvm::CallingConv::X86_VectorCall) { - computeVectorCallArgs(FI, State, UsedInAlloca); - } else { - // If not vectorcall, revert to normal behavior. - for (auto &I : FI.arguments()) { - I.info = classifyArgumentType(I.type, State); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - } + MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); + for (int I = 0, E = Args.size(); I < E; ++I) { + // Skip arguments that have already been assigned. + if (State.IsPreassigned.test(I)) + continue; + + Args[I].info = classifyArgumentType(Args[I].type, State); + UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); } // If we needed to use inalloca for any argument, do a second pass and rewrite @@ -2177,6 +2177,17 @@ class X86_64ABIInfo : public SwiftABIInfo { return true; } + // GCC classifies vectors of __int128 as memory. + bool passInt128VectorsInMem() const { + // Clang <= 9.0 did not do this. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver9) + return false; + + const llvm::Triple &T = getTarget().getTriple(); + return T.isOSLinux() || T.isOSNetBSD(); + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. @@ -2657,6 +2668,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { + QualType ElementType = VT->getElementType(); + + // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( + if (passInt128VectorsInMem() && Size != 128 && + (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || + ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) + return; + // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be @@ -2787,8 +2806,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); - const CXXRecordDecl *Base = - cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + const auto *Base = + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // Classify this field. 
// @@ -2899,6 +2918,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; + QualType EltTy = VecTy->getElementType(); + if (passInt128VectorsInMem() && + (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || + EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) + return true; } return false; @@ -2973,14 +2997,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); - if (isa<llvm::VectorType>(IRType) || - IRType->getTypeID() == llvm::Type::FP128TyID) + if (isa<llvm::VectorType>(IRType)) { + // Don't pass vXi128 vectors in their native type, the backend can't + // legalize them. + if (passInt128VectorsInMem() && + IRType->getVectorElementType()->isIntegerTy(128)) { + // Use a vXi64 vector. + uint64_t Size = getContext().getTypeSize(Ty); + return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), + Size / 64); + } + + return IRType; + } + + if (IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); + // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); @@ -3030,8 +3068,8 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); - const CXXRecordDecl *Base = - cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + const auto *Base = + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // If the base is after the span we care about, ignore it. 
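The passInt128VectorsInMem change above is clearer with an example (a sketch; vector_size is a GNU extension): GCC classifies wide vectors of __int128 as MEMORY, and clang now matches it on Linux and NetBSD unless -fclang-abi-compat=9 or lower restores the old behavior.

    // Hypothetical x86-64 example: with AVX, a 256-bit vector of __int128 was
    // previously passed in YMM registers; it is now passed in memory to match
    // GCC. 128-bit variants are unaffected.
    typedef __int128 v2i128 __attribute__((vector_size(32)));

    v2i128 add(v2i128 a, v2i128 b) { return a + b; }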
unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); @@ -4950,7 +4988,7 @@ private: ABIKind getABIKind() const { return Kind; } bool isDarwinPCS() const { return Kind == DarwinPCS; } - ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, @@ -4960,7 +4998,8 @@ private: void computeInfo(CGFunctionInfo &FI) const override { if (!::classifyReturnType(getCXXABI(), FI, *this)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + FI.getReturnInfo() = + classifyReturnType(FI.getReturnType(), FI.isVariadic()); for (auto &it : FI.arguments()) it.info = classifyArgumentType(it.type); @@ -5014,23 +5053,38 @@ public: const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; - llvm::Function *Fn = cast<llvm::Function>(GV); - auto Kind = CGM.getCodeGenOpts().getSignReturnAddress(); - if (Kind != CodeGenOptions::SignReturnAddressScope::None) { + CodeGenOptions::SignReturnAddressScope Scope = CGM.getCodeGenOpts().getSignReturnAddress(); + CodeGenOptions::SignReturnAddressKeyValue Key = CGM.getCodeGenOpts().getSignReturnAddressKey(); + bool BranchTargetEnforcement = CGM.getCodeGenOpts().BranchTargetEnforcement; + if (const auto *TA = FD->getAttr<TargetAttr>()) { + ParsedTargetAttr Attr = TA->parse(); + if (!Attr.BranchProtection.empty()) { + TargetInfo::BranchProtectionInfo BPI; + StringRef Error; + (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + BPI, Error); + assert(Error.empty()); + Scope = BPI.SignReturnAddr; + Key = BPI.SignKey; + BranchTargetEnforcement = BPI.BranchTargetEnforcement; + } + } + + auto *Fn = cast<llvm::Function>(GV); + if (Scope != CodeGenOptions::SignReturnAddressScope::None) { Fn->addFnAttr("sign-return-address", - Kind == CodeGenOptions::SignReturnAddressScope::All + Scope == CodeGenOptions::SignReturnAddressScope::All ? "all" : "non-leaf"); - auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey(); Fn->addFnAttr("sign-return-address-key", Key == CodeGenOptions::SignReturnAddressKeyValue::AKey ? "a_key" : "b_key"); } - if (CGM.getCodeGenOpts().BranchTargetEnforcement) + if (BranchTargetEnforcement) Fn->addFnAttr("branch-target-enforcement"); } }; @@ -5143,23 +5197,24 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Alignment = getContext().getTypeUnadjustedAlign(Ty); Alignment = Alignment < 128 ? 64 : 128; } else { - Alignment = getContext().getTypeAlign(Ty); + Alignment = std::max(getContext().getTypeAlign(Ty), + (unsigned)getTarget().getPointerWidth(0)); } - Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, Alignment); // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. - if (Alignment < 128 && Size == 128) { - llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); - } - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); + llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment); + return ABIArgInfo::getDirect( + Size == Alignment ? 
BaseTy + : llvm::ArrayType::get(BaseTy, Size / Alignment)); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } -ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { +ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, + bool IsVariadic) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); @@ -5183,7 +5238,9 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(RetTy, Base, Members)) + if (isHomogeneousAggregate(RetTy, Base, Members) && + !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 && + IsVariadic)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); @@ -5218,6 +5275,14 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; + + // arm64_32 has to be compatible with the ARM logic here, which allows huge + // vectors for some reason. + llvm::Triple Triple = getTarget().getTriple(); + if (Triple.getArch() == llvm::Triple::aarch64_32 && + Triple.isOSBinFormatMachO()) + return Size <= 32; + return Size != 64 && (Size != 128 || NumElements == 1); } return false; @@ -5509,7 +5574,8 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); - CharUnits SlotSize = CharUnits::fromQuantity(8); + uint64_t PointerSize = getTarget().getPointerWidth(0) / 8; + CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { @@ -7514,7 +7580,7 @@ public: bool shouldUseInReg(QualType Ty, CCState &State) const; void computeInfo(CGFunctionInfo &FI) const override { - CCState State(FI.getCallingConvention()); + CCState State(FI); // Lanai uses 4 registers to pass arguments unless the function has the // regparm attribute set. if (FI.getHasRegParm()) { @@ -7644,6 +7710,42 @@ private: bool isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const override; + // Coerce HIP pointer arguments from generic pointers to global ones. + llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, + unsigned ToAS) const { + // Structure types. + if (auto STy = dyn_cast<llvm::StructType>(Ty)) { + SmallVector<llvm::Type *, 8> EltTys; + bool Changed = false; + for (auto T : STy->elements()) { + auto NT = coerceKernelArgumentType(T, FromAS, ToAS); + EltTys.push_back(NT); + Changed |= (NT != T); + } + // Skip if there is no change in element types. + if (!Changed) + return STy; + if (STy->hasName()) + return llvm::StructType::create( + EltTys, (STy->getName() + ".coerce").str(), STy->isPacked()); + return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked()); + } + // Array types. + if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) { + auto T = ATy->getElementType(); + auto NT = coerceKernelArgumentType(T, FromAS, ToAS); + // Skip if there is no change in that element type. + if (NT == T) + return ATy; + return llvm::ArrayType::get(NT, ATy->getNumElements()); + } + // Single value types.
+ if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS) + return llvm::PointerType::get( + cast<llvm::PointerType>(Ty)->getElementType(), ToAS); + return Ty; + } + public: explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} @@ -7653,6 +7755,8 @@ public: ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; void computeInfo(CGFunctionInfo &FI) const override; + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; }; bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { @@ -7716,6 +7820,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { } } +Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + llvm_unreachable("AMDGPU does not support varargs"); +} + ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { if (isAggregateTypeForABI(RetTy)) { // Records with non-trivial destructors/copy-constructors should not be @@ -7764,14 +7873,22 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { // TODO: Can we omit empty structs? - // Coerce single element structs to its element. + llvm::Type *LTy = nullptr; if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + LTy = CGT.ConvertType(QualType(SeltTy, 0)); + + if (getContext().getLangOpts().HIP) { + if (!LTy) + LTy = CGT.ConvertType(Ty); + LTy = coerceKernelArgumentType( + LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), + /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device)); + } // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we // have to set it to false here. Other args of getDirect() are just defaults. - return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); + return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, @@ -7909,8 +8026,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; - if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) || - (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) && + + const bool IsOpenCLKernel = M.getLangOpts().OpenCL && + FD->hasAttr<OpenCLKernelAttr>(); + const bool IsHIPKernel = M.getLangOpts().HIP && + FD->hasAttr<CUDAGlobalAttr>(); + if ((IsOpenCLKernel || IsHIPKernel) && (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); @@ -7936,6 +8057,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); + } else if (IsOpenCLKernel || IsHIPKernel) { + // By default, restrict the maximum size to a value specified by + // --gpu-max-threads-per-block=n or its default value. + std::string AttrVal = + std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { @@ -8429,7 +8556,7 @@ private: } void computeInfo(CGFunctionInfo &FI) const override { - CCState State(FI.getCallingConvention()); + CCState State(FI); // ARC uses 8 registers to pass arguments. 
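To illustrate coerceKernelArgumentType from the hunk above (a sketch; assumes a HIP toolchain): pointer-typed HIP kernel arguments, including pointers nested inside structs and arrays, are rewritten from generic address-space pointers to global-address-space pointers in the kernel's IR signature.

    // Hypothetical HIP translation unit.
    #include <hip/hip_runtime.h>

    struct Args {
      float *in;       // struct member: coerced to a global pointer
      float *out[2];   // array of pointers: element type coerced too
    };

    __global__ void scale(Args args, float *extra, float k) {
      // body elided; only the lowering of the argument types matters here
    }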
State.FreeRegs = 8; @@ -9236,11 +9363,21 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(RetTy); // IsRetIndirect is true if classifyArgumentType indicated the value should - // be passed indirect or if the type size is greater than 2*xlen. e.g. fp128 - // is passed direct in LLVM IR, relying on the backend lowering code to - // rewrite the argument list and pass indirectly on RV32. - bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || - getContext().getTypeSize(RetTy) > (2 * XLen); + // be passed indirect, or if the type size is a scalar greater than 2*XLen + // and not a complex type with elements <= FLen. e.g. fp128 is passed direct + // in LLVM IR, relying on the backend lowering code to rewrite the argument + // list and pass indirectly on RV32. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + if (!IsRetIndirect && RetTy->isScalarType() && + getContext().getTypeSize(RetTy) > (2 * XLen)) { + if (RetTy->isComplexType() && FLen) { + QualType EltTy = RetTy->getAs<ComplexType>()->getElementType(); + IsRetIndirect = getContext().getTypeSize(EltTy) > FLen; + } else { + // This is a normal scalar > 2*XLen, such as fp128 on RV32. + IsRetIndirect = true; + } + } // We must track the number of GPRs used in order to conform to the RISC-V // ABI, as integer scalars passed in registers should have signext/zeroext @@ -9329,7 +9466,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, if (const auto *RTy = Ty->getAs<RecordType>()) { // Structures with either a non-trivial destructor or a non-trivial // copy constructor are not eligible for the FP calling convention. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, CGT.getCXXABI())) + if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; if (isEmptyRecord(getContext(), Ty, true)) return true; @@ -9390,7 +9527,7 @@ bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); // Not really a candidate if we have a single int but no float. if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) - return IsCandidate = false; + return false; if (!IsCandidate) return false; if (Field1Ty && Field1Ty->isFloatingPointTy()) @@ -9484,7 +9621,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, // Complex types for the hard float ABI must be passed direct rather than // using CoerceAndExpand. 
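A sketch of the RISC-V return-classification cases distinguished above (illustrative; assumes RV32 with the ilp32d hard-float ABI, so XLen=32 and FLen=64, and uses the _Complex clang extension in C++): a _Complex whose element type fits in FLen is no longer forced indirect merely for being wider than 2*XLen, while a genuinely oversized scalar still is.

    // Hypothetical RV32/ilp32d examples.
    _Complex double cd(void) { return 1.0; } // 128 bits > 2*XLen, but each
                                             // 64-bit element fits an FPR,
                                             // so it is returned direct
    long double ld(void) { return 1.0L; }    // fp128 on RV32: scalar > 2*XLen,
                                             // returned indirectly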
if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { - QualType EltTy = Ty->getAs<ComplexType>()->getElementType(); + QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); if (getContext().getTypeSize(EltTy) <= FLen) { ArgFPRsLeft -= 2; return ABIArgInfo::getDirect(); @@ -9674,6 +9811,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new AVRTargetCodeGenInfo(Types)); case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") @@ -9906,7 +10044,7 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( Builder.SetInsertPoint(BB); unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy); auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); - BlockPtr->setAlignment(BlockAlign); + BlockPtr->setAlignment(llvm::MaybeAlign(BlockAlign)); Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); llvm::SmallVector<llvm::Value *, 2> Args;
